Skip to content

Commit

Permalink
Merge branch 'master' into nexter
Browse files Browse the repository at this point in the history
Conflicts:
	graph/leveldb/all_iterator.go
	graph/leveldb/iterator.go
	graph/memstore/triplestore.go
	query/gremlin/finals.go
  • Loading branch information
kortschak committed Aug 6, 2014
2 parents b1a70d9 + b74cb14 commit 62785d2
Show file tree
Hide file tree
Showing 37 changed files with 882 additions and 467 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -44,7 +44,7 @@ If you prefer to build from source, see the documentation on the wiki at [How to

`cd` to the directory and give it a quick test with:
```
./cayley repl --dbpath=testdata.nt
./cayley repl --dbpath=testdata.nq
```

You should see a `cayley>` REPL prompt. Go ahead and give it a try:
Expand Down
2 changes: 1 addition & 1 deletion cayley_appengine.cfg
Expand Up @@ -3,5 +3,5 @@
"db_path": "30kmoviedata.nq.gz",
"read_only": true,
"load_size": 10000,
"gremlin_timeout": 10
"timeout": 10
}
32 changes: 17 additions & 15 deletions cayley_test.go
Expand Up @@ -17,6 +17,7 @@ package main
import (
"sync"
"testing"
"time"

"github.com/google/cayley/config"
"github.com/google/cayley/db"
Expand Down Expand Up @@ -292,9 +293,9 @@ var m2_actors = movie2.Save("name","movie2").Follow(filmToActor)
var (
once sync.Once
cfg = &config.Config{
DatabasePath: "30kmoviedata.nq.gz",
DatabaseType: "memstore",
GremlinTimeout: 300,
DatabasePath: "30kmoviedata.nq.gz",
DatabaseType: "memstore",
Timeout: 300 * time.Second,
}

ts graph.TripleStore
Expand All @@ -316,7 +317,7 @@ func TestQueries(t *testing.T) {
if testing.Short() && test.long {
continue
}
ses := gremlin.NewSession(ts, cfg.GremlinTimeout, true)
ses := gremlin.NewSession(ts, cfg.Timeout, true)
_, err := ses.InputParses(test.query)
if err != nil {
t.Fatalf("Failed to parse benchmark gremlin %s: %v", test.message, err)
Expand All @@ -333,7 +334,7 @@ func TestQueries(t *testing.T) {
if j == nil && err == nil {
continue
}
if err != nil && err.Error() == "Query Timeout" {
if err == gremlin.ErrKillTimeout {
timedOut = true
continue
}
Expand All @@ -347,7 +348,7 @@ func TestQueries(t *testing.T) {

// TODO(kortschak) Be more rigorous in this result validation.
if len(got) != len(test.expect) {
t.Errorf("Unexpected number of results, got:%d expect:%d.", len(got), len(test.expect))
t.Errorf("Unexpected number of results, got:%d expect:%d on %s.", len(got), len(test.expect), test.message)
}
}
}
Expand All @@ -357,17 +358,18 @@ func runBench(n int, b *testing.B) {
b.Skip()
}
prepare(b)
ses := gremlin.NewSession(ts, cfg.GremlinTimeout, true)
_, err := ses.InputParses(benchmarkQueries[n].query)
if err != nil {
b.Fatalf("Failed to parse benchmark gremlin %s: %v", benchmarkQueries[n].message, err)
}
b.StopTimer()
b.ResetTimer()
for i := 0; i < b.N; i++ {
c := make(chan interface{}, 5)
ses := gremlin.NewSession(ts, cfg.Timeout, true)
// Do the parsing we know works.
ses.InputParses(benchmarkQueries[n].query)
b.StartTimer()
go ses.ExecInput(benchmarkQueries[n].query, c, 100)
for _ = range c {
}
b.StopTimer()
}
}

Expand All @@ -391,18 +393,18 @@ func BenchmarkNetAndSpeed(b *testing.B) {
runBench(4, b)
}

func BenchmarkKeannuAndNet(b *testing.B) {
func BenchmarkKeanuAndNet(b *testing.B) {
runBench(5, b)
}

func BenchmarkKeannuAndSpeed(b *testing.B) {
func BenchmarkKeanuAndSpeed(b *testing.B) {
runBench(6, b)
}

func BenchmarkKeannuOther(b *testing.B) {
func BenchmarkKeanuOther(b *testing.B) {
runBench(7, b)
}

func BenchmarkKeannuBullockOther(b *testing.B) {
func BenchmarkKeanuBullockOther(b *testing.B) {
runBench(8, b)
}
103 changes: 93 additions & 10 deletions config/config.go
Expand Up @@ -17,29 +17,112 @@ package config
import (
"encoding/json"
"flag"
"fmt"
"os"
"strconv"
"time"

"github.com/barakmich/glog"
)

// Config holds the settings Cayley runs with. Its JSON encoding is not
// derived from these fields directly; it is defined by the UnmarshalJSON and
// MarshalJSON methods, which translate to and from the tagged config struct.
type Config struct {
// DatabaseType names the database backend (for example "memstore").
DatabaseType string
// DatabasePath is the path to the database.
DatabasePath string
// DatabaseOptions carries free-form options decoded from "db_options";
// presumably backend-specific — confirm against the backends that read it.
DatabaseOptions map[string]interface{}
// ListenHost and ListenPort are the host and port to listen on.
ListenHost string
ListenPort string
// ReadOnly disables writing via HTTP.
ReadOnly bool
// Timeout is the elapsed time until an individual query times out; it is
// the value handed to gremlin.NewSession.
Timeout time.Duration
// LoadSize is the number of quads buffered per block while loading.
LoadSize int
}

// config is the JSON-tagged counterpart of Config. Config.UnmarshalJSON decodes
// into a config value and Config.MarshalJSON encodes from one; the struct tags
// give the key names used in the configuration file.
type config struct {
DatabaseType string `json:"database"`
DatabasePath string `json:"db_path"`
DatabaseOptions map[string]interface{} `json:"db_options"`
ListenHost string `json:"listen_host"`
ListenPort string `json:"listen_port"`
ReadOnly bool `json:"read_only"`
GremlinTimeout int `json:"gremlin_timeout"`
Timeout duration `json:"timeout"`
LoadSize int `json:"load_size"`
}

var databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.")
var databaseBackend = flag.String("db", "memstore", "Database Backend.")
var host = flag.String("host", "0.0.0.0", "Host to listen on (defaults to all).")
var loadSize = flag.Int("load_size", 10000, "Size of triplesets to load")
var port = flag.String("port", "64210", "Port to listen on.")
var readOnly = flag.Bool("read_only", false, "Disable writing via HTTP.")
var gremlinTimeout = flag.Int("gremlin_timeout", 30, "Number of seconds until an individual query times out.")
// UnmarshalJSON implements json.Unmarshaler. The document is first decoded
// into the tagged wire struct and, only if that succeeds, every field of c is
// replaced with the decoded value; on error c is left as it was.
func (c *Config) UnmarshalJSON(data []byte) error {
	var wire config
	if err := json.Unmarshal(data, &wire); err != nil {
		return err
	}

	decoded := Config{
		DatabaseType:    wire.DatabaseType,
		DatabasePath:    wire.DatabasePath,
		DatabaseOptions: wire.DatabaseOptions,
		ListenHost:      wire.ListenHost,
		ListenPort:      wire.ListenPort,
		ReadOnly:        wire.ReadOnly,
		Timeout:         time.Duration(wire.Timeout),
		LoadSize:        wire.LoadSize,
	}
	*c = decoded
	return nil
}

// MarshalJSON implements json.Marshaler by copying c into the tagged wire
// struct and encoding that, so the output uses the configuration file's key
// names.
func (c *Config) MarshalJSON() ([]byte, error) {
	wire := config{
		DatabaseType:    c.DatabaseType,
		DatabasePath:    c.DatabasePath,
		DatabaseOptions: c.DatabaseOptions,
		ListenHost:      c.ListenHost,
		ListenPort:      c.ListenPort,
		ReadOnly:        c.ReadOnly,
		Timeout:         duration(c.Timeout),
		LoadSize:        c.LoadSize,
	}
	return json.Marshal(wire)
}

// duration is a time.Duration that satisfies the json.Unmarshaler and
// json.Marshaler interfaces.
type duration time.Duration

// UnmarshalJSON unmarshals a duration according to the following scheme:
//   - If the element is absent (empty input) the duration is zero.
//   - If the element is JSON null, d is left unchanged, following the
//     encoding/json convention for Unmarshalers.
//   - If the element is a JSON string, it is unquoted first.
//   - If the resulting text is parsable as a time.Duration, the parsed value
//     is kept.
//   - If the text is parsable as a number, that number of seconds is kept;
//     fractional seconds are preserved.
//
// On any error d is left unmodified.
func (d *duration) UnmarshalJSON(data []byte) error {
	if len(data) == 0 {
		*d = 0
		return nil
	}
	text := string(data)
	if text == "null" {
		return nil
	}
	if data[0] == '"' {
		// A JSON string arrives with its quotes and escapes intact; decode it
		// so that "30s" reaches time.ParseDuration as 30s.
		if err := json.Unmarshal(data, &text); err != nil {
			return err
		}
	}

	if t, err := time.ParseDuration(text); err == nil {
		*d = duration(t)
		return nil
	}

	if i, err := strconv.ParseInt(text, 10, 64); err == nil {
		// Largest whole number of seconds that fits in a time.Duration.
		const limit = int64(1<<63-1) / int64(time.Second)
		if i > limit || i < -limit {
			return fmt.Errorf("config: duration %q is out of range", text)
		}
		*d = duration(time.Duration(i) * time.Second)
		return nil
	}

	// strconv.ParseInt does not accept e-notation or fractions, so fall back
	// to a float for inputs such as 1e1 or 1.5.
	f, err := strconv.ParseFloat(text, 64)
	if err != nil {
		return err
	}
	ns := f * float64(time.Second)
	// Written as a negated range test so that NaN is rejected as well.
	if !(ns > -(1<<63) && ns < 1<<63) {
		return fmt.Errorf("config: duration %q is out of range", text)
	}
	*d = duration(ns)
	return nil
}

// MarshalJSON encodes the duration as a quoted time.Duration string such as
// "1m30s", which UnmarshalJSON reads back unchanged. It has a value receiver
// so that it is also used when a duration is marshalled as a field of a
// non-addressable struct.
func (d duration) MarshalJSON() ([]byte, error) {
	// Convert to time.Duration first: duration itself has no String method,
	// and %q applied to a bare integer would emit a quoted rune instead.
	return []byte(fmt.Sprintf("%q", time.Duration(d))), nil
}

// Command-line flags, registered on the default flag set when the package is
// initialised. The visible part of ParseConfigFromFlagsAndFile copies *port
// and *timeout into the Config when the corresponding field has not been set
// (Timeout is checked against zero).
var (
databasePath = flag.String("dbpath", "/tmp/testdb", "Path to the database.")
databaseBackend = flag.String("db", "memstore", "Database Backend.")
host = flag.String("host", "0.0.0.0", "Host to listen on (defaults to all).")
loadSize = flag.Int("load_size", 10000, "Size of triplesets to load")
port = flag.String("port", "64210", "Port to listen on.")
readOnly = flag.Bool("read_only", false, "Disable writing via HTTP.")
// timeout is a flag.Duration, so it is given as a Go duration string.
timeout = flag.Duration("timeout", 30*time.Second, "Elapsed time until an individual query times out.")
)

func ParseConfigFromFile(filename string) *Config {
config := &Config{}
Expand Down Expand Up @@ -100,8 +183,8 @@ func ParseConfigFromFlagsAndFile(fileFlag string) *Config {
config.ListenPort = *port
}

if config.GremlinTimeout == 0 {
config.GremlinTimeout = *gremlinTimeout
if config.Timeout == 0 {
config.Timeout = *timeout
}

if config.LoadSize == 0 {
Expand Down
2 changes: 1 addition & 1 deletion db/load.go
Expand Up @@ -57,7 +57,7 @@ func Load(ts graph.TripleStore, cfg *config.Config, path string) error {
return err
}

block := make([]*quad.Quad, 0, cfg.LoadSize)
block := make([]quad.Quad, 0, cfg.LoadSize)
for {
t, err := dec.Unmarshal()
if err != nil {
Expand Down
6 changes: 3 additions & 3 deletions db/repl.go
Expand Up @@ -72,7 +72,7 @@ func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error
case "gremlin":
fallthrough
default:
ses = gremlin.NewSession(ts, cfg.GremlinTimeout, true)
ses = gremlin.NewSession(ts, cfg.Timeout, true)
}
buf := bufio.NewReader(os.Stdin)
var line []byte
Expand Down Expand Up @@ -114,7 +114,7 @@ func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error
if bytes.HasPrefix(line, []byte(":a")) {
var tripleStmt = line[3:]
triple, err := cquads.Parse(string(tripleStmt))
if triple == nil {
if !triple.IsValid() {
if err != nil {
fmt.Printf("not a valid triple: %v\n", err)
}
Expand All @@ -128,7 +128,7 @@ func Repl(ts graph.TripleStore, queryLanguage string, cfg *config.Config) error
if bytes.HasPrefix(line, []byte(":d")) {
var tripleStmt = line[3:]
triple, err := cquads.Parse(string(tripleStmt))
if triple == nil {
if !triple.IsValid() {
if err != nil {
fmt.Printf("not a valid triple: %v\n", err)
}
Expand Down
6 changes: 3 additions & 3 deletions docs/Configuration.md
Expand Up @@ -72,12 +72,12 @@ All command line flags take precedence over the configuration file.

## Language Options

#### **`gremlin_timeout`**
#### **`timeout`**

* Type: Integer
* Type: Integer or String
* Default: 30

The value in seconds of the maximum length of time the Javascript runtime should run until cancelling the query and returning a 408 Timeout. A negative value means no limit.
The maximum length of time the Javascript runtime should run until cancelling the query and returning a 408 Timeout. When timeout is an integer, it is interpreted as seconds; when it is a string, it is [parsed](http://golang.org/pkg/time/#ParseDuration) as a Go time.Duration. A negative duration means no limit.

## Per-Database Options

Expand Down
18 changes: 18 additions & 0 deletions graph/iterator.go
Expand Up @@ -157,6 +157,22 @@ func Next(it Iterator) bool {
return false
}

// Height is a convenience function to measure the height of an iterator tree.
// An iterator whose type equals until counts as height 1 and is not descended
// into; any other iterator is one level taller than its tallest sub-iterator,
// so an iterator with no sub-iterators also has height 1.
func Height(it Iterator, until Type) int {
	if it.Type() == until {
		return 1
	}
	tallest := 0
	for _, child := range it.SubIterators() {
		if childHeight := Height(child, until); childHeight > tallest {
			tallest = childHeight
		}
	}
	return tallest + 1
}

// FixedIterator wraps iterators that are modifiable by addition of fixed value sets.
type FixedIterator interface {
Iterator
Expand Down Expand Up @@ -184,6 +200,7 @@ const (
Fixed
Not
Optional
Materialize
)

var (
Expand All @@ -204,6 +221,7 @@ var (
"fixed",
"not",
"optional",
"materialize",
}
)

Expand Down
17 changes: 17 additions & 0 deletions graph/iterator/and_iterator_optimize.go
Expand Up @@ -70,6 +70,8 @@ func (it *And) Optimize() (graph.Iterator, bool) {
// now a permutation of itself, but the contents are unchanged.
its = optimizeOrder(its)

its = materializeIts(its)

// Okay! At this point we have an optimized order.

// The easiest thing to do at this point is merely to create a new And iterator
Expand Down Expand Up @@ -293,6 +295,21 @@ func hasOneUsefulIterator(its []graph.Iterator) graph.Iterator {
return nil
}

// materializeIts returns a copy of its in which some iterators are wrapped by
// NewMaterialize. An iterator is wrapped when walking all of it
// (Size*NextCost) is estimated to cost less than a single Contains check and
// its tree, measured down to the nearest Materialize iterator, is more than
// 10 levels high. All other iterators are passed through unchanged, and the
// input order is preserved.
func materializeIts(its []graph.Iterator) []graph.Iterator {
	var result []graph.Iterator
	for _, candidate := range its {
		var chosen graph.Iterator = candidate
		st := candidate.Stats()
		cheapToWalk := st.Size*st.NextCost < st.ContainsCost
		// Height is only computed for iterators that pass the cost test.
		if cheapToWalk && graph.Height(candidate, graph.Materialize) > 10 {
			chosen = NewMaterialize(candidate)
		}
		result = append(result, chosen)
	}
	return result
}

// and.Stats() lives here in and-iterator-optimize.go because it may
// in the future return different statistics based on how it is optimized.
// For now, however, it's pretty static.
Expand Down

0 comments on commit 62785d2

Please sign in to comment.