/
main.go
145 lines (124 loc) · 4.78 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package main
import (
"fmt"
"log"
"os"
"sync"
"github.com/attic-labs/noms/go/datas"
"github.com/attic-labs/noms/go/hash"
"github.com/attic-labs/noms/go/nbs"
"github.com/attic-labs/noms/go/types"
"github.com/attic-labs/noms/go/util/profile"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/dynamodb"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/dustin/go-humanize"
flag "gx/ipfs/QmQLaYRd41dEe13kYwHtKBfXkkZuXzAEsKz56FA17NNT8A/gnuflag"
)
var (
dir = flag.String("dir", "", "Write to an NBS store in the given directory")
table = flag.String("table", "", "Write to an NBS store in AWS, using this table")
bucket = flag.String("bucket", "", "Write to an NBS store in AWS, using this bucket")
dbName = flag.String("db", "", "Write to an NBS store in AWS, using this db name")
)
const memTableSize = 128 * humanize.MiByte
// main opens an NBS store (a local directory, or AWS S3 + DynamoDB), walks
// the chunk graph level by level starting from the store root, and prints a
// per-level table estimating read locality: the number of physical reads
// needed to fetch each level's children versus the optimal one read per
// group of ~branchFactor children.
func main() {
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: %s [options]\n", os.Args[0])
		flag.PrintDefaults()
	}
	profile.RegisterProfileFlags(flag.CommandLine)
	flag.Parse(true)

	if flag.NArg() != 0 {
		flag.Usage()
		return
	}

	var store *nbs.NomsBlockStore
	if *dir != "" {
		store = nbs.NewLocalStore(*dir, memTableSize)
		*dbName = *dir // reuse the directory path as the DB name in output
	} else if *table != "" && *bucket != "" && *dbName != "" {
		sess := session.Must(session.NewSession(aws.NewConfig().WithRegion("us-west-2")))
		store = nbs.NewAWSStore(*table, *dbName, *bucket, s3.New(sess), dynamodb.New(sess), memTableSize)
	} else {
		log.Fatalf("Must set either --dir or ALL of --table, --bucket and --db\n")
	}

	db := datas.NewDatabase(store)
	defer db.Close()

	defer profile.MaybeStartProfile().Stop()

	height := types.NewRef(db.Datasets()).Height()
	fmt.Println("Store is of height", height)
	fmt.Println("| Height | Nodes | Children | Branching | Groups | Reads | Pruned |")
	fmt.Println("+--------+---------+----------+-----------+--------+-------+--------+")
	chartFmt := "| %6d | %7d | %8d | %9d | %6d | %5d | %6d |\n"

	var optimal, sum int
	visited := map[hash.Hash]bool{}
	current := hash.HashSlice{store.Root()}
	for numNodes := 1; numNodes > 0; numNodes = len(current) {
		// Start by reading the values of the current level of the graph.
		currentValues := make(map[hash.Hash]types.Value, len(current))
		readValues := db.ReadManyValues(current)
		for i, v := range readValues {
			h := current[i]
			currentValues[h] = v
			visited[h] = true
		}

		// Iterate all the Values at the current level of the graph IN ORDER (as
		// specified in |current|) and gather up their embedded refs into two
		// lists of hash.Hashes:
		// 1) orderedChildren: ALL the children of the current level, used to
		//    estimate how good the locality is among them.
		// 2) nextLevel: only the child nodes that contain refs to chunks we
		//    haven't yet visited (excludes already-visited nodes and nodes
		//    without children), used to descend to the next level of the graph.
		orderedChildren := hash.HashSlice{}
		nextLevel := hash.HashSlice{}
		for _, h := range current {
			currentValues[h].WalkRefs(func(r types.Ref) {
				target := r.TargetHash()
				orderedChildren = append(orderedChildren, target)
				if !visited[target] && r.Height() > 1 {
					nextLevel = append(nextLevel, target)
				}
			})
		}

		// Estimate locality among the members of |orderedChildren| by splitting
		// into groups that are roughly |branchFactor| in size and calling
		// CalcReads on each group concurrently. With perfect locality, we'd
		// expect that each group could be read in a single physical read.
		numChildren := len(orderedChildren)
		branchFactor := numChildren / numNodes
		numGroups := numNodes
		if numChildren%numNodes != 0 {
			numGroups++
		}

		wg := &sync.WaitGroup{}
		reads := make([]int, numGroups)
		for i := 0; i < numGroups; i++ {
			wg.Add(1)
			go func(i int) {
				defer wg.Done()
				// The last group absorbs the remainder when numChildren doesn't
				// divide evenly into numGroups slots of branchFactor.
				end := (i + 1) * branchFactor
				if i+1 == numGroups {
					end = numChildren
				}
				reads[i], _ = store.CalcReads(orderedChildren[i*branchFactor:end].HashSet(), 0)
			}(i)
		}
		wg.Wait()

		sumOfReads := sumInts(reads)
		fmt.Printf(chartFmt, height, numNodes, numChildren, branchFactor, numGroups, sumOfReads, numChildren-len(nextLevel))
		sum += sumOfReads
		optimal += numGroups
		height--
		current = nextLevel
	}

	fmt.Printf("\nVisited %d chunk groups\n", optimal)
	fmt.Printf("Reading DB %s requires %.01fx optimal number of reads\n", *dbName, float64(sum)/float64(optimal))
}
// sumInts returns the total of all values in nums. A nil or empty slice
// yields 0.
func sumInts(nums []int) int {
	total := 0
	for _, v := range nums {
		total += v
	}
	return total
}