From 3de57e9f7466af52050ffdcadb23cb1ff58c5caa Mon Sep 17 00:00:00 2001 From: bom-d-van Date: Thu, 20 Aug 2020 09:13:30 +0200 Subject: [PATCH] ctrie: ci and bug fixes * Fix ci errors * Handle potential index out of bound in all the trie walking funcs * Refactor test logs --- carbonserver/carbonserver.go | 15 +++++--- carbonserver/trie.go | 69 +++++++++++++++++++++++++----------- carbonserver/trie_test.go | 29 +++++++++------ 3 files changed, 77 insertions(+), 36 deletions(-) diff --git a/carbonserver/carbonserver.go b/carbonserver/carbonserver.go index 3a76cb0a8..4a01d1815 100644 --- a/carbonserver/carbonserver.go +++ b/carbonserver/carbonserver.go @@ -704,8 +704,11 @@ func (listener *CarbonserverListener) updateFileList(dir string, cacheMetricName } } else { defer func() { - if err := flc.close(); err != nil { - logger.Error("failed to close flie list cache", zap.Error(err)) + // flc could be reset to nil during filepath walk + if flc != nil { + if err := flc.close(); err != nil { + logger.Error("failed to close flie list cache", zap.Error(err)) + } } }() } @@ -738,7 +741,9 @@ func (listener *CarbonserverListener) updateFileList(dir string, cacheMetricName if flc != nil { if err := flc.write(trimmedName); err != nil { logger.Error("failed to write to file list cache", zap.Error(err)) - flc.file.Close() + if err := flc.close(); err != nil { + logger.Error("failed to close flie list cache", zap.Error(err)) + } flc = nil } } @@ -783,7 +788,7 @@ func (listener *CarbonserverListener) updateFileList(dir string, cacheMetricName atomic.StoreUint64(&listener.metrics.TrieNodes, uint64(count)) atomic.StoreUint64(&listener.metrics.TrieFiles, uint64(files)) atomic.StoreUint64(&listener.metrics.TrieDirs, uint64(dirs)) - infos = append(infos, zap.Duration("trie_count_nodes_time", time.Now().Sub(start))) + infos = append(infos, zap.Duration("trie_count_nodes_time", time.Since(start))) } var stat syscall.Statfs_t @@ -826,7 +831,7 @@ func (listener *CarbonserverListener) updateFileList(dir string, cacheMetricName if listener.trigramIndex && !listener.concurrentIndex { start := time.Now() nfidx.trieIdx.setTrigrams() - infos = append(infos, zap.Duration("set_trigram_time", time.Now().Sub(start))) + infos = append(infos, zap.Duration("set_trigram_time", time.Since(start))) } } else { nfidx.files = files diff --git a/carbonserver/trie.go b/carbonserver/trie.go index e7ec04adf..2c9a85a6a 100644 --- a/carbonserver/trie.go +++ b/carbonserver/trie.go @@ -358,6 +358,9 @@ func newTrie(fileExt string) *trieIndex { } } +func (ti *trieIndex) getDepth() uint64 { return atomic.LoadUint64(&ti.depth) } +func (ti *trieIndex) setDepth(d uint64) { atomic.StoreUint64(&ti.depth, d) } + // TODO: add some defensive logics agains bad paths? // // abc.def.ghi @@ -374,8 +377,8 @@ func (ti *trieIndex) insert(path string) error { } cur := ti.root - if uint64(len(path)) > atomic.LoadUint64(&ti.depth) { - atomic.StoreUint64(&ti.depth, uint64(len(path))) + if uint64(len(path)) > ti.getDepth() { + ti.setDepth(uint64(len(path))) ti.longestMetric = path } @@ -557,9 +560,10 @@ func (ti *trieIndex) query(expr string, limit int, expand func(globs []string) ( var cur = ti.root var curChildrens = cur.getChildrens() - var nindex = make([]int, atomic.LoadUint64(&ti.depth)+1) - var trieNodes = make([]*trieNode, atomic.LoadUint64(&ti.depth)+1) - var childrensStack = make([][]*trieNode, atomic.LoadUint64(&ti.depth)+1) + var depth = ti.getDepth() + 1 + var nindex = make([]int, depth) + var trieNodes = make([]*trieNode, depth) + var childrensStack = make([][]*trieNode, depth) var ncindex int var mindex int var curm = matchers[0] @@ -725,11 +729,12 @@ func dumpTrigrams(data []uint32) []trigram.T { func (ti *trieIndex) allMetrics(sep byte) []string { var files = make([]string, 0, ti.fileCount) - var nindex = make([]int, ti.depth+1) + var depth = ti.getDepth() + 1 + 1 + var nindex = make([]int, depth) var ncindex int var cur = ti.root var curChildrens = cur.getChildrens() - var trieNodes = make([]*trieNode, ti.depth+1) + var trieNodes = make([]*trieNode, depth) for { if nindex[ncindex] >= len(curChildrens) { goto parent @@ -739,6 +744,9 @@ func (ti *trieIndex) allMetrics(sep byte) []string { cur = cur.getChild(curChildrens, nindex[ncindex]) curChildrens = cur.getChildrens() ncindex++ + if ncindex >= len(trieNodes)-1 { + goto parent + } if cur.file() { files = append(files, cur.fullPath(sep, trieNodes[:ncindex])) @@ -765,11 +773,12 @@ func (ti *trieIndex) allMetrics(sep byte) []string { } func (ti *trieIndex) dump(w io.Writer) { - var nindex = make([]int, ti.depth+1) + var depth = ti.getDepth() + 1 + var nindex = make([]int, depth) var ncindex int var cur = ti.root var curChildrens = cur.getChildrens() - var trieNodes = make([]*trieNode, ti.depth+1) + var trieNodes = make([]*trieNode, depth) var ident []byte for { if nindex[ncindex] >= len(curChildrens) { @@ -780,6 +789,9 @@ func (ti *trieIndex) dump(w io.Writer) { cur = cur.getChild(curChildrens, nindex[ncindex]) curChildrens = cur.getChildrens() ncindex++ + if ncindex >= len(trieNodes)-1 { + goto parent + } if cur.file() { fmt.Fprintf(w, "%s$ (%d/%d)\n", ident, len(*cur.childrens), cur.gen) @@ -814,12 +826,13 @@ func (ti *trieIndex) dump(w io.Writer) { // boundary) func (ti *trieIndex) statNodes() map[*trieNode]int { var stats = map[*trieNode]int{} - var nindex = make([]int, ti.depth+1) + var depth = ti.getDepth() + 1 + var nindex = make([]int, depth) var ncindex int var cur = ti.root var curChildrens = cur.getChildrens() - var trieNodes = make([]*trieNode, ti.depth+1) - var curdirs = make([]int, ti.depth+1) + var trieNodes = make([]*trieNode, depth) + var curdirs = make([]int, depth) var curindex int for { @@ -836,6 +849,9 @@ func (ti *trieIndex) statNodes() map[*trieNode]int { trieNodes[ncindex] = cur cur = cur.getChild(curChildrens, nindex[ncindex]) ncindex++ + if ncindex >= len(nindex)-1 { + goto parent + } if cur.dir() { curdirs[curindex]++ @@ -866,11 +882,12 @@ func (ti *trieIndex) statNodes() map[*trieNode]int { // TODO: support ctrie func (ti *trieIndex) setTrigrams() { - var nindex = make([]int, ti.depth+1) + var depth = ti.getDepth() + 1 + var nindex = make([]int, depth) var ncindex int var cur = ti.root - var trieNodes = make([]*trieNode, ti.depth+1) - var trigrams = make([][]uint32, ti.depth+1) + var trieNodes = make([]*trieNode, depth) + var trigrams = make([][]uint32, depth) var stats = ti.statNodes() // chosen semi-randomly. maybe we could turn this into a configurations @@ -887,7 +904,9 @@ func (ti *trieIndex) setTrigrams() { trieNodes[ncindex] = cur cur = (*cur.childrens)[nindex[ncindex]] ncindex++ - trieNodes[ncindex] = cur + if ncindex >= len(nindex)-1 { + goto parent + } // abc.xyz.cjk trigrams[ncindex] = []uint32{} @@ -979,7 +998,8 @@ func (ti *trieIndex) prune() { cur.childrens = *cur.node.childrens var idx int - var states = make([]state, ti.depth+1) + var depth = ti.getDepth() + 1 + var states = make([]state, depth) for { if cur.next >= len(cur.childrens) { @@ -1007,6 +1027,9 @@ func (ti *trieIndex) prune() { states[idx] = cur idx++ + if idx >= len(states)-1 { + goto parent + } cur.next = 0 cur.node = (cur.childrens)[states[idx-1].next] @@ -1047,7 +1070,7 @@ func (ti *trieIndex) prune() { type trieCounter [256]int -func (tc trieCounter) String() string { +func (tc *trieCounter) String() string { var str string for i, c := range tc { if c > 0 { @@ -1060,7 +1083,7 @@ func (tc trieCounter) String() string { return fmt.Sprintf("{%s}", str) } -func (ti *trieIndex) countNodes() (count, files, dirs, onec, onefc, onedc int, countByChildren, nodesByGen trieCounter) { +func (ti *trieIndex) countNodes() (count, files, dirs, onec, onefc, onedc int, countByChildren, nodesByGen *trieCounter) { type state struct { next int node *trieNode @@ -1072,7 +1095,10 @@ func (ti *trieIndex) countNodes() (count, files, dirs, onec, onefc, onedc int, c cur.childrens = cur.node.childrens var idx int - var states = make([]state, ti.depth+1) + var depth = ti.getDepth() + 1 + var states = make([]state, depth) + countByChildren = &trieCounter{} + nodesByGen = &trieCounter{} for { if cur.next >= len(*cur.childrens) { @@ -1090,6 +1116,9 @@ func (ti *trieIndex) countNodes() (count, files, dirs, onec, onefc, onedc int, c states[idx] = cur idx++ + if idx >= len(states)-1 { + goto parent + } cur.next = 0 cur.node = (*cur.childrens)[states[idx-1].next] diff --git a/carbonserver/trie_test.go b/carbonserver/trie_test.go index 52d9cd70f..c10001d57 100644 --- a/carbonserver/trie_test.go +++ b/carbonserver/trie_test.go @@ -21,7 +21,11 @@ import ( func init() { log.SetFlags(log.Lshortfile) } -func newTrieServer(files []string, withTrigram bool) *CarbonserverListener { +type logf interface { + Logf(format string, args ...interface{}) +} + +func newTrieServer(files []string, withTrigram bool, l logf) *CarbonserverListener { var listener CarbonserverListener listener.logger = zap.NewNop() listener.accessLogger = zap.NewNop() @@ -37,15 +41,15 @@ func newTrieServer(files []string, withTrigram bool) *CarbonserverListener { for _, file := range files { trieIndex.insert(file) } - fmt.Printf("trie index took %s\n", time.Now().Sub(start)) + l.Logf("trie index took %s\n", time.Since(start)) if withTrigram { start = time.Now() trieIndex.setTrigrams() - fmt.Printf("trie setTrigrams took %s size %d\n", time.Now().Sub(start), len(trieIndex.trigrams)) + l.Logf("trie setTrigrams took %s size %d\n", time.Since(start), len(trieIndex.trigrams)) } - fmt.Printf("longest metric(%d): %s\n", trieIndex.depth, trieIndex.longestMetric) + l.Logf("longest metric(%d): %s\n", trieIndex.depth, trieIndex.longestMetric) listener.UpdateFileIndex(&fileIndex{ trieIdx: trieIndex, @@ -54,7 +58,7 @@ func newTrieServer(files []string, withTrigram bool) *CarbonserverListener { return &listener } -func newTrigramServer(files []string) *CarbonserverListener { +func newTrigramServer(files []string, l logf) *CarbonserverListener { var listener CarbonserverListener listener.logger = zap.NewNop() listener.accessLogger = zap.NewNop() @@ -67,7 +71,7 @@ func newTrigramServer(files []string) *CarbonserverListener { start := time.Now() idx := trigram.NewIndex(files) - fmt.Printf("trigram index took %s\n", time.Now().Sub(start)) + l.Logf("trigram index took %s\n", time.Since(start)) listener.UpdateFileIndex(&fileIndex{ idx: idx, @@ -634,7 +638,7 @@ func TestTrieIndex(t *testing.T) { t.Run(c.query, func(t *testing.T) { t.Logf("case: TestTrieIndex/'^%s$'", regexp.QuoteMeta(c.query)) - trieServer := newTrieServer(c.input, false) + trieServer := newTrieServer(c.input, false, t) resultCh := make(chan *ExpandedGlobResponse, 1) trieServer.expandGlobs(context.TODO(), c.query, resultCh) result := <-resultCh @@ -720,6 +724,9 @@ func TestTrieConcurrentReadWrite(t *testing.T) { } } +// for fixing Unused code error from deepsource.io, dump is useful for debugging +var _ = (&trieIndex{}).dump + func TestTriePrune(t *testing.T) { cases := []struct { files1 []string @@ -827,7 +834,7 @@ func TestTriePrune(t *testing.T) { if conedc != sonedc { t.Errorf("conedc = %v; sonedc %v", conedc, sonedc) } - if ccountByChildren != scountByChildren { + if *ccountByChildren != *scountByChildren { t.Errorf("ccountByChildren = %s; scountByChildren %s", ccountByChildren, scountByChildren) } }) @@ -859,11 +866,11 @@ func BenchmarkGlobIndex(b *testing.B) { var wg sync.WaitGroup wg.Add(2) go func() { - btrieServer = newTrieServer(files, true) + btrieServer = newTrieServer(files, true, b) wg.Done() }() go func() { - btrigramServer = newTrigramServer(files) + btrigramServer = newTrigramServer(files, b) wg.Done() }() @@ -952,7 +959,7 @@ func TestDumpAllMetrics(t *testing.T) { "/service-01/server-170/metric-namespace-007-008-xdp/cpu.wsp", "/service-01/server-170/metric-namespace-006-xdp/cpu.wsp", } - trie := newTrieServer(files, true) + trie := newTrieServer(files, true, t) metrics := trie.CurrentFileIndex().trieIdx.allMetrics('.') for i := range files { files[i] = files[i][:len(files[i])-4]