Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

trie/bug fixes and finally adds some simple fuzzing logics #383

Merged
merged 2 commits into from
Oct 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
68 changes: 48 additions & 20 deletions carbonserver/trie.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ import (
// dfa inspiration: https://swtch.com/~rsc/regexp/

const (
gstateSplit = 128
gstateSplit = 256
maxGstateCLen = 257

// used in walking over the tree, a lazy way to make sure that all the nodes are
// covered without risking index out of range.
trieDepthBuffer = 7
)

var endGstate = &gstate{}
Expand All @@ -38,14 +43,12 @@ type gmatcher struct {

type gstate struct {
// TODO: make c compact
c [131]bool
c [maxGstateCLen]bool
next []*gstate
}

// gdstate groups a set of gstate pointers.
// NOTE(review): given the "dfa inspiration" reference in this file, this is
// presumably a single DFA state made up of several glob states — confirm.
type gdstate struct {
gstates []*gstate
// next [131]*gdstate
// cacheHit int
}

// dstate returns the last element of g.dstates.
// It indexes len-1 directly, so it panics when g.dstates is empty.
func (g *gmatcher) dstate() *gdstate {
	last := len(g.dstates) - 1
	return g.dstates[last]
}
Expand Down Expand Up @@ -155,6 +158,9 @@ func newGlobState(expr string, expand func(globs []string) ([]string, error)) (*
m.exact = false
s := &gstate{}
i++
if i >= len(expr) {
return nil, errors.New("glob: broken range syntax")
}
negative := expr[i] == '^'
if negative {
i++
Expand All @@ -164,6 +170,14 @@ func newGlobState(expr string, expand func(globs []string) ([]string, error)) (*
if i+1 >= len(expr) {
return nil, errors.New("glob: missing closing range")
}
if expr[i-1] > expr[i+1] {
return nil, errors.New("glob: range start is bigger than range end")
}
// a simple check to make sure that the range doesn't end with 0xff,
// which would cause an endless loop below
if expr[i-1] > 128 || expr[i+1] > 128 {
return nil, errors.New("glob: range overflow")
}

for j := expr[i-1] + 1; j <= expr[i+1]; j++ {
if j != '*' {
Expand Down Expand Up @@ -223,8 +237,8 @@ func newGlobState(expr string, expand func(globs []string) ([]string, error)) (*

cur = &split
case '{':
alterStart := &gstate{c: [131]bool{gstateSplit: true}}
alterEnd := &gstate{c: [131]bool{gstateSplit: true}}
alterStart := &gstate{c: [maxGstateCLen]bool{gstateSplit: true}}
alterEnd := &gstate{c: [maxGstateCLen]bool{gstateSplit: true}}
cur.next = append(cur.next, alterStart)
cur = alterStart
alters = append(alters, [2]*gstate{alterStart, alterEnd})
Expand Down Expand Up @@ -267,10 +281,11 @@ func newGlobState(expr string, expand func(globs []string) ([]string, error)) (*
}
m.dstates = append(m.dstates, &droot)

// TODO: consider dropping trigram integration
if m.lsComplex {
es, err := expand([]string{expr})
if err != nil {
return nil, nil
return &m, nil
}
for _, e := range es {
trigrams := extractTrigrams(e)
Expand Down Expand Up @@ -359,6 +374,10 @@ func newTrie(fileExt string) *trieIndex {
// getDepth atomically loads and returns ti.depth.
func (ti *trieIndex) getDepth() uint64 {
	return atomic.LoadUint64(&ti.depth)
}
// setDepth atomically stores d into ti.depth.
func (ti *trieIndex) setDepth(d uint64) {
	atomic.StoreUint64(&ti.depth, d)
}

// nilFilenameError is a string-backed error type. insert returns it when the
// path, after stripping the file extension, is empty or ends with '/'.
type nilFilenameError string

// Error implements the error interface by returning the underlying string
// unchanged.
func (e nilFilenameError) Error() string {
	return string(e)
}

// TODO: add some defensive logic against bad paths?
//
// abc.def.ghi
Expand All @@ -374,20 +393,25 @@ func (ti *trieIndex) insert(path string) error {
return nil
}

cur := ti.root
if uint64(len(path)) > ti.getDepth() {
ti.setDepth(uint64(len(path)))
ti.longestMetric = path
}

isFile := strings.HasSuffix(path, ti.fileExt)
if isFile {
path = path[:len(path)-len(ti.fileExt)]
}

if path == "" || path[len(path)-1] == '/' {
return nilFilenameError(fmt.Sprintf("metric fileename is nil: %s", path))
}

if uint64(len(path)) > ti.getDepth() {
ti.setDepth(uint64(len(path)))
ti.longestMetric = path
}

var start, nlen int
var sn, newn *trieNode
var cur = ti.root
outer:
// why len(path)+1: make sure the last node is also processed in the loop
for i := 0; i < len(path)+1; i++ {
// getting a full node
if i < len(path) && path[i] != '/' {
Expand Down Expand Up @@ -556,9 +580,13 @@ func (ti *trieIndex) query(expr string, limit int, expand func(globs []string) (
matchers = append(matchers, gs)
}

if len(matchers) == 0 {
return nil, nil, nil
}

var cur = ti.root
var curChildrens = cur.getChildrens()
var depth = ti.getDepth() + 1
var depth = ti.getDepth() + trieDepthBuffer
var nindex = make([]int, depth)
var trieNodes = make([]*trieNode, depth)
var childrensStack = make([][]*trieNode, depth)
Expand Down Expand Up @@ -728,7 +756,7 @@ func dumpTrigrams(data []uint32) []trigram.T { //nolint:deadcode,unused

func (ti *trieIndex) allMetrics(sep byte) []string {
var files = make([]string, 0, ti.fileCount)
var depth = ti.getDepth() + 1
var depth = ti.getDepth() + trieDepthBuffer
var nindex = make([]int, depth)
var ncindex int
var cur = ti.root
Expand Down Expand Up @@ -772,7 +800,7 @@ func (ti *trieIndex) allMetrics(sep byte) []string {
}

func (ti *trieIndex) dump(w io.Writer) {
var depth = ti.getDepth() + 1
var depth = ti.getDepth() + trieDepthBuffer
var nindex = make([]int, depth)
var ncindex int
var cur = ti.root
Expand Down Expand Up @@ -825,7 +853,7 @@ func (ti *trieIndex) dump(w io.Writer) {
// boundary)
func (ti *trieIndex) statNodes() map[*trieNode]int {
var stats = map[*trieNode]int{}
var depth = ti.getDepth() + 1
var depth = ti.getDepth() + trieDepthBuffer
var nindex = make([]int, depth)
var ncindex int
var cur = ti.root
Expand Down Expand Up @@ -881,7 +909,7 @@ func (ti *trieIndex) statNodes() map[*trieNode]int {

// TODO: support ctrie
func (ti *trieIndex) setTrigrams() {
var depth = ti.getDepth() + 1
var depth = ti.getDepth() + trieDepthBuffer
var nindex = make([]int, depth)
var ncindex int
var cur = ti.root
Expand Down Expand Up @@ -997,7 +1025,7 @@ func (ti *trieIndex) prune() {
cur.childrens = *cur.node.childrens

var idx int
var depth = ti.getDepth() + 1
var depth = ti.getDepth() + trieDepthBuffer
var states = make([]state, depth)
for {
if cur.next >= len(cur.childrens) {
Expand Down Expand Up @@ -1088,7 +1116,7 @@ func (ti *trieIndex) countNodes() (count, files, dirs, onec, onefc, onedc int, c
cur.childrens = cur.node.childrens

var idx int
var depth = ti.getDepth() + 1
var depth = ti.getDepth() + trieDepthBuffer
var states = make([]state, depth)
countByChildren = &trieCounter{}
nodesByGen = &trieCounter{}
Expand Down
22 changes: 22 additions & 0 deletions carbonserver/trie_fuzz_index.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// +build fuzz_trie_index

package carbonserver

// run instructions:
// mkdir -p fuzz/trie_index
// go-fuzz-build -tags fuzz_trie_index
// go-fuzz -workdir fuzz/trie_index

// trie is the package-level index that Fuzz inserts into. It is created once
// with the ".wsp" file extension, so inserted paths accumulate across Fuzz calls.
var trie = newTrie(".wsp")

// Fuzz is the go-fuzz entry point for trie insertion. It inserts data, taken
// as a path string, into the shared trie. A nilFilenameError from insert is
// tolerated; any other error is turned into a panic so the fuzzer reports it.
// It always returns 1.
func Fuzz(data []byte) int {
	if err := trie.insert(string(data)); err != nil {
		// Only nilFilenameError is an accepted rejection of the input.
		if _, tolerated := err.(nilFilenameError); !tolerated {
			panic(err)
		}
	}

	return 1
}
50 changes: 50 additions & 0 deletions carbonserver/trie_fuzz_query.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// +build fuzz_trie_query

package carbonserver

import (
"math/rand"
"strings"
"time"
)

// run instructions:
// mkdir -p fuzz/trie_query
// go-fuzz-build -tags fuzz_trie_query
// go-fuzz -workdir fuzz/trie_query

// trie is the index queried by Fuzz. It is built once at package
// initialisation from 1000 randomly generated paths: each path has 1 to 20
// segments joined by "/", each segment is 1 to 256 random bytes, and ".wsp" is
// appended. The generator is seeded from the current time, so the contents
// differ from run to run. A nilFilenameError from insert is ignored; any other
// insert error panics.
var trie = func() *trieIndex {
	index := newTrie(".wsp")
	rand.Seed(time.Now().UnixNano())

	for n := 0; n < 1000; n++ {
		segmentCount := rand.Intn(20) + 1
		segments := make([]string, 0, segmentCount)

		for s := 0; s < segmentCount; s++ {
			segment := make([]byte, rand.Intn(256)+1)
			for b := range segment {
				segment[b] = byte(rand.Intn(256))
			}

			segments = append(segments, string(segment))
		}

		if err := index.insert(strings.Join(segments, "/") + ".wsp"); err != nil {
			// Only nilFilenameError is an accepted rejection of a random path.
			if _, tolerated := err.(nilFilenameError); !tolerated {
				panic(err)
			}
		}
	}

	return index
}()

// Fuzz is the go-fuzz entry point for trie queries. It runs data, taken as a
// glob expression, against the pre-populated trie with a limit of 1000 and an
// expand callback that returns nothing.
//
// It returns 0 when query reports an error and 1 when the query is accepted.
// Under the go-fuzz convention, 1 asks the fuzzer to give the input higher
// priority. Previously both paths returned 0, so the error check had no effect.
func Fuzz(data []byte) int {
	noExpand := func([]string) ([]string, error) { return nil, nil }

	// A returned error is deliberately not escalated to a panic: only a panic
	// raised inside query itself is reported by the fuzzer.
	if _, _, err := trie.query(string(data), 1000, noExpand); err != nil {
		return 0
	}

	return 1
}
47 changes: 47 additions & 0 deletions carbonserver/trie_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,44 @@ func TestTrieIndex(t *testing.T) {
},
expectLeafs: []bool{true, true},
},
{
input: []string{
"/系统/核心/cpu.wsp",
"/系统/核心/memory.wsp",
"/ns1/ns2/ns3/ns4/ns5/ns6/ns7_handle.wsp",
"/ns1/ns2/ns3/ns4/ns5/ns6/ns7.wsp",
},
query: "系统.核心.*",
expect: []string{
"系统.核心.cpu",
"系统.核心.memory",
},
expectLeafs: []bool{true, true},
},
{
input: []string{
"/ns1/ns2/ns3/ns4/ns5/ns6/ns7_handle.wsp",
"/ns1/ns2/ns3/ns4/ns5/ns6/.wsp", // should not panic
},
query: "*",
expect: []string{
"ns1",
},
expectLeafs: []bool{false},
},
{
input: []string{
"/ns1/ns2/ns3/ns4/ns5/ns6/ns7_handle.wsp",
"./..wsp",
"..wsp", // should not panic
},
query: "*",
expect: []string{
".", // should we even support . as filename?
"ns1",
},
expectLeafs: []bool{true, false},
},
}

for _, c := range cases {
Expand Down Expand Up @@ -667,6 +705,15 @@ func TestTrieIndex(t *testing.T) {
}
}

// TestTrieEdgeCases checks that a glob range whose bounds are bytes above 128
// ("[\xff\xff-\xff") is rejected by query with a "glob: range overflow" error.
// On failure it reports the error that was actually returned.
func TestTrieEdgeCases(t *testing.T) {
	const want = "glob: range overflow"

	trie := newTrie(".wsp")
	noExpand := func([]string) ([]string, error) { return nil, nil }

	_, _, err := trie.query("[\xff\xff-\xff", 1000, noExpand)
	if err == nil || err.Error() != want {
		t.Errorf("trie.query returned error %v, want %q", err, want)
	}
}

func TestTrieConcurrentReadWrite(t *testing.T) {
trieIndex := newTrie(".wsp")

Expand Down