Skip to content

Commit

Permalink
Merge pull request #1 from ihexxa/dev
Browse files Browse the repository at this point in the history
feat: add RenamePath and refine Unmarshal
  • Loading branch information
ihexxa committed Jul 10, 2022
2 parents 41ce1a0 + 38c6241 commit f6f0c90
Show file tree
Hide file tree
Showing 5 changed files with 363 additions and 28 deletions.
67 changes: 49 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Fsearch
_An in-memory index which finds a keyword in millions of pathnames within milliseconds._
_An in-memory index which finds a keyword from millions of pathnames within milliseconds._

<a href="https://github.com/ihexxa/fsearch/actions">
<img src="https://github.com/ihexxa/fsearch/workflows/ci-fsearch/badge.svg" />
Expand All @@ -9,29 +9,60 @@ _An in-memory index which finds a keyword in millions of pathnames within millis
</a>

## Features
- Fast: search a keyword in millions of directories within milliseconds (see benchmark).
- Fast: search a keyword from millions of directories within milliseconds (see benchmark).
- Compact: indexing 1M pathnames with around 500MB memory.
- Simple: less than 5 APIs
- Serializable: the index can be serialized and persisted.
- Simple: AddPath, DelPath, MovePath, RenamePath and so on.

## Examples
```golang
const maxResultSize = 50 // the upper bound of matched results size
const pathSeparator = "/"
fs := New(pathSeparator, maxResultSize)
import (
"fmt"
"testing"
)

// add paths
path1 := "a/keyword/c"
path2 := "a/b/keyword"
_ := fs.AddPath(path1)
_ := fs.AddPath(path2)
func TestFSearchExample(t *testing.T) {
t.Run("test example", func(t *testing.T) {
const maxResultSize = 50 // the upper bound of matched results size
const pathSeparator = "/"
fs := New(pathSeparator, maxResultSize)

// search for a key word
matchedPaths, _ := fs.Search("keyword") // matchedPaths should contain both path1 and path2
// add paths
path1 := "a/keyword/c"
path2 := "a/b/keyword"
err := fs.AddPath(path1)
if err != nil {
t.Fatal(err)
}
err = fs.AddPath(path2)
if err != nil {
t.Fatal(err)
}

// delete paths
_ := fs.DelPath(path1)
_ := fs.DelPath(path2)
// search for a key word
matchedPaths, err := fs.Search("keyword") // matchedPaths should contain both path1 and path2
if err != nil {
t.Fatal(err)
}
fmt.Printf("%+v", matchedPaths)

// move a path
_ := fs.MovePath("a", "a/b/keyword") // "a", "a/keyword", "a/keyword/c" will be under path2
// move a path
err = fs.MovePath("a/keyword", "a/b/keyword") // "a/keyword", "a/keyword/c" will be under path2
if err != nil {
t.Fatal(err)
}

// rename a path
err = fs.RenamePath("a/b/keyword", "keyword2") // entry "a/b/keyword" is renamed to "a/b/keyword2"
if err != nil {
t.Fatal(err)
}

// delete paths
err = fs.DelPath("a/b/keyword2")
if err != nil {
t.Fatal(err)
}
})
}
```
117 changes: 115 additions & 2 deletions fsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package fsearch

import (
"errors"
"path/filepath"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -160,6 +162,81 @@ func (fs *FSearch) MovePath(pathname, dstParentPath string) error {
return fs.tree.MovePath(pathname, dstParentPath)
}

// RenamePath renames the last segment of pathname to newName and updates the
// keyword index so that the renamed node is found under the suffixes of
// newName instead of the suffixes of its previous name.
//
// It returns ErrStopped when the FSearch is not on, ErrInvalidPath when
// newName contains the path separator or the original name is empty, and
// otherwise any error reported by the tree or the radix index.
func (fs *FSearch) RenamePath(pathname, newName string) error {
	if !fs.on {
		return ErrStopped
	}
	fs.lock.Lock()
	defer fs.lock.Unlock()

	if strings.Contains(newName, fs.tree.PathSeparator) {
		return ErrInvalidPath
	}

	// NOTE(review): filepath.Base splits on the OS separator, not on
	// fs.tree.PathSeparator, and never returns an empty string ("" yields "."),
	// so the check below looks unreachable — confirm the intended behavior.
	originalName := filepath.Base(pathname)
	if len(originalName) == 0 {
		return ErrInvalidPath
	}

	renamedNode, err := fs.tree.Rename(pathname, newName)
	if err != nil {
		return err
	}

	var nodeIdsVal interface{}

	// Drop the node id from every suffix of the old name.
	runes := []rune(originalName)
	for i := range runes {
		keyword := string(runes[i:])
		nodeIdsVal, err = fs.radix.Get(keyword)
		if err != nil {
			if errors.Is(err, qradix.ErrNotExist) {
				continue
			}
			return err
		}

		// The type assertion must come after the error check: when Get fails
		// the returned value may be nil and asserting on it would panic.
		nodeIds := nodeIdsVal.([]int64)
		for j, nodeId := range nodeIds {
			if nodeId != renamedNode.id {
				continue
			}
			_, err = fs.radix.Insert(keyword, append(nodeIds[:j], nodeIds[j+1:]...))
			if err != nil {
				// TODO: although it should be impossible to reach here, it is
				// better to add a check on the searching side, since not all
				// keys have been removed at this point.
				return err
			}
			break
		}
	}

	// Register the node id under every suffix of the new name.
	runes = []rune(newName)
	for i := range runes {
		keyword := string(runes[i:])
		nodeIdsVal, err = fs.radix.Get(keyword)
		if err != nil {
			if !errors.Is(err, qradix.ErrNotExist) {
				return err
			}
			nodeIdsVal = []int64{}
		}

		nodeIds := nodeIdsVal.([]int64)
		_, err = fs.radix.Insert(keyword, append(nodeIds, renamedNode.id))
		if err != nil {
			// TODO: although it should be impossible to reach here, it is
			// better to add a check on the searching side, since not all
			// keys have been updated at this point.
			return err
		}
	}

	return nil
}

// Search searches keyword in the FSearch
// It returns pathnames which contain the keyword; the result size is limited by the resultLimit
func (fs *FSearch) Search(keyword string) ([]string, error) {
Expand Down Expand Up @@ -222,9 +299,45 @@ func (fs *FSearch) Marshal() chan string {
}

// Unmarshal deserializes string rows and restores the FSearch index
func (fs *FSearch) Unmarshal(rows chan string) {
// TODO: add nodes, add tries
func (fs *FSearch) Unmarshal(rows chan string) error {
	// Hand the rows to the tree; the node table and keyword index below are
	// then filled in from the tree's nodes.
	fs.tree.Unmarshal(rows)

	// Breadth-first walk starting at the tree root.
	pending := []*Node{fs.tree.root}
	for len(pending) > 0 {
		cur := pending[0]
		pending = pending[1:]

		// Children are queued up front so that unnamed nodes can be skipped
		// with a plain continue; they are still visited after every node
		// that is already waiting in the queue.
		for _, child := range cur.children {
			pending = append(pending, child)
		}
		if cur.name == "" {
			continue
		}

		fs.nodes[cur.id] = cur

		// Index the node id under every suffix of its name.
		nameRunes := []rune(cur.name)
		for start := range nameRunes {
			suffix := string(nameRunes[start:])

			var found interface{}
			var err error
			found, err = fs.radix.Get(suffix)
			switch {
			case err == nil:
				// existing id list, extended below
			case errors.Is(err, qradix.ErrNotExist):
				found = []int64{}
			default:
				return err
			}

			ids := found.([]int64)
			if _, err = fs.radix.Insert(suffix, append(ids, cur.id)); err != nil {
				return err
			}
		}
	}

	return nil
}

func (fs *FSearch) Error() error {
Expand Down
114 changes: 110 additions & 4 deletions fsearch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,21 @@ import (
"github.com/ihexxa/randstr"
)

func TestFSearch(t *testing.T) {
keywordRandStr := randstr.NewRandStr([]string{}, true, 2)
seed := time.Now().UnixNano()
var (
keywordRandStr *randstr.RandStr
seed int64
)

const resultSize = 1000000000

func init() {
seed = time.Now().UnixNano()
fmt.Printf("seed: %d\n", seed)
keywordRandStr.Seed(seed)
const resultSize = 1000000000
}

func TestFSearchOperations(t *testing.T) {
keywordRandStr := randstr.NewRandStr([]string{}, true, 2)

t.Run("test Search", func(t *testing.T) {
fs := New("/", resultSize)
Expand Down Expand Up @@ -224,6 +233,70 @@ func TestFSearch(t *testing.T) {
}
})

t.Run("AddPath/Rename: rename segments test", func(t *testing.T) {
fs := New("/", resultSize)
newPathSeg := "renamed"

for _, pathname := range []string{
"a/b/c",
} {
err := fs.AddPath(pathname)
if err != nil {
t.Fatal(err)
}

parts := strings.Split(pathname, "/")
renamedPrefixParts := []string{}
for i, part := range parts {
oldPrefixParts := append(renamedPrefixParts, part)
oldPrefix := strings.Join(oldPrefixParts, "/")

fmt.Println(oldPrefix, newPathSeg)
fmt.Println(fs.tree.String())
err := fs.RenamePath(oldPrefix, newPathSeg)
if err != nil {
t.Fatal(err)
}

oldPath := strings.Join(parts[:i+1], "/")
newPath := strings.Join(append(append(renamedPrefixParts, newPathSeg), parts[i+1:]...), "/")
fmt.Println("check", oldPath, newPath)
checkPaths(t, map[string][]*Node{oldPath: nil}, fs, false)
checkPaths(t, map[string][]*Node{newPath: nil}, fs, true)

renamedPrefixParts = append(renamedPrefixParts, newPathSeg)
}
}
})
}

// TestFSearchPersistency adds generated paths to one FSearch, feeds its
// Marshal output into the Unmarshal of a second FSearch, and asks checkPaths
// to confirm that every added path exists in the second instance.
func TestFSearchPersistency(t *testing.T) {
	t.Run("test persistency", func(t *testing.T) {
		source := New("/", resultSize)
		generated := genPaths(128)

		want := make(map[string][]*Node)
		for p := range generated {
			if err := source.AddPath(p); err != nil {
				t.Fatal(err)
			}
			want[p] = nil
		}

		serialized := source.Marshal()

		restored := New("/", resultSize)
		if err := restored.Unmarshal(serialized); err != nil {
			t.Fatal(err)
		}

		checkPaths(t, want, restored, true)
	})
}

func TestFSearchRandom(t *testing.T) {
t.Run("AddPath/DelPath random test", func(t *testing.T) {
fs := New("/", resultSize)
paths := genPaths(128)
Expand Down Expand Up @@ -290,6 +363,39 @@ func TestFSearch(t *testing.T) {
checkPaths(t, movedPaths, fs, true)
checkPaths(t, paths, fs, false)
})

t.Run("AddPath/Rename: rename root random test", func(t *testing.T) {
fs := New("/", resultSize)
paths := genPaths(128)

oldRoot := "000"
newRootName := "111"
var err error
for pathname := range paths {
originalPath := fmt.Sprintf("%s/%s", oldRoot, pathname)
err = fs.AddPath(originalPath)
if err != nil {
t.Fatal(err)
}
}

err = fs.RenamePath(oldRoot, newRootName)
if err != nil {
t.Fatal(err)
}

originalPaths := map[string][]*Node{}
renamedPaths := map[string][]*Node{}
for pathname := range paths {
originalPath := fmt.Sprintf("%s/%s", oldRoot, pathname)
renamedPath := fmt.Sprintf("%s/%s", newRootName, pathname)
originalPaths[originalPath] = nil
renamedPaths[renamedPath] = nil
}

checkPaths(t, originalPaths, fs, false)
checkPaths(t, renamedPaths, fs, true)
})
}

func checkPaths(t *testing.T, pathnames map[string][]*Node, fs *FSearch, shouldExist bool) {
Expand Down

0 comments on commit f6f0c90

Please sign in to comment.