Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add RenamePath and refine Unmarshal #1

Merged
merged 4 commits into from
Jul 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 49 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Fsearch
_An in-memory index which finds a keyword in millions of pathnames within milliseconds._
_An in-memory index which finds a keyword among millions of pathnames within milliseconds._

<a href="https://github.com/ihexxa/fsearch/actions">
<img src="https://github.com/ihexxa/fsearch/workflows/ci-fsearch/badge.svg" />
Expand All @@ -9,29 +9,60 @@ _An in-memory index which finds a keyword in millions of pathnames within millis
</a>

## Features
- Fast: search a keyword in millions of directories within milliseconds (see benchmark).
- Fast: searches for a keyword among millions of directories within milliseconds (see benchmark).
- Compact: indexing 1M pathnames with around 500MB memory.
- Simple: less than 5 APIs
- Serializable: the index can be serialized and persisted.
- Simple: AddPath, DelPath, MovePath, RenamePath and so on.

## Examples
```golang
const maxResultSize = 50 // the upper bound of matched results size
const pathSeparator = "/"
fs := New(pathSeparator, maxResultSize)
import (
"fmt"
"testing"
)

// add paths
path1 := "a/keyword/c"
path2 := "a/b/keyword"
_ := fs.AddPath(path1)
_ := fs.AddPath(path2)
func TestFSearchExample(t *testing.T) {
t.Run("test example", func(t *testing.T) {
const maxResultSize = 50 // the upper bound of matched results size
const pathSeparator = "/"
fs := New(pathSeparator, maxResultSize)

// search for a key word
matchedPaths, _ := fs.Search("keyword") // matchedPaths should contain both path1 and path2
// add paths
path1 := "a/keyword/c"
path2 := "a/b/keyword"
err := fs.AddPath(path1)
if err != nil {
t.Fatal(err)
}
err = fs.AddPath(path2)
if err != nil {
t.Fatal(err)
}

// delete paths
_ := fs.DelPath(path1)
_ := fs.DelPath(path2)
// search for a key word
matchedPaths, err := fs.Search("keyword") // matchedPaths should contain both path1 and path2
if err != nil {
t.Fatal(err)
}
fmt.Printf("%+v", matchedPaths)

// move a path
_ := fs.MovePath("a", "a/b/keyword") // "a", "a/keyword", "a/keyword/c" will be under path2
// move a path
err = fs.MovePath("a/keyword", "a/b/keyword") // "a/keyword", "a/keyword/c" will be under path2
if err != nil {
t.Fatal(err)
}

// rename a path
err = fs.RenamePath("a/b/keyword", "keyword2") // entry "a/b/keyword" is renamed to "a/b/keyword2"
if err != nil {
t.Fatal(err)
}

// delete paths
err = fs.DelPath("a/b/keyword2")
if err != nil {
t.Fatal(err)
}
})
}
```
117 changes: 115 additions & 2 deletions fsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package fsearch

import (
"errors"
"path/filepath"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -160,6 +162,81 @@ func (fs *FSearch) MovePath(pathname, dstParentPath string) error {
return fs.tree.MovePath(pathname, dstParentPath)
}

// RenamePath renames the last segment (file/folder name) of pathname to newName
// and moves the renamed node's id in the search index from the suffixes of the
// old name to the suffixes of the new name.
//
// It returns ErrStopped when the FSearch is not running, ErrInvalidPath when
// newName contains the path separator or the original name is empty, and any
// error reported by the underlying tree or radix index.
func (fs *FSearch) RenamePath(pathname, newName string) error {
	if !fs.on {
		return ErrStopped
	}
	fs.lock.Lock()
	defer fs.lock.Unlock()

	// A name is a single path segment, so it must not contain the separator.
	if strings.Contains(newName, fs.tree.PathSeparator) {
		return ErrInvalidPath
	}

	// NOTE(review): filepath.Base splits on the OS separator, not on
	// fs.tree.PathSeparator, and returns "." (never "") for an empty path,
	// so this guard is purely defensive — confirm this is the intent.
	originalName := filepath.Base(pathname)
	if len(originalName) == 0 {
		return ErrInvalidPath
	}

	renamedNode, err := fs.tree.Rename(pathname, newName)
	if err != nil {
		return err
	}

	var nodeIdsVal interface{}

	// Remove the node's id from the id list of every suffix of the old name.
	oldRunes := []rune(originalName)
	for start := range oldRunes {
		keyword := string(oldRunes[start:])
		nodeIdsVal, err = fs.radix.Get(keyword)
		if err != nil {
			if errors.Is(err, qradix.ErrNotExist) {
				continue
			}
			return err
		}

		// Assert the type only after the error check: on failure the returned
		// value may be nil, and asserting it first would panic.
		nodeIds := nodeIdsVal.([]int64)
		for idx, nodeId := range nodeIds {
			if nodeId != renamedNode.id {
				continue
			}
			_, err = fs.radix.Insert(keyword, append(nodeIds[:idx], nodeIds[idx+1:]...))
			if err != nil {
				// TODO: although it should be impossible to reach here, it is
				// better to add a check on the searching side, since not all
				// keys have been removed at this point.
				return err
			}
			break
		}
	}

	// Add the node's id to the id list of every suffix of the new name.
	newRunes := []rune(newName)
	for start := range newRunes {
		keyword := string(newRunes[start:])
		nodeIdsVal, err = fs.radix.Get(keyword)
		if err != nil {
			if !errors.Is(err, qradix.ErrNotExist) {
				return err
			}
			// No entry for this suffix yet: start from an empty id list.
			nodeIdsVal = []int64{}
		}

		nodeIds := nodeIdsVal.([]int64)
		_, err = fs.radix.Insert(keyword, append(nodeIds, renamedNode.id))
		if err != nil {
			// TODO: although it should be impossible to reach here, it is
			// better to add a check on the searching side, since the index
			// is only partially updated at this point.
			return err
		}
	}

	return nil
}

// Search searches keyword in the FSearch
// It returns pathnames which contain keyword; the result size is limited by the resultLimit
func (fs *FSearch) Search(keyword string) ([]string, error) {
Expand Down Expand Up @@ -222,9 +299,45 @@ func (fs *FSearch) Marshal() chan string {
}

// Unmarshal deserializes string rows and restores the FSearch index
func (fs *FSearch) Unmarshal(rows chan string) {
// TODO: add nodes, add tries
func (fs *FSearch) Unmarshal(rows chan string) error {
	// Hand the rows to the tree, then walk the tree to fill fs.nodes and the
	// radix index from it.
	fs.tree.Unmarshal(rows)

	var stored interface{}
	var err error

	// Breadth-first traversal starting at the tree root (FIFO queue).
	pending := []*Node{fs.tree.root}
	for len(pending) > 0 {
		current := pending[0]
		pending = pending[1:]

		for _, child := range current.children {
			pending = append(pending, child)
		}

		// Nodes with an empty name are traversed but not indexed.
		if current.name == "" {
			continue
		}
		fs.nodes[current.id] = current

		// Register the node's id under every suffix of its name.
		nameRunes := []rune(current.name)
		for start := range nameRunes {
			key := string(nameRunes[start:])

			stored, err = fs.radix.Get(key)
			switch {
			case err == nil:
				// Existing id list found; extend it below.
			case errors.Is(err, qradix.ErrNotExist):
				stored = []int64{}
			default:
				return err
			}

			ids := stored.([]int64)
			if _, err = fs.radix.Insert(key, append(ids, current.id)); err != nil {
				return err
			}
		}
	}

	return nil
}

func (fs *FSearch) Error() error {
Expand Down
114 changes: 110 additions & 4 deletions fsearch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,21 @@ import (
"github.com/ihexxa/randstr"
)

func TestFSearch(t *testing.T) {
keywordRandStr := randstr.NewRandStr([]string{}, true, 2)
seed := time.Now().UnixNano()
var (
keywordRandStr *randstr.RandStr
seed int64
)

const resultSize = 1000000000

func init() {
seed = time.Now().UnixNano()
fmt.Printf("seed: %d\n", seed)
keywordRandStr.Seed(seed)
const resultSize = 1000000000
}

func TestFSearchOperations(t *testing.T) {
keywordRandStr := randstr.NewRandStr([]string{}, true, 2)

t.Run("test Search", func(t *testing.T) {
fs := New("/", resultSize)
Expand Down Expand Up @@ -224,6 +233,70 @@ func TestFSearch(t *testing.T) {
}
})

t.Run("AddPath/Rename: rename segments test", func(t *testing.T) {
fs := New("/", resultSize)
newPathSeg := "renamed"

for _, pathname := range []string{
"a/b/c",
} {
err := fs.AddPath(pathname)
if err != nil {
t.Fatal(err)
}

parts := strings.Split(pathname, "/")
renamedPrefixParts := []string{}
for i, part := range parts {
oldPrefixParts := append(renamedPrefixParts, part)
oldPrefix := strings.Join(oldPrefixParts, "/")

fmt.Println(oldPrefix, newPathSeg)
fmt.Println(fs.tree.String())
err := fs.RenamePath(oldPrefix, newPathSeg)
if err != nil {
t.Fatal(err)
}

oldPath := strings.Join(parts[:i+1], "/")
newPath := strings.Join(append(append(renamedPrefixParts, newPathSeg), parts[i+1:]...), "/")
fmt.Println("check", oldPath, newPath)
checkPaths(t, map[string][]*Node{oldPath: nil}, fs, false)
checkPaths(t, map[string][]*Node{newPath: nil}, fs, true)

renamedPrefixParts = append(renamedPrefixParts, newPathSeg)
}
}
})
}

// TestFSearchPersistency adds generated paths to one index, marshals it,
// unmarshals the rows into a second index, and runs checkPaths on the second
// index with shouldExist set to true.
func TestFSearchPersistency(t *testing.T) {
	t.Run("test persistency", func(t *testing.T) {
		source := New("/", resultSize)

		want := map[string][]*Node{}
		for pathname := range genPaths(128) {
			if err := source.AddPath(pathname); err != nil {
				t.Fatal(err)
			}
			want[pathname] = nil
		}

		// Serialize the populated index and load it into a fresh one.
		rows := source.Marshal()
		restored := New("/", resultSize)
		if err := restored.Unmarshal(rows); err != nil {
			t.Fatal(err)
		}

		checkPaths(t, want, restored, true)
	})
}

func TestFSearchRandom(t *testing.T) {
t.Run("AddPath/DelPath random test", func(t *testing.T) {
fs := New("/", resultSize)
paths := genPaths(128)
Expand Down Expand Up @@ -290,6 +363,39 @@ func TestFSearch(t *testing.T) {
checkPaths(t, movedPaths, fs, true)
checkPaths(t, paths, fs, false)
})

t.Run("AddPath/Rename: rename root random test", func(t *testing.T) {
fs := New("/", resultSize)
paths := genPaths(128)

oldRoot := "000"
newRootName := "111"
var err error
for pathname := range paths {
originalPath := fmt.Sprintf("%s/%s", oldRoot, pathname)
err = fs.AddPath(originalPath)
if err != nil {
t.Fatal(err)
}
}

err = fs.RenamePath(oldRoot, newRootName)
if err != nil {
t.Fatal(err)
}

originalPaths := map[string][]*Node{}
renamedPaths := map[string][]*Node{}
for pathname := range paths {
originalPath := fmt.Sprintf("%s/%s", oldRoot, pathname)
renamedPath := fmt.Sprintf("%s/%s", newRootName, pathname)
originalPaths[originalPath] = nil
renamedPaths[renamedPath] = nil
}

checkPaths(t, originalPaths, fs, false)
checkPaths(t, renamedPaths, fs, true)
})
}

func checkPaths(t *testing.T, pathnames map[string][]*Node, fs *FSearch, shouldExist bool) {
Expand Down
Loading