From abc49e4070448cc9df6ed45129a969500bdb4df8 Mon Sep 17 00:00:00 2001 From: Ben Boyter Date: Tue, 23 Jun 2020 17:06:50 +1000 Subject: [PATCH] various updates --- asset/tui/main.go | 332 ++++++++++++++++++++--------- main.go | 2 +- processor/fuzz.go | 6 +- processor/http.go | 8 +- processor/http_test.go | 20 +- processor/processor.go | 4 +- processor/result_ranker.go | 24 ++- processor/result_ranker_test.go | 14 +- processor/snippet.go | 8 +- processor/snippet_test.go | 32 +-- processor/structs.go | 4 +- processor/tui.go | 11 +- processor/tui_helpers.go | 2 +- processor/worker_file.go | 10 +- processor/worker_searcher.go | 6 +- processor/worker_summarize.go | 10 +- processor/worker_summarize_test.go | 16 +- 17 files changed, 320 insertions(+), 189 deletions(-) diff --git a/asset/tui/main.go b/asset/tui/main.go index a6e9fd9..0a67fbc 100644 --- a/asset/tui/main.go +++ b/asset/tui/main.go @@ -4,9 +4,12 @@ package main import ( "fmt" + "github.com/boyter/cs/file" + "github.com/boyter/cs/processor" "github.com/gdamore/tcell" "github.com/rivo/tview" "os" + "runtime" "strings" "sync" "time" @@ -24,49 +27,83 @@ type codeResult struct { Score float64 } -type drawResultsStruct struct { - Query string - Count int - Sync sync.Mutex - Changed bool - Running bool +type tuiApplicationController struct { + Query string + Count int + Sync sync.Mutex + Changed bool + Running bool + Offset int + Results []*processor.FileJob + TuiFileWalker *file.FileWalker + TuiFileReaderWorker *processor.FileReaderWorker + TuiSearcherWorker *processor.SearcherWorker + + // View requirements + TviewApplication *tview.Application } -func (drs *drawResultsStruct) SetChanged(b bool) { - drs.Sync.Lock() - defer drs.Sync.Unlock() - drs.Changed = b +func (cont *tuiApplicationController) IncrementOffset() { + cont.Sync.Lock() + defer cont.Sync.Unlock() + cont.Offset++ } -func (drs *drawResultsStruct) GetChanged() bool { - drs.Sync.Lock() - defer drs.Sync.Unlock() - return drs.Changed +func (cont *tuiApplicationController) DecrementOffset() { + cont.Sync.Lock() + defer cont.Sync.Unlock() + if cont.Offset != 0 { + cont.Offset-- + } +} + +func (cont *tuiApplicationController) GetOffset() int { + cont.Sync.Lock() + defer cont.Sync.Unlock() + return cont.Offset +} + +func (cont *tuiApplicationController) SetChanged(b bool) { + cont.Sync.Lock() + defer cont.Sync.Unlock() + cont.Changed = b } -func (drs *drawResultsStruct) SetRunning(b bool) { - drs.Sync.Lock() - defer drs.Sync.Unlock() - drs.Running = b +func (cont *tuiApplicationController) GetChanged() bool { + cont.Sync.Lock() + defer cont.Sync.Unlock() + return cont.Changed } -func (drs *drawResultsStruct) GetRunning() bool { - drs.Sync.Lock() - defer drs.Sync.Unlock() - return drs.Running +func (cont *tuiApplicationController) SetRunning(b bool) { + cont.Sync.Lock() + defer cont.Sync.Unlock() + cont.Running = b } -// This is responsible for drawing all changes on the screen -func (drs *drawResultsStruct) drawResults(displayResults []displayResult, codeResults []codeResult, selected int, status string, resultsFlex *tview.Flex, statusView *tview.TextView, app *tview.Application) { - drs.Sync.Lock() - defer drs.Sync.Unlock() +func (cont *tuiApplicationController) GetRunning() bool { + cont.Sync.Lock() + defer cont.Sync.Unlock() + return cont.Running +} + +func (cont *tuiApplicationController) Search(s string) { + cont.Sync.Lock() + defer cont.Sync.Unlock() + cont.Query = s +} - if !drs.Changed { +// After any change is made that requires something drawn on the screen this is the method that does +func (cont *tuiApplicationController) drawResults(displayResults []displayResult, codeResults []codeResult, status string, resultsFlex *tview.Flex, statusView *tview.TextView) { + cont.Sync.Lock() + defer cont.Sync.Unlock() + + if !cont.Changed { return } // NB this is just here so we can see updates in this test - drs.Count++ + cont.Count++ // reset the elements by clearing out every one for _, t := range displayResults { @@ -74,18 +111,25 @@ func (drs *drawResultsStruct) drawResults(displayResults []displayResult, codeRe t.Body.SetText("") } + // rank all results + // then go and get the relevant portion for display + // go and get the codeResults the user wants to see using selected as the offset to display from var p []codeResult - for i, t := range codeResults { - if i >= selected { - p = append(p, t) + for i, t := range cont.Results { + if i >= cont.Offset { + p = append(p, codeResult{ + Title: t.Filename, + Content: string(t.Content)[:300], + Score: t.Score, + }) } } // render out what the user wants to see based on the results that have been chosen - app.QueueUpdateDraw(func() { + cont.TviewApplication.QueueUpdateDraw(func() { for i, t := range p { - displayResults[i].Title.SetText(fmt.Sprintf("%d [fuchsia]%s (%f)[-:-:-]", drs.Count, t.Title, t.Score)) + displayResults[i].Title.SetText(fmt.Sprintf("%d [fuchsia]%s (%f)[-:-:-]", cont.Count, t.Title, t.Score)) displayResults[i].Body.SetText(t.Content) // we need to update the item so that it displays everything we have put in @@ -96,12 +140,145 @@ func (drs *drawResultsStruct) drawResults(displayResults []displayResult, codeRe }) // we can only set that nothing - drs.Changed = false + cont.Changed = false +} + +func (cont *tuiApplicationController) doSearch() { + cont.Sync.Lock() + // deal with the user clearing out the search + if cont.Query == "" { + cont.Results = []*processor.FileJob{} + cont.Changed = true + cont.Sync.Unlock() + return + } + cont.Sync.Unlock() + + // keep the query we are working with + query := cont.Query + cont.Query = "" + + //if cont.TuiFileWalker != nil && cont.TuiFileWalker.Walking() { + // cont.TuiFileWalker.Terminate() + //} + + fileQueue := make(chan *file.File) // NB unbuffered because we want the UI to respond and this is what causes affects + toProcessQueue := make(chan *processor.FileJob, runtime.NumCPU()) // Files to be read into memory for processing + summaryQueue := make(chan *processor.FileJob, runtime.NumCPU()) // Files that match and need to be displayed + + cont.TuiFileWalker = file.NewFileWalker(".", fileQueue) + cont.TuiFileReaderWorker = processor.NewFileReaderWorker(fileQueue, toProcessQueue) + cont.TuiSearcherWorker = processor.NewSearcherWorker(toProcessQueue, summaryQueue) + cont.TuiSearcherWorker.SearchString = strings.Split(query, " ") + + go cont.TuiFileWalker.Start() + go cont.TuiFileReaderWorker.Start() + go cont.TuiSearcherWorker.Start() + + // Updated with results as we get them NB this is + // painted as we go + var results []*processor.FileJob + var resultsMutex sync.Mutex + update := true + + go func() { + for update { + // Every 50 ms redraw the current set of results + resultsMutex.Lock() + cont.Sync.Lock() + cont.Results = results + cont.Sync.Unlock() + resultsMutex.Unlock() + + cont.SetChanged(true) + time.Sleep(50 * time.Millisecond) + } + }() + + for res := range summaryQueue { + resultsMutex.Lock() + results = append(results, res) + resultsMutex.Unlock() + } + + update = false + + cont.Sync.Lock() + cont.Results = results + cont.Sync.Unlock() + cont.SetChanged(true) +} + +func (cont *tuiApplicationController) updateView() { + // render loop running background is the only thing responsible for updating the results based on the state + // in the applicationController + go func() { + // Used to show what is happening on the page + var spinString = `\|/-` + var spinLocation = 0 + var spinRun = 0 + + for { + status := "" + if cont.TuiFileWalker != nil { + status = fmt.Sprintf("%d results(s) for '%s' from %d files", len(cont.Results), cont.Query, cont.TuiFileReaderWorker.GetFileCount()) + if cont.GetRunning() { + status = fmt.Sprintf("%d results(s) for '%s' from %d files %s", len(cont.Results), cont.Query, cont.TuiFileReaderWorker.GetFileCount(), string(spinString[spinLocation])) + + spinRun++ + if spinRun == 4 { + spinLocation++ + if spinLocation >= len(spinString) { + spinLocation = 0 + } + spinRun = 0 + cont.SetChanged(true) + } + } + } + + fmt.Println(status) + //cont.drawResults(displayResults, codeResults, status, resultsFlex, statusView) + time.Sleep(30 * time.Millisecond) + } + }() +} + +func (cont *tuiApplicationController) processSearch() { + // we only ever want to have one search trigger at a time which is what this does + // searches come in... we trigger them to run + go func() { + for { + cont.doSearch() + time.Sleep(5 * time.Millisecond) + } + }() +} + +func NewTuiApplication() { + //tviewApplication := tview.NewApplication() + //applicationController := tuiApplicationController{} + // + //var overallFlex *tview.Flex + //var inputField *tview.InputField + //var queryFlex *tview.Flex + //var resultsFlex *tview.Flex + //var statusView *tview.TextView + //var displayResults []displayResult + + //if err := tviewApplication.SetRoot(overallFlex, true).SetFocus(inputField).Run(); err != nil { + // panic(err) + //} } func main() { - app := tview.NewApplication() - drawResultsState := drawResultsStruct{} + tviewApplication := tview.NewApplication() + applicationController := tuiApplicationController{ + TviewApplication: tviewApplication, + Sync: sync.Mutex{}, + } + applicationController.updateView() + applicationController.processSearch() var overallFlex *tview.Flex var inputField *tview.InputField @@ -110,9 +287,9 @@ func main() { var statusView *tview.TextView var displayResults []displayResult - var codeResults []codeResult - // Sets up all of the UI components we need to actually display + + // Create the elements we use to display the code results here for i := 1; i < 50; i++ { var textViewTitle *tview.TextView var textViewBody *tview.TextView @@ -134,8 +311,7 @@ func main() { }) } - selected := 0 - + // input field which deals with the user input for the main search which ultimately triggers a search inputField = tview.NewInputField(). SetFieldBackgroundColor(tcell.Color16). SetLabel("> "). @@ -144,29 +320,28 @@ func main() { SetDoneFunc(func(key tcell.Key) { switch key { case tcell.KeyEnter: - app.Stop() + tviewApplication.Stop() // we want to work like fzf for piping into other things hence print out the selected version - fmt.Println(codeResults[selected].Title) + if len(applicationController.Results) != 0 { + fmt.Println(applicationController.Results[applicationController.GetOffset()].Location) + } os.Exit(0) case tcell.KeyTab: - //app.SetFocus(textView) need to change focus to the others but not the text itself + //tviewApplication.SetFocus(textView) need to change focus to the others but not the text itself case tcell.KeyUp: - if selected != 0 { - selected-- - } - drawResultsState.SetRunning(true) - drawResultsState.SetChanged(true) + applicationController.DecrementOffset() + applicationController.SetChanged(true) case tcell.KeyDown: - if selected != len(codeResults)-1 { - selected++ - } - drawResultsState.SetRunning(false) - drawResultsState.SetChanged(true) + applicationController.IncrementOffset() + applicationController.SetChanged(true) + case tcell.KeyESC: + tviewApplication.Stop() + os.Exit(0) } }). SetChangedFunc(func(text string) { text = strings.TrimSpace(text) - drawResultsState.Query = text + applicationController.Query = text }) statusView = tview.NewTextView(). @@ -194,55 +369,10 @@ func main() { resultsFlex.AddItem(t.Body, t.BodyHeight, 1, false) } - // add in a few results just to get things going - for i := 1; i < 21; i++ { - codeResults = append(codeResults, codeResult{ - Title: fmt.Sprintf(`main.go`), - Score: float64(i), - Content: fmt.Sprintf(`func NewFlex%d() *Flex { - f := &Flex{ - Box: NewBox().SetBackgroundColor(tcell.ColorDefault), - direction: [red]FlexColumn[white], - } - f.focus = f - return f -}`, i), - }) - } - // trigger the first render without user action - drawResultsState.SetChanged(true) - - // render loop running background is the only thing responsible for updating the results - go func() { - // Used to show what is happening on the page - var spinString = `\|/-` - var spinLocation = 0 - var spinRun = 0 + applicationController.SetChanged(true) - for { - status := fmt.Sprintf("%d results(s) for '%s' from %d files", len(codeResults), drawResultsState.Query, 87) - if drawResultsState.GetRunning() { - status = fmt.Sprintf("%d results(s) for '%s' from %d files %s", len(codeResults), drawResultsState.Query, 87, string(spinString[spinLocation])) - - spinRun++ - if spinRun == 4 { - spinLocation++ - if spinLocation >= len(spinString) { - spinLocation = 0 - } - spinRun = 0 - drawResultsState.SetChanged(true) - } - } - - drawResultsState.drawResults(displayResults, codeResults, selected, status, resultsFlex, statusView, app) - time.Sleep(30 * time.Millisecond) - } - }() - - if err := app.SetRoot(overallFlex, true).SetFocus(inputField).Run(); err != nil { + if err := tviewApplication.SetRoot(overallFlex, true).SetFocus(inputField).Run(); err != nil { panic(err) } } - diff --git a/main.go b/main.go index c30a003..ad2512f 100644 --- a/main.go +++ b/main.go @@ -182,7 +182,7 @@ func main() { &processor.Ranker, "ranker", "bm25", - "set ranking algorithm [wc, tfidf, tfidf2, bm25]", + "set ranking algorithm [simple, tfidf, tfidf2, bm25]", ) flags.StringVarP( &processor.FileOutput, diff --git a/processor/fuzz.go b/processor/fuzz.go index 1096fd0..ccd2bd9 100644 --- a/processor/fuzz.go +++ b/processor/fuzz.go @@ -24,15 +24,15 @@ func Fuzz(data []byte) int { freq := map[string]int{} freq[find[:2]] = 5 - res := &fileJob{ + res := &FileJob{ Content: data, MatchLocations: loc, } extractRelevantV3(res, freq, 300, "...") - findSpaceRight(&fileJob{Content: data}, 0, 10000) - findSpaceLeft(&fileJob{Content: data}, len(data)-1, 10000) + findSpaceRight(&FileJob{Content: data}, 0, 10000) + findSpaceLeft(&FileJob{Content: data}, len(data)-1, 10000) return 1 } diff --git a/processor/http.go b/processor/http.go index 995df99..f48bb20 100644 --- a/processor/http.go +++ b/processor/http.go @@ -114,7 +114,7 @@ func StartHttpServer() { page := tryParseInt(r.URL.Query().Get("p"), 0) pageSize := 20 - var results []*fileJob + var results []*FileJob var fileCount int64 log.Info(). @@ -136,8 +136,8 @@ func StartHttpServer() { } fileQueue := make(chan *file.File, 1000) // Files ready to be read from disk NB we buffer here because http runs till finished or the process is cancelled - toProcessQueue := make(chan *fileJob, runtime.NumCPU()) // Files to be read into memory for processing - summaryQueue := make(chan *fileJob, runtime.NumCPU()) // Files that match and need to be displayed + toProcessQueue := make(chan *FileJob, runtime.NumCPU()) // Files to be read into memory for processing + summaryQueue := make(chan *FileJob, runtime.NumCPU()) // Files that match and need to be displayed fileWalker := file.NewFileWalker(directory, fileQueue) fileWalker.PathExclude = PathDenylist @@ -314,7 +314,7 @@ func calculateExtensionFacet(extensionFacets map[string]int, query string, snipp return ef } -func calculatePages(results []*fileJob, pageSize int, query string, snippetLength int) []pageResult { +func calculatePages(results []*FileJob, pageSize int, query string, snippetLength int) []pageResult { var pages []pageResult if len(results) == 0 { diff --git a/processor/http_test.go b/processor/http_test.go index 8194d2d..0229882 100644 --- a/processor/http_test.go +++ b/processor/http_test.go @@ -7,7 +7,7 @@ import ( ) func TestCalculatePagesNone(t *testing.T) { - var pages = calculatePages([]*fileJob{}, 20, "", 100) + var pages = calculatePages([]*FileJob{}, 20, "", 100) if len(pages) != 0 { t.Error("expected no result") @@ -15,7 +15,7 @@ func TestCalculatePagesNone(t *testing.T) { } func TestCalculatePagesSingle(t *testing.T) { - var pages = calculatePages([]*fileJob{ + var pages = calculatePages([]*FileJob{ {}, }, 20, "", 100) @@ -33,9 +33,9 @@ func TestCalculatePagesSingle(t *testing.T) { } func TestCalculatePagesEdgeStart(t *testing.T) { - var fj []*fileJob + var fj []*FileJob for i := 0; i < 20; i++ { - fj = append(fj, &fileJob{}) + fj = append(fj, &FileJob{}) } var pages = calculatePages(fj, 20, "", 100) @@ -46,9 +46,9 @@ func TestCalculatePagesEdgeStart(t *testing.T) { } func TestCalculatePagesEdgeOver(t *testing.T) { - var fj []*fileJob + var fj []*FileJob for i := 0; i < 21; i++ { - fj = append(fj, &fileJob{}) + fj = append(fj, &FileJob{}) } var pages = calculatePages(fj, 20, "", 100) @@ -59,9 +59,9 @@ func TestCalculatePagesEdgeOver(t *testing.T) { } func TestCalculatePagesSecondPageEdge(t *testing.T) { - var fj []*fileJob + var fj []*FileJob for i := 0; i < 40; i++ { - fj = append(fj, &fileJob{}) + fj = append(fj, &FileJob{}) } var pages = calculatePages(fj, 20, "", 100) @@ -72,9 +72,9 @@ func TestCalculatePagesSecondPageEdge(t *testing.T) { } func TestCalculatePagesSecondPageEdgeOver(t *testing.T) { - var fj []*fileJob + var fj []*FileJob for i := 0; i < 41; i++ { - fj = append(fj, &fileJob{}) + fj = append(fj, &FileJob{}) } var pages = calculatePages(fj, 20, "", 100) diff --git a/processor/processor.go b/processor/processor.go index 6204c2f..bb8b449 100644 --- a/processor/processor.go +++ b/processor/processor.go @@ -51,8 +51,8 @@ func (process *Process) StartProcess() { } fileQueue := make(chan *file.File, 1000) // Files ready to be read from disk NB we buffer here because CLI runs till finished or the process is cancelled - toProcessQueue := make(chan *fileJob, runtime.NumCPU()) // Files to be read into memory for processing - summaryQueue := make(chan *fileJob, runtime.NumCPU()) // Files that match and need to be displayed + toProcessQueue := make(chan *FileJob, runtime.NumCPU()) // Files to be read into memory for processing + summaryQueue := make(chan *FileJob, runtime.NumCPU()) // Files that match and need to be displayed fileWalker := file.NewFileWalker(process.Directory, fileQueue) fileWalker.PathExclude = PathDenylist diff --git a/processor/result_ranker.go b/processor/result_ranker.go index fb62fac..484ca62 100644 --- a/processor/result_ranker.go +++ b/processor/result_ranker.go @@ -15,20 +15,22 @@ import ( // Note that this method will evolve over time // and as such you should never rely on the returned results being // the same -func rankResults(corpusCount int, results []*fileJob) []*fileJob { +func rankResults(corpusCount int, results []*FileJob) []*FileJob { // needs to come first because it resets the scores switch Ranker { - case "wc": - results = results + case "simple": + // in this case the results are already ranked by the number of matches case "bm25": results = rankResultsBM25(corpusCount, results, calculateDocumentFrequency(results)) + results = rankResultsLocation(results) case "tfidf2": results = rankResultsTFIDF(corpusCount, results, calculateDocumentFrequency(results), false) + results = rankResultsLocation(results) default: results = rankResultsTFIDF(corpusCount, results, calculateDocumentFrequency(results), true) + results = rankResultsLocation(results) } - results = rankResultsLocation(results) // TODO maybe need to add something here to reward phrases sortResults(results) return results @@ -47,7 +49,7 @@ const ( // heavy. This is fairly similar to how the snippet extraction works but with less work because it does // not need to deal with cutting between unicode endpoints // NB this is one of the more expensive parts of the ranking -func rankResultsPhrase(results []*fileJob, documentFrequencies map[string]int) []*fileJob { +func rankResultsPhrase(results []*FileJob, documentFrequencies map[string]int) []*FileJob { for i := 0; i < len(results); i++ { rv3 := convertToRelevant(results[i]) @@ -72,7 +74,7 @@ func rankResultsPhrase(results []*fileJob, documentFrequencies map[string]int) [ // file location field. // This is not using TF-IDF or any fancy algorithm just basic checks // and boosts -func rankResultsLocation(results []*fileJob) []*fileJob { +func rankResultsLocation(results []*FileJob) []*FileJob { for i := 0; i < len(results); i++ { foundTerms := 0 for key := range results[i].MatchLocations { @@ -134,7 +136,7 @@ func rankResultsLocation(results []*fileJob) []*fileJob { // NB loops in here use increment to avoid duffcopy // https://stackoverflow.com/questions/45786687/runtime-duffcopy-is-called-a-lot // due to how often it is called by things like the TUI mode -func rankResultsTFIDF(corpusCount int, results []*fileJob, documentFrequencies map[string]int, classic bool) []*fileJob { +func rankResultsTFIDF(corpusCount int, results []*FileJob, documentFrequencies map[string]int, classic bool) []*FileJob { var weight float64 for i := 0; i < len(results); i++ { weight = 0 @@ -198,7 +200,7 @@ func rankResultsTFIDF(corpusCount int, results []*fileJob, documentFrequencies m // IDF * TF * (k1 + 1) // BM25 = sum ---------------------------- // TF + k1 * (1 - b + b * D / L) -func rankResultsBM25(corpusCount int, results []*fileJob, documentFrequencies map[string]int) []*fileJob { +func rankResultsBM25(corpusCount int, results []*FileJob, documentFrequencies map[string]int) []*FileJob { var weight float64 // Get the average number of words across all documents because we need that in BM25 to calculate correctly @@ -247,7 +249,7 @@ func rankResultsBM25(corpusCount int, results []*fileJob, documentFrequencies ma // Calculate the document term frequency for all words across all documents // letting us know how many times a term appears across the corpus // This is mostly used for snippet extraction -func calculateDocumentTermFrequency(results []*fileJob) map[string]int { +func calculateDocumentTermFrequency(results []*FileJob) map[string]int { documentFrequencies := map[string]int{} for i := 0; i < len(results); i++ { for k := range results[i].MatchLocations { @@ -261,7 +263,7 @@ func calculateDocumentTermFrequency(results []*fileJob) map[string]int { // Calculate the document frequency for all words across all documents // allowing us to know the number of documents for which a term appears // This is mostly used for TF-IDF calculation -func calculateDocumentFrequency(results []*fileJob) map[string]int { +func calculateDocumentFrequency(results []*FileJob) map[string]int { documentFrequencies := map[string]int{} for i := 0; i < len(results); i++ { for k := range results[i].MatchLocations { @@ -276,7 +278,7 @@ func calculateDocumentFrequency(results []*fileJob) map[string]int { // and then sort based on location to stop any undeterministic ordering happening // as since the location includes the filename we should never have two matches // that are 100% equal based on the two criteria we use. -func sortResults(results []*fileJob) { +func sortResults(results []*FileJob) { sort.Slice(results, func(i, j int) bool { if results[i].Score == results[j].Score { return strings.Compare(results[i].Location, results[j].Location) < 0 diff --git a/processor/result_ranker_test.go b/processor/result_ranker_test.go index 11b0f6e..38dc356 100644 --- a/processor/result_ranker_test.go +++ b/processor/result_ranker_test.go @@ -14,7 +14,7 @@ func TestRankResultsTFIDFTraditional(t *testing.T) { ml2 := map[string][][]int{} ml2["example"] = [][]int{{1}, {2}, {3}} - s := []*fileJob{ + s := []*FileJob{ { MatchLocations: ml1, Location: "/test/other.go", @@ -42,7 +42,7 @@ func TestRankResultsTFIDFComparison(t *testing.T) { ml1 := map[string][][]int{} ml1["example"] = [][]int{{1}, {2}, {3}} - s := []*fileJob{ + s := []*FileJob{ { MatchLocations: ml1, Location: "/test/other.go", @@ -65,7 +65,7 @@ func TestRankResultsRankerComparison(t *testing.T) { ml1 := map[string][][]int{} ml1["example"] = [][]int{{1}, {2}, {3}} - s := []*fileJob{ + s := []*FileJob{ { MatchLocations: ml1, Location: "/test/other.go", @@ -91,7 +91,7 @@ func TestRankResultsLocation(t *testing.T) { ml := map[string][][]int{} ml["test"] = [][]int{{1}, {2}, {3}} - s := []*fileJob{ + s := []*FileJob{ { MatchLocations: ml, Location: "/test/other.go", @@ -113,7 +113,7 @@ func TestCalculateDocumentFrequency(t *testing.T) { ml := map[string][][]int{} ml["test"] = [][]int{{1}, {2}, {3}} - s := []*fileJob{ + s := []*FileJob{ { MatchLocations: ml, }, @@ -130,7 +130,7 @@ func TestCalculateDocumentFrequency(t *testing.T) { } func TestSortResults(t *testing.T) { - s := []*fileJob{ + s := []*FileJob{ { Filename: "1", Location: "", @@ -150,7 +150,7 @@ func TestSortResults(t *testing.T) { } func TestSortResultsEqualScore(t *testing.T) { - s := []*fileJob{ + s := []*FileJob{ { Filename: "1", Location: "2", diff --git a/processor/snippet.go b/processor/snippet.go index cdd1183..d7aec2d 100644 --- a/processor/snippet.go +++ b/processor/snippet.go @@ -74,7 +74,7 @@ type Snippet struct { // to differ between people. Heck a few times I have been disappointed with results that I was previously happy with. // As such this is not tested as much as other methods and you should not rely on the results being static over time // as the internals will be modified to produce better results where possible -func extractRelevantV3(res *fileJob, documentFrequencies map[string]int, relLength int, indicator string) []Snippet { +func extractRelevantV3(res *FileJob, documentFrequencies map[string]int, relLength int, indicator string) []Snippet { wrapLength := relLength / 2 var bestMatches []bestMatch @@ -275,7 +275,7 @@ func extractRelevantV3(res *fileJob, documentFrequencies map[string]int, relLeng // Get all of the locations into a new data structure // which makes things easy to sort and deal with -func convertToRelevant(res *fileJob) []relevantV3 { +func convertToRelevant(res *FileJob) []relevantV3 { var rv3 []relevantV3 for k, v := range res.MatchLocations { @@ -299,7 +299,7 @@ func convertToRelevant(res *fileJob) []relevantV3 { // Looks for a nearby whitespace character near this position (`pos`) // up to `distance` away. Returns index of space if a space was found and // true, otherwise returns the original index and false -func findSpaceRight(res *fileJob, pos int, distance int) (int, bool) { +func findSpaceRight(res *FileJob, pos int, distance int) (int, bool) { if len(res.Content) == 0 { return pos, false } @@ -322,7 +322,7 @@ func findSpaceRight(res *fileJob, pos int, distance int) (int, bool) { // Looks for nearby whitespace character near this position // up to distance away. Returns index of space if a space was found and tru // otherwise the original index is return and false -func findSpaceLeft(res *fileJob, pos int, distance int) (int, bool) { +func findSpaceLeft(res *FileJob, pos int, distance int) (int, bool) { if len(res.Content) == 0 { return pos, false } diff --git a/processor/snippet_test.go b/processor/snippet_test.go index 1bd6dc1..5e3115d 100644 --- a/processor/snippet_test.go +++ b/processor/snippet_test.go @@ -41,7 +41,7 @@ func TestFindSpaceRight(t *testing.T) { } for i, c := range cases { - pos, found := findSpaceRight(&fileJob{Content: []byte(c.s)}, c.startpos, c.distance) + pos, found := findSpaceRight(&FileJob{Content: []byte(c.s)}, c.startpos, c.distance) if pos != c.want { t.Error(" pos for", i, "wanted", c.want, "got", pos) @@ -76,7 +76,7 @@ func TestFindSpaceLeft(t *testing.T) { } for i, c := range cases { - pos, found := findSpaceLeft(&fileJob{Content: []byte(c.s)}, c.startpos, c.distance) + pos, found := findSpaceLeft(&FileJob{Content: []byte(c.s)}, c.startpos, c.distance) if pos != c.want { t.Error(" pos for", i, "wanted", c.want, "got", pos) @@ -95,7 +95,7 @@ func TestExtractRelevantV3PaintedShip(t *testing.T) { "ocean", } - res := &fileJob{ + res := &FileJob{ Content: []byte(rhymeOfTheAncient), MatchLocations: map[string][][]int{}, } @@ -104,7 +104,7 @@ func TestExtractRelevantV3PaintedShip(t *testing.T) { res.MatchLocations[t] = str.IndexAllIgnoreCaseUnicode(rhymeOfTheAncient, t, -1) } - df := calculateDocumentTermFrequency([]*fileJob{res}) + df := calculateDocumentTermFrequency([]*FileJob{res}) snippets := extractRelevantV3(res, df, 300, "") if !strings.Contains(snippets[0].Content, `Day after day, day after day, @@ -123,7 +123,7 @@ func TestExtractRelevantV3WaterWaterEverywhere(t *testing.T) { "drink", } - res := &fileJob{ + res := &FileJob{ Content: []byte(rhymeOfTheAncient), MatchLocations: map[string][][]int{}, } @@ -132,7 +132,7 @@ func TestExtractRelevantV3WaterWaterEverywhere(t *testing.T) { res.MatchLocations[t] = str.IndexAllIgnoreCaseUnicode(rhymeOfTheAncient, t, -1) } - df := calculateDocumentTermFrequency([]*fileJob{res}) + df := calculateDocumentTermFrequency([]*FileJob{res}) snippets := extractRelevantV3(res, df, 300, "") if !strings.Contains(snippets[0].Content, `Water, water, every where, @@ -149,7 +149,7 @@ func TestExtractRelevantV3GroanedDead(t *testing.T) { "dead", } - res := &fileJob{ + res := &FileJob{ Content: []byte(rhymeOfTheAncient), MatchLocations: map[string][][]int{}, } @@ -158,7 +158,7 @@ func TestExtractRelevantV3GroanedDead(t *testing.T) { res.MatchLocations[t] = str.IndexAllIgnoreCaseUnicode(rhymeOfTheAncient, t, -1) } - df := calculateDocumentTermFrequency([]*fileJob{res}) + df := calculateDocumentTermFrequency([]*FileJob{res}) snippets := extractRelevantV3(res, df, 300, "") if !strings.Contains(snippets[0].Content, `They groaned, they stirred, they all uprose, @@ -175,7 +175,7 @@ func TestExtractRelevantV3DeathFires(t *testing.T) { "fires", } - res := &fileJob{ + res := &FileJob{ Content: []byte(rhymeOfTheAncient), MatchLocations: map[string][][]int{}, } @@ -184,7 +184,7 @@ func TestExtractRelevantV3DeathFires(t *testing.T) { res.MatchLocations[t] = str.IndexAllIgnoreCaseUnicode(rhymeOfTheAncient, t, -1) } - df := calculateDocumentTermFrequency([]*fileJob{res}) + df := calculateDocumentTermFrequency([]*FileJob{res}) snippets := extractRelevantV3(res, df, 300, "") if !strings.Contains(snippets[0].Content, `About, about, in reel and rout @@ -201,7 +201,7 @@ func TestExtractRelevantV3PoorNerves(t *testing.T) { "nerves", } - res := &fileJob{ + res := &FileJob{ Content: []byte(prideAndPrejudice), MatchLocations: map[string][][]int{}, } @@ -210,7 +210,7 @@ func TestExtractRelevantV3PoorNerves(t *testing.T) { res.MatchLocations[t] = str.IndexAllIgnoreCaseUnicode(prideAndPrejudice, t, -1) } - df := calculateDocumentTermFrequency([]*fileJob{res}) + df := calculateDocumentTermFrequency([]*FileJob{res}) snippets := extractRelevantV3(res, df, 300, "") if !strings.Contains(snippets[0].Content, `You take delight in vexing me. You have no compassion for my poor @@ -227,7 +227,7 @@ func TestExtractRelevantV3TenThousandAYear(t *testing.T) { "year", } - res := &fileJob{ + res := &FileJob{ Content: []byte(prideAndPrejudice), MatchLocations: map[string][][]int{}, } @@ -236,7 +236,7 @@ func TestExtractRelevantV3TenThousandAYear(t *testing.T) { res.MatchLocations[t] = str.IndexAllIgnoreCaseUnicode(prideAndPrejudice, t, -1) } - df := calculateDocumentTermFrequency([]*fileJob{res}) + df := calculateDocumentTermFrequency([]*FileJob{res}) snippets := extractRelevantV3(res, df, 300, "") if !strings.Contains(snippets[0].Content, `of his having @@ -251,7 +251,7 @@ func TestExtractRelevantV3StrangerParents(t *testing.T) { "parents", } - res := &fileJob{ + res := &FileJob{ Content: []byte(prideAndPrejudice), MatchLocations: map[string][][]int{}, } @@ -260,7 +260,7 @@ func TestExtractRelevantV3StrangerParents(t *testing.T) { res.MatchLocations[t] = str.IndexAllIgnoreCaseUnicode(prideAndPrejudice, t, -1) } - df := calculateDocumentTermFrequency([]*fileJob{res}) + df := calculateDocumentTermFrequency([]*FileJob{res}) snippets := extractRelevantV3(res, df, 300, "") if !strings.Contains(snippets[0].Content, `An unhappy alternative is before you, Elizabeth. From this day diff --git a/processor/structs.go b/processor/structs.go index 817ac15..a591ced 100644 --- a/processor/structs.go +++ b/processor/structs.go @@ -2,8 +2,8 @@ package processor -// fileJob is a struct used to hold all of the results of processing internally before sent to the formatter -type fileJob struct { +// FileJob is a struct used to hold all of the results of processing internally before sent to the formatter +type FileJob struct { Filename string Extension string Location string diff --git a/processor/tui.go b/processor/tui.go index d15277a..29f6235 100644 --- a/processor/tui.go +++ b/processor/tui.go @@ -98,8 +98,8 @@ func tuiSearch(app *tview.Application, textView *tview.TextView, searchTerm sear } fileQueue := make(chan *file.File) // NB unbuffered because we want the UI to respond and this is what causes affects - toProcessQueue := make(chan *fileJob, runtime.NumCPU()) // Files to be read into memory for processing - summaryQueue := make(chan *fileJob, runtime.NumCPU()) // Files that match and need to be displayed + toProcessQueue := make(chan *FileJob, runtime.NumCPU()) // Files to be read into memory for processing + summaryQueue := make(chan *FileJob, runtime.NumCPU()) // Files that match and need to be displayed tuiFileWalker = file.NewFileWalker(startDirectory, fileQueue) tuiFileWalker.IgnoreIgnoreFile = IgnoreIgnoreFile @@ -133,7 +133,7 @@ func tuiSearch(app *tview.Application, textView *tview.TextView, searchTerm sear // Updated with results as we get them NB this is // painted as we go - var results []*fileJob + var results []*FileJob // Used to display a spinner indicating a search is happening var spinLocation int @@ -168,7 +168,7 @@ func tuiSearch(app *tview.Application, textView *tview.TextView, searchTerm sear debugCount++ } -func drawResults(app *tview.Application, results []*fileJob, textView *tview.TextView, searchTerm string, fileCount int64, inProgress string) { +func drawResults(app *tview.Application, results []*FileJob, textView *tview.TextView, searchTerm string, fileCount int64, inProgress string) { rankResults(int(fileCount), results) // TODO this should not be hardcoded @@ -182,10 +182,9 @@ func drawResults(app *tview.Application, results []*fileJob, textView *tview.Tex documentTermFrequency := calculateDocumentTermFrequency(results) for i, res := range pResults { - // NB this just gets the first snippet which should in theory be the most relevant snippets := extractRelevantV3(res, documentTermFrequency, int(SnippetLength), "…") - resultText += fmt.Sprintf("[purple]%d. %s (%.3f)", i+1, res.Location, res.Score) + "[white]\n\n" + resultText += fmt.Sprintf("[fuchsia]%d. %s (%.3f)", i+1, res.Location, res.Score) + "[white]\n\n" if int64(len(snippets)) > SnippetCount { snippets = snippets[:SnippetCount] diff --git a/processor/tui_helpers.go b/processor/tui_helpers.go index a86db89..681b599 100644 --- a/processor/tui_helpers.go +++ b/processor/tui_helpers.go @@ -7,7 +7,7 @@ import ( "strconv" ) -func getLocated(res *fileJob, v3 Snippet) [][]int { +func getLocated(res *FileJob, v3 Snippet) [][]int { var l [][]int // For all of the match locations we have only keep the ones that should be inside diff --git a/processor/worker_file.go b/processor/worker_file.go index 4d1ef02..3bf3138 100644 --- a/processor/worker_file.go +++ b/processor/worker_file.go @@ -16,14 +16,14 @@ import ( type FileReaderWorker struct { input chan *file.File - output chan *fileJob + output chan *FileJob fileCount int64 // Count of the number of files that have been read InstanceId int SearchPDF bool MaxReadSizeBytes int64 } -func NewFileReaderWorker(input chan *file.File, output chan *fileJob) *FileReaderWorker { +func NewFileReaderWorker(input chan *file.File, output chan *FileJob) *FileReaderWorker { return &FileReaderWorker{ input: input, output: output, @@ -70,7 +70,7 @@ func (f *FileReaderWorker) processPdf(res *file.File) { c, ok := __pdfCache[res.Location] if ok { atomic.AddInt64(&f.fileCount, 1) - f.output <- &fileJob{ + f.output <- &FileJob{ Filename: res.Filename, Extension: "", Location: res.Location, @@ -92,7 +92,7 @@ func (f *FileReaderWorker) processPdf(res *file.File) { __pdfCache[res.Location] = content atomic.AddInt64(&f.fileCount, 1) - f.output <- &fileJob{ + f.output <- &FileJob{ Filename: res.Filename, Extension: "", Location: res.Location, @@ -132,7 +132,7 @@ func (f *FileReaderWorker) processUnknown(res *file.File) { if err == nil { atomic.AddInt64(&f.fileCount, 1) - f.output <- &fileJob{ + f.output <- &FileJob{ Filename: res.Filename, Extension: "", Location: res.Location, diff --git a/processor/worker_searcher.go b/processor/worker_searcher.go index 8fca81a..31aaa1d 100644 --- a/processor/worker_searcher.go +++ b/processor/worker_searcher.go @@ -13,8 +13,8 @@ import ( ) type SearcherWorker struct { - input chan *fileJob - output chan *fileJob + input chan *FileJob + output chan *FileJob searchParams []searchParams FileCount int64 // Count of the number of files that have been processed BinaryCount int64 // Count the number of binary files @@ -28,7 +28,7 @@ type SearcherWorker struct { MinifiedLineByteLength int } -func NewSearcherWorker(input chan *fileJob, output chan *fileJob) *SearcherWorker { +func NewSearcherWorker(input chan *FileJob, output chan *FileJob) *SearcherWorker { return &SearcherWorker{ input: input, output: output, diff --git a/processor/worker_summarize.go b/processor/worker_summarize.go index 11bc981..7b1a186 100644 --- a/processor/worker_summarize.go +++ b/processor/worker_summarize.go @@ -13,7 +13,7 @@ import ( ) type ResultSummarizer struct { - input chan *fileJob + input chan *FileJob ResultLimit int64 FileReaderWorker *FileReaderWorker SnippetCount int64 @@ -22,7 +22,7 @@ type ResultSummarizer struct { FileOutput string } -func NewResultSummarizer(input chan *fileJob) ResultSummarizer { +func NewResultSummarizer(input chan *FileJob) ResultSummarizer { return ResultSummarizer{ input: input, ResultLimit: -1, @@ -35,7 +35,7 @@ func NewResultSummarizer(input chan *fileJob) ResultSummarizer { func (f *ResultSummarizer) Start() { // First step is to collect results so we can rank them - results := []*fileJob{} + results := []*FileJob{} for res := range f.input { results = append(results, res) } @@ -58,7 +58,7 @@ func (f *ResultSummarizer) Start() { } } -func (f *ResultSummarizer) formatJson(results []*fileJob) { +func (f *ResultSummarizer) formatJson(results []*FileJob) { var jsonResults []jsonResult documentFrequency := calculateDocumentTermFrequency(results) @@ -99,7 +99,7 @@ func (f *ResultSummarizer) formatJson(results []*fileJob) { } } -func (f *ResultSummarizer) formatDefault(results []*fileJob) { +func (f *ResultSummarizer) formatDefault(results []*FileJob) { fmtBegin := "\033[1;31m" fmtEnd := "\033[0m" if f.NoColor { diff --git a/processor/worker_summarize_test.go b/processor/worker_summarize_test.go index 3099131..5b59461 100644 --- a/processor/worker_summarize_test.go +++ b/processor/worker_summarize_test.go @@ -35,9 +35,9 @@ func TestPrintError(t *testing.T) { //func TestToJson(t *testing.T) { // ResultLimit = 100 -// fileListQueue := make(chan *fileJob, 100) +// fileListQueue := make(chan *FileJob, 100) // -// fileListQueue <- &fileJob{ +// fileListQueue <- &FileJob{ // Filename: "", // Extension: "", // Location: "", @@ -59,9 +59,9 @@ func TestPrintError(t *testing.T) { //func TestToJsonMultiple(t *testing.T) { // ResultLimit = 100 -// fileListQueue := make(chan *fileJob, 100) +// fileListQueue := make(chan *FileJob, 100) // -// fileListQueue <- &fileJob{ +// fileListQueue <- &FileJob{ // Filename: "Something", // Extension: "", // Location: "", @@ -74,7 +74,7 @@ func TestPrintError(t *testing.T) { // } // // for i := 0; i < 10; i++ { -// fileListQueue <- &fileJob{ +// fileListQueue <- &FileJob{ // Filename: strconv.Itoa(i), // Extension: "", // Location: "", @@ -98,9 +98,9 @@ func TestPrintError(t *testing.T) { //func TestFileSummerize(t *testing.T) { // ResultLimit = 100 // Format = "text" -// fileListQueue := make(chan *fileJob, 100) +// fileListQueue := make(chan *FileJob, 100) // -// fileListQueue <- &fileJob{ +// fileListQueue <- &FileJob{ // Filename: "Something", // Extension: "", // Location: "", @@ -113,7 +113,7 @@ func TestPrintError(t *testing.T) { // } // // for i := 0; i < 10; i++ { -// fileListQueue <- &fileJob{ +// fileListQueue <- &FileJob{ // Filename: strconv.Itoa(i), // Extension: "", // Location: "",