Skip to content

Commit

Permalink
remove double results (fix #19)
Browse files Browse the repository at this point in the history
  • Loading branch information
dimus committed May 12, 2022
1 parent 08d8dcd commit 774e8d0
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 6 deletions.
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
.idea
t
*.json
*.tsv
*.csv
*t.json
*t.tsv
*t.csv
prof
.vscode
200k-lines.txt
Expand Down
2 changes: 2 additions & 0 deletions ent/matcher/match.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package matcher

import (
"fmt"
"strings"

"github.com/gnames/gndiff/ent/record"
Expand All @@ -14,6 +15,7 @@ func (m *matcher) Match(rec record.Record) ([]record.Record, error) {
}

res, err = m.MatchFuzzy(rec.Canonical.Simple, rec.Canonical.Stemmed)
fmt.Printf("FUZZY: %#v\n\n", res)
if len(res) > 0 || err != nil {
return res, err
}
Expand Down
30 changes: 30 additions & 0 deletions gndiff_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package gndiff_test

import (
"fmt"
"path/filepath"
"testing"

Expand Down Expand Up @@ -40,6 +41,35 @@ func TestScore(t *testing.T) {
assert.Equal("Abelia forrestii var. gracilenta (W.W.Sm.) Landrein", abelia.ReferenceRecords[0].Name)
}

// TestNoDuplicates is the regression test for issue #19: duplicated results
// for similar names. It ingests the issue-19 source and reference fixtures,
// compares them, and checks that exactly one match entry is produced per
// source record.
//
// Each precondition stops the test when it fails: the assert helpers only
// record a failure and keep going, so continuing would index into
// res.Matches and panic instead of reporting the real problem.
func TestNoDuplicates(t *testing.T) {
	assert := assert.New(t)
	cfg := config.New()
	ing := ingestio.New(cfg)

	src := filepath.Join(path, "issue-19-src.csv")
	recSrc, err := ing.Records(src)
	if !assert.Nil(err) {
		return
	}

	ref := filepath.Join(path, "issue-19-ref.csv")
	recRef, err := ing.Records(ref)
	if !assert.Nil(err) {
		return
	}

	gnd := gndiff.New(cfg)
	res, err := gnd.Compare(recSrc, recRef)
	if !assert.Nil(err) {
		return
	}

	fmt.Printf("SRC: %#v\n\n", len(recSrc))

	// The indexing below needs three entries; fail with a clear message
	// rather than an index-out-of-range panic.
	if len(res.Matches) < 3 {
		t.Fatalf("expected at least 3 matches, got %d", len(res.Matches))
	}

	// Diagnostic dump of the reference records attached to the first match.
	for _, v := range res.Matches[0].ReferenceRecords {
		fmt.Printf("RES: %#v\n\n", v)
	}
	assert.Equal(len(recSrc), len(res.Matches))

	// Placeholders for per-name assertions; the fixture has three source rows.
	first := res.Matches[0]
	second := res.Matches[1]
	third := res.Matches[2]
	_, _, _ = first, second, third
}

func TestGNdiff(t *testing.T) {
cfg := config.New()
ing := ingestio.New(cfg)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ require (
github.com/devopsfaith/bloomfilter v1.4.0
github.com/dgraph-io/badger/v2 v2.2007.4
github.com/dvirsky/levenshtein v0.0.0-20200624034316-59b26b61c3c8
github.com/gnames/gnames v1.0.0-RC1
github.com/gnames/gnames v1.0.0-RC2
github.com/gnames/gnfmt v0.2.0
github.com/gnames/gnlib v0.14.0
github.com/gnames/gnparser v1.6.5
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ github.com/georgysavva/scany v0.3.0/go.mod h1:q8QyrfXjmBk9iJD00igd4lbkAKEXAH/zIY
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/gin-contrib/sse v0.0.0-20170109093832-22d885f9ecc7/go.mod h1:VJ0WA2NBN22VlZ2dKZQPAPnyWw5XTlK1KymzLKsr59s=
github.com/gin-gonic/gin v1.1.5-0.20170702092826-d459835d2b07/go.mod h1:7cKuhb5qV2ggCFctp2fJQ+ErvciLZrIeoOSOm6mUr7Y=
github.com/gnames/gnames v1.0.0-RC1 h1:lf/+5rBUczUYGDo9LBgwT9Xykkc0hdQqlvldlaKrHTo=
github.com/gnames/gnames v1.0.0-RC1/go.mod h1:RT99vAA83z/qxuVp9rH13YWtZk4hOIZ1N9fo5vWioYI=
github.com/gnames/gnames v1.0.0-RC2 h1:y6fc8ROHnOTN0zXPnkjvdCEhiBC0zvIFz67vEbUHCTk=
github.com/gnames/gnames v1.0.0-RC2/go.mod h1:RT99vAA83z/qxuVp9rH13YWtZk4hOIZ1N9fo5vWioYI=
github.com/gnames/gnfmt v0.1.0/go.mod h1:WG9c3CoiVrGc1SDsxLk7zjmv2B4UIzI00m4K5Khc/d0=
github.com/gnames/gnfmt v0.2.0 h1:CjE1HxdqyTwufua5wMCdILWnCsCfRiHe5G4TgxR8aAI=
github.com/gnames/gnfmt v0.2.0/go.mod h1:0Aog37s1ZNpmUwVQOf+lnx0SQq8r2EvfE/pLYGiJlJQ=
Expand Down
3 changes: 3 additions & 0 deletions testdata/issue-19-ref.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TaxonID,Family,ScientificName
1001,,Obione maritima (Alfredo) Pacino var. maritima
1002,,Obione maritima (Alfredo) Pacino subsp. maritima
4 changes: 4 additions & 0 deletions testdata/issue-19-src.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ScientificName
Obione maritima (Alfredo) Pacino var. maritimaa
Obione maritima (Alfredo) Di'Stefano subsp. maritima
Quercus lamarkensis

0 comments on commit 774e8d0

Please sign in to comment.