Skip to content

Commit

Permalink
Do not re-compute page_entities files unless needed
Browse files — browse the repository at this point in the history
Fixes #33.
  • Loading branch information
brawer committed May 8, 2024
1 parent 558cfaa commit 8c6d60e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
2 changes: 1 addition & 1 deletion cmd/qrank-builder/pageentities.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func buildPageEntities(ctx context.Context, dumps string, sites *map[string]Wiki

for _, site := range *sites {
ymd := site.LastDumped.Format("20060102")
if arr, ok := stored[ymd]; !ok || !slices.Contains(arr, ymd) {
if arr, ok := stored[site.Key]; !ok || !slices.Contains(arr, ymd) {
tasks <- site
}
}
Expand Down
15 changes: 12 additions & 3 deletions cmd/qrank-builder/pageentities_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ func TestBuildPageEntities(t *testing.T) {
ctx := context.Background()
dumps := filepath.Join("testdata", "dumps")
s3 := NewFakeS3()
s3.data["page_entities/rmwiki-20010203-page_entities.zst"] = []byte("old")
s3.data["page_entities/loginwiki-20240501-page_entities.zst"] = []byte("old-loginwiki")
s3.data["page_entities/rmwiki-20010203-page_entities.zst"] = []byte("old-rmwiki")
sites, err := ReadWikiSites(dumps)
if err != nil {
t.Fatal(err)
Expand All @@ -32,6 +33,14 @@ func TestBuildPageEntities(t *testing.T) {
t.Fatal(err)
}

// page_entities should be cached across pipeline runs
// https://github.com/brawer/wikidata-qrank/issues/33
got := string(s3.data["page_entities/loginwiki-20240501-page_entities.zst"])
want := "old-loginwiki"
if got != want {
t.Errorf("previously stored page_entities should not re-computed")
}

path := "page_entities/rmwiki-20240301-page_entities.zst"
reader, err := zstd.NewReader(bytes.NewReader(s3.data[path]))
if err != nil {
Expand All @@ -43,8 +52,8 @@ func TestBuildPageEntities(t *testing.T) {
if _, err = io.Copy(&buf, reader); err != nil {
t.Error(err)
}
got := buf.String()
want := "1,Q5296\n3824,Q662541\n799,Q72\n"
got = buf.String()
want = "1,Q5296\n3824,Q662541\n799,Q72\n"
if got != want {
t.Errorf("got %v, want %v", got, want)
}
Expand Down

0 comments on commit 8c6d60e

Please sign in to comment.