Skip to content

Commit

Permalink
fix csv and tsv files (close #25)
Browse files Browse the repository at this point in the history
  • Loading branch information
dimus committed Sep 6, 2023
1 parent d89a60d commit 6ff8edc
Show file tree
Hide file tree
Showing 45 changed files with 107 additions and 110 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ prof
200k-lines.txt
test_data.new.txt
.idea
coverage.txt
gnparser/gnparser
bench*.txt
binding/libgnparser.h
binding/*.so
build/**
.DS_Store
gndiff
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

## Unreleased

## [v0.2.2] - 2023-09-06 Wed

- Add: refactor to a more standard file structure.
- Add: update modules to their most recent versions.
- Fix [#25]: CSV and TSV files have correct number of fields.
- Fix [#19]: duplication of results of fuzzy matching.

## [v0.2.1] - 2022-05-11 Wed
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ ENV LAST_FULL_REBUILD 2021-12-27

WORKDIR /bin

COPY ./gndiff/gndiff /bin
COPY ./gndiff /bin

ENTRYPOINT [ "gndiff" ]
12 changes: 6 additions & 6 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License
The MIT License (MIT)

Copyright (c) 2021-2022 gnames
Copyright © 2021-2023 Dmitry Mozzherin <dmozzherin@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand All @@ -9,13 +9,13 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
52 changes: 30 additions & 22 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,73 +1,81 @@
PROJ_NAME = gndiff

VERSION = $(shell git describe --tags)
VER = $(shell git describe --tags --abbrev=0)
DATE = $(shell date -u '+%Y-%m-%d_%H:%M:%S%Z')

FLAGS_INTEL64 = GOARCH=amd64
NO_C = CGO_ENABLED=0
FLAGS_LINUX = $(FLAGS_INTEL) GOOS=linux
FLAGS_MAC = $(FLAGS_INTEL) GOOS=darwin
FLAGS_LINUX = $(FLAGS_INTEL64) GOOS=linux
FLAGS_MAC = $(FLAGS_INTEL64) GOOS=darwin
# Environment for darwin/arm64 builds, prepended to the go build command in
# the release target. GOARCH must be written literally: a leading `$` makes
# Make expand the one-letter variable G and leaves only "OARCH=arm64".
FLAGS_MAC_ARM = GOARCH=arm64 GOOS=darwin
FLAGS_WIN = $(FLAGS_INTEL64) GOOS=windows
FLAGS_LD=-ldflags "-s -w -X github.com/gnames/gndiff.Build=${DATE} \
-X github.com/gnames/gndiff.Version=${VERSION}"
FLAGS_LD = -ldflags "-X github.com/gnames/$(PROJ_NAME)/pkg.Build=$(DATE) \
-X github.com/gnames/$(PROJ_NAME)/pkg.Version=$(VERSION)"
FLAGS_REL = -trimpath -ldflags "-s -w \
-X github.com/gnames/$(PROJ_NAME)/pkg.Build=$(DATE)"
GOCMD = go
GOBUILD = $(GOCMD) build $(FLAGS_LD)
GORELEASE = $(GOCMD) build $(FLAGS_REL)
GOINSTALL = $(GOCMD) install $(FLAGS_LD)
GOCLEAN = $(GOCMD) clean
GOGET = $(GOCMD) get

RELEASE_DIR ?= "/tmp"
BUILD_DIR ?= "."
CLIB_DIR ?= "."

all: install

test: deps install
$(FLAG_INTEL) go test -race ./...
@echo Run tests
$(GOCMD) test -shuffle=on -count=1 -race -coverprofile=coverage.txt ./...

test-build: deps build

deps:
@echo Download go.mod dependencies
$(GOCMD) mod download;

tools: deps
@echo Installing tools from tools.go
@cat gndiff/tools.go | grep _ | awk -F'"' '{print $$2}' | xargs -tI % go install %
@cat tools.go | grep _ | awk -F'"' '{print $$2}' | xargs -tI % go install %

build:
cd gndiff; \
@echo Building
$(GOCLEAN); \
$(FLAGS_SHARED) $(NO_C) $(GOBUILD) -o $(BUILD_DIR)

buildrel:
@echo Building release binary
$(GOCLEAN); \
$(NO_C) $(GORELEASE) -o $(BUILD_DIR);

install:
cd gndiff; \
$(GOCLEAN); \
$(FLAGS_SHARED) $(NO_C) $(GOINSTALL)

release: dockerhub
cd gndiff; \
@echo Make release
$(GOCLEAN); \
$(FLAGS_LINUX) $(NO_C) $(GOBUILD); \
tar zcf $(RELEASE_DIR)/gndiff-$(VER)-linux.tar.gz gndiff; \
tar zcf $(RELEASE_DIR)/$(PROJ_NAME)-$(VER)-linux.tar.gz $(PROJ_NAME); \
$(GOCLEAN); \
$(FLAGS_MAC) $(NO_C) $(GOBUILD); \
tar zcf $(RELEASE_DIR)/gndiff-$(VER)-mac.tar.gz gndiff; \
tar zcf $(RELEASE_DIR)/$(PROJ_NAME)-$(VER)-mac.tar.gz $(PROJ_NAME); \
$(GOCLEAN); \
$(FLAGS_MAC_ARM) $(NO_C) $(GOBUILD); \
tar zcf $(RELEASE_DIR)/gndiff-$(VER)-mac-arm64.tar.gz gndiff; \
tar zcf $(RELEASE_DIR)/$(PROJ_NAME)-$(VER)-mac-arm64.tar.gz $(PROJ_NAME); \
$(GOCLEAN); \
$(FLAGS_WIN) $(NO_C) $(GOBUILD); \
zip -9 $(RELEASE_DIR)/gndiff-$(VER)-win-64.zip gndiff.exe; \
zip -9 $(RELEASE_DIR)/$(PROJ_NAME)-$(VER)-win-64.zip $(PROJ_NAME).exe; \
$(GOCLEAN);

dc: build
docker-compose build;

docker: build
docker build -t gnames/gndiff:latest -t gnames/gndiff:$(VERSION) .; \
cd gndiff; \
docker: buildrel
@echo Build Docker images
docker buildx build -t gnames/$(PROJ_NAME):latest -t gnames/$(PROJ_NAME):$(VERSION) .; \
$(GOCLEAN);

dockerhub: docker
docker push gnames/gndiff; \
docker push gnames/gndiff:$(VERSION)
@echo Push Docker images to DockerHub
docker push gnames/$(PROJ_NAME); \
docker push gnames/$(PROJ_NAME):$(VERSION)
10 changes: 5 additions & 5 deletions gndiff/cmd/root.go → cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ import (
"os"
"path/filepath"

"github.com/gnames/gndiff"
"github.com/gnames/gndiff/config"
"github.com/gnames/gndiff/ent/output"
"github.com/gnames/gndiff/ent/record"
"github.com/gnames/gndiff/io/ingestio"
"github.com/gnames/gndiff/internal/io/ingestio"
gndiff "github.com/gnames/gndiff/pkg"
"github.com/gnames/gndiff/pkg/config"
"github.com/gnames/gndiff/pkg/ent/output"
"github.com/gnames/gndiff/pkg/ent/record"
"github.com/gnames/gnfmt"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
Expand Down
21 changes: 0 additions & 21 deletions gndiff/LICENSE

This file was deleted.

6 changes: 3 additions & 3 deletions io/ingestio/ingestio.go → internal/io/ingestio/ingestio.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ import (
"strconv"
"strings"

"github.com/gnames/gndiff/config"
"github.com/gnames/gndiff/ent/ingester"
"github.com/gnames/gndiff/ent/record"
"github.com/gnames/gndiff/pkg/config"
"github.com/gnames/gndiff/pkg/ent/ingester"
"github.com/gnames/gndiff/pkg/ent/record"
"github.com/gnames/gnparser"
"github.com/gnames/gnsys"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ import (
"path/filepath"
"testing"

"github.com/gnames/gndiff/config"
"github.com/gnames/gndiff/io/ingestio"
"github.com/gnames/gndiff/internal/io/ingestio"
"github.com/gnames/gndiff/pkg/config"
"github.com/stretchr/testify/assert"
)

var path = "../../testdata/"
var path = "../../../pkg/testdata/"

func TestRecordsBad(t *testing.T) {
cfg := config.New()
Expand Down
2 changes: 1 addition & 1 deletion gndiff/main.go → main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ THE SOFTWARE.
*/
package main

import "github.com/gnames/gndiff/gndiff/cmd"
import "github.com/gnames/gndiff/cmd"

func main() {
cmd.Execute()
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion ent/dbase/dbase.go → pkg/ent/dbase/dbase.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"log"

badger "github.com/dgraph-io/badger/v2"
"github.com/gnames/gndiff/ent/record"
"github.com/gnames/gndiff/pkg/ent/record"
"github.com/gnames/gnfmt"
)

Expand Down
8 changes: 4 additions & 4 deletions ent/dbase/dbase_test.go → pkg/ent/dbase/dbase_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ import (
"path/filepath"
"testing"

"github.com/gnames/gndiff/config"
"github.com/gnames/gndiff/ent/dbase"
"github.com/gnames/gndiff/ent/record"
"github.com/gnames/gndiff/io/ingestio"
"github.com/gnames/gndiff/internal/io/ingestio"
"github.com/gnames/gndiff/pkg/config"
"github.com/gnames/gndiff/pkg/ent/dbase"
"github.com/gnames/gndiff/pkg/ent/record"
"github.com/stretchr/testify/assert"
)

Expand Down
2 changes: 1 addition & 1 deletion ent/dbase/init.go → pkg/ent/dbase/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package dbase

import (
badger "github.com/dgraph-io/badger/v2"
"github.com/gnames/gndiff/ent/record"
"github.com/gnames/gndiff/pkg/ent/record"
)

func (db *dbase) Init(recs []record.Record) error {
Expand Down
2 changes: 1 addition & 1 deletion ent/dbase/interface.go → pkg/ent/dbase/interface.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package dbase

import "github.com/gnames/gndiff/ent/record"
import "github.com/gnames/gndiff/pkg/ent/record"

type DBase interface {
Init([]record.Record) error
Expand Down
2 changes: 1 addition & 1 deletion ent/exact/exact.go → pkg/ent/exact/exact.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (

"github.com/devopsfaith/bloomfilter"
baseBloomfilter "github.com/devopsfaith/bloomfilter/bloomfilter"
"github.com/gnames/gndiff/ent/record"
"github.com/gnames/gndiff/pkg/ent/record"
)

type exact struct {
Expand Down
8 changes: 4 additions & 4 deletions ent/exact/exact_test.go → pkg/ent/exact/exact_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ import (
"path/filepath"
"testing"

"github.com/gnames/gndiff/config"
"github.com/gnames/gndiff/ent/exact"
"github.com/gnames/gndiff/ent/record"
"github.com/gnames/gndiff/io/ingestio"
"github.com/gnames/gndiff/internal/io/ingestio"
"github.com/gnames/gndiff/pkg/config"
"github.com/gnames/gndiff/pkg/ent/exact"
"github.com/gnames/gndiff/pkg/ent/record"
"github.com/stretchr/testify/assert"
)

Expand Down
2 changes: 1 addition & 1 deletion ent/exact/interface.go → pkg/ent/exact/interface.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package exact

import "github.com/gnames/gndiff/ent/record"
import "github.com/gnames/gndiff/pkg/ent/record"

type Exact interface {
Init([]record.Record)
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion ent/fuzzy/fuzzy.go → pkg/ent/fuzzy/fuzzy.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"sort"

"github.com/dvirsky/levenshtein"
"github.com/gnames/gndiff/ent/record"
"github.com/gnames/gndiff/pkg/ent/record"
)

type fuzzy struct {
Expand Down
8 changes: 4 additions & 4 deletions ent/fuzzy/fuzzy_test.go → pkg/ent/fuzzy/fuzzy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ import (
"path/filepath"
"testing"

"github.com/gnames/gndiff/config"
"github.com/gnames/gndiff/ent/fuzzy"
"github.com/gnames/gndiff/ent/record"
"github.com/gnames/gndiff/io/ingestio"
"github.com/gnames/gndiff/internal/io/ingestio"
"github.com/gnames/gndiff/pkg/config"
"github.com/gnames/gndiff/pkg/ent/fuzzy"
"github.com/gnames/gndiff/pkg/ent/record"
"github.com/stretchr/testify/assert"
)

Expand Down
2 changes: 1 addition & 1 deletion ent/fuzzy/interface.go → pkg/ent/fuzzy/interface.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package fuzzy

import "github.com/gnames/gndiff/ent/record"
import "github.com/gnames/gndiff/pkg/ent/record"

type Fuzzy interface {
Init([]record.Record) error
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package ingester

import "github.com/gnames/gndiff/ent/record"
import "github.com/gnames/gndiff/pkg/ent/record"

type Ingester interface {
Records(path string) ([]record.Record, error)
Expand Down
2 changes: 1 addition & 1 deletion ent/matcher/interface.go → pkg/ent/matcher/interface.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package matcher

import "github.com/gnames/gndiff/ent/record"
import "github.com/gnames/gndiff/pkg/ent/record"

type Matcher interface {
Init([]record.Record) error
Expand Down
2 changes: 1 addition & 1 deletion ent/matcher/match.go → pkg/ent/matcher/match.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package matcher
import (
"strings"

"github.com/gnames/gndiff/ent/record"
"github.com/gnames/gndiff/pkg/ent/record"
"github.com/gnames/gnlib/ent/verifier"
)

Expand Down
8 changes: 4 additions & 4 deletions ent/matcher/matcher.go → pkg/ent/matcher/matcher.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package matcher

import (
"github.com/gnames/gndiff/ent/dbase"
"github.com/gnames/gndiff/ent/exact"
"github.com/gnames/gndiff/ent/fuzzy"
"github.com/gnames/gndiff/ent/record"
"github.com/gnames/gndiff/pkg/ent/dbase"
"github.com/gnames/gndiff/pkg/ent/exact"
"github.com/gnames/gndiff/pkg/ent/fuzzy"
"github.com/gnames/gndiff/pkg/ent/record"
"github.com/gnames/gnlib/ent/verifier"
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ import (
"path/filepath"
"testing"

"github.com/gnames/gndiff/config"
"github.com/gnames/gndiff/ent/matcher"
"github.com/gnames/gndiff/ent/record"
"github.com/gnames/gndiff/io/ingestio"
"github.com/gnames/gndiff/internal/io/ingestio"
"github.com/gnames/gndiff/pkg/config"
"github.com/gnames/gndiff/pkg/ent/matcher"
"github.com/gnames/gndiff/pkg/ent/record"
"github.com/gnames/gnlib/ent/verifier"
"github.com/gnames/gnparser"
"github.com/stretchr/testify/assert"
Expand Down
Loading

0 comments on commit 6ff8edc

Please sign in to comment.