Skip to content

Commit

Permalink
Add support for unsorted CSV files
Browse files Browse the repository at this point in the history
Two new options are provided, -csv1.sort and -csv2.sort. When either is
set, the corresponding CSV data is sorted by the -key columns before it is
diffed against the other source.

This uses SQLite behind the scenes which requires CGO. This release drops
Windows binaries for the time being until an alternate implementation is
considered.

Signed-off-by: Byron Ruth <b@devel.io>
  • Loading branch information
bruth committed Oct 26, 2017
1 parent e8257ba commit 22f3cd2
Show file tree
Hide file tree
Showing 10 changed files with 259 additions and 43 deletions.
9 changes: 5 additions & 4 deletions Dockerfile
@@ -1,7 +1,8 @@
FROM alpine:3.6
FROM golang:1.9 AS build-env
WORKDIR /go/src/github.com/chop-dbhi/diff-table
RUN make dist-build-linux

FROM alpine:3.6
RUN apk add --update ca-certificates

COPY ./dist/linux-amd64/diff-table /

COPY --from=build-env /go/src/github.com/chop-dbhi/diff-table/dist/linux-amd64/diff-table /
ENTRYPOINT ["/diff-table"]
7 changes: 7 additions & 0 deletions Dockerfile.build
@@ -0,0 +1,7 @@
FROM golang:1.9
ADD build.sh /

ENV CGO_ENABLED=1
ENV ROOT=/go/src/github.com/chop-dbhi/diff-table

ENTRYPOINT ["/build.sh"]
27 changes: 27 additions & 0 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions Gopkg.toml
@@ -0,0 +1,30 @@

# Gopkg.toml example
#
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
# for detailed Gopkg.toml documentation.
#
# required = ["github.com/user/thing/cmd/thing"]
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
#
# [[constraint]]
# name = "github.com/user/project"
# version = "1.0.0"
#
# [[constraint]]
# name = "github.com/user/project2"
# branch = "dev"
# source = "github.com/myfork/project2"
#
# [[override]]
# name = "github.com/x/y"
# version = "2.4.0"


[[constraint]]
branch = "master"
name = "github.com/lib/pq"

[[constraint]]
name = "github.com/mattn/go-sqlite3"
version = "1.3.0"
24 changes: 17 additions & 7 deletions Makefile
Expand Up @@ -11,23 +11,33 @@ build:
go build -ldflags "-X \"main.buildVersion=$(GIT_VERSION)\"" \
-o $(GOPATH)/bin/$(PROG_NAME) $(CMD_PATH)

dist-build:
dist-build-linux:
mkdir -p dist

gox -output="./dist/{{.OS}}-{{.Arch}}/$(PROG_NAME)" \
-ldflags "-X \"main.buildVersion=$(GIT_VERSION)\"" \
-os "windows linux darwin" \
-arch "amd64" $(CMD_PATH) > /dev/null
go build -ldflags "-extldflags \"-static\" -X \"main.buildVersion=$(GIT_VERSION)\"" \
-o ./dist/linux-amd64/$(PROG_NAME) $(CMD_PATH)

dist-linux:
docker build -f Dockerfile.build -t dbhi/diff-table-builder .

docker run --rm -it \
-v ${PWD}:/go/src/github.com/chop-dbhi/diff-table \
dbhi/diff-table-builder

dist-build: dist-linux
mkdir -p dist

go build -ldflags "-X \"main.buildVersion=$(GIT_VERSION)\"" \
-o ./dist/darwin-amd64/$(PROG_NAME) $(CMD_PATH)

dist-zip:
cd dist && zip $(PROG_NAME)-darwin-amd64.zip darwin-amd64/*
cd dist && zip $(PROG_NAME)-linux-amd64.zip linux-amd64/*
cd dist && zip $(PROG_NAME)-windows-amd64.zip windows-amd64/*

dist: dist-build dist-zip

docker:
docker build -t ${IMAGE_NAME}:${GIT_SHA} .
docker build -v .:/go/src/gitub.com/chop-dbhi/diff-table -t ${IMAGE_NAME}:${GIT_SHA} .
docker tag ${IMAGE_NAME}:${GIT_SHA} ${IMAGE_NAME}:${GIT_BRANCH}
if [ -n "${GIT_TAG}" ] ; then \
docker tag ${IMAGE_NAME}:${GIT_SHA} ${IMAGE_NAME}:${GIT_TAG} ; \
Expand Down
4 changes: 4 additions & 0 deletions build.sh
@@ -0,0 +1,4 @@
#!/bin/bash

cd $ROOT
make dist-build-linux
54 changes: 42 additions & 12 deletions cmd/diff-table/main.go
Expand Up @@ -23,9 +23,11 @@ func main() {

csv1 string
csv1delim string
csv1sort bool

csv2 string
csv2delim string
csv2sort bool

url1 string
schema1 string
Expand All @@ -41,9 +43,11 @@ func main() {

flag.StringVar(&csv1, "csv1", "", "Path to CSV file.")
flag.StringVar(&csv1delim, "csv1.delim", ",", "CSV delimiter.")
flag.BoolVar(&csv1sort, "csv1.sort", false, "CSV requires sorting.")

flag.StringVar(&csv2, "csv2", "", "Path to CSV file.")
flag.StringVar(&csv2delim, "csv2.delim", ",", "CSV delimiter.")
flag.BoolVar(&csv2sort, "csv2.sort", false, "CSV requires sorting.")

flag.StringVar(&url1, "db", "", "Database 1 connection URL.")
flag.StringVar(&schema1, "schema", "", "Name of the first schema.")
Expand Down Expand Up @@ -71,8 +75,9 @@ func main() {
}

var (
t1, t2 difftable.Table
err error
t1, t2 difftable.Table
db1, db2 *sql.DB
err error
)

if csv1 != "" && url1 != "" {
Expand Down Expand Up @@ -100,10 +105,21 @@ func main() {
cr1.TrimLeadingSpace = true
cr1.ReuseRecord = true

t1, err = difftable.CSVTable(cr1, key)
if err != nil {
log.Printf("csv1 table: %s", err)
return
if csv1sort {
table1 = "results"
db1, err = difftable.CsvDB(cr1, table1, key)
if err != nil {
log.Printf("csv1 table: %s", err)
return
}
defer db1.Close()
table1 = "results"
} else {
t1, err = difftable.CSVTable(cr1, key)
if err != nil {
log.Printf("csv1 table: %s", err)
return
}
}
}

Expand All @@ -122,22 +138,34 @@ func main() {
cr2.TrimLeadingSpace = true
cr2.ReuseRecord = true

t2, err = difftable.CSVTable(cr2, key)
if err != nil {
log.Printf("csv2 table: %s", err)
return
if csv2sort {
table2 = "results"
db2, err = difftable.CsvDB(cr2, table2, key)
if err != nil {
log.Printf("csv2 table: %s", err)
return
}
defer db2.Close()
} else {
t2, err = difftable.CSVTable(cr2, key)
if err != nil {
log.Printf("csv2 table: %s", err)
return
}
}
}

if url1 != "" {
// TODO: remove hard-coded postgres dependency
db1, err := sql.Open("postgres", url1)
db1, err = sql.Open("postgres", url1)
if err != nil {
log.Printf("db1 open: %s", err)
return
}
defer db1.Close()
}

if db1 != nil {
rows1, err := runQuery(db1, schema1, table1, key)
if err != nil {
log.Printf("db1 query: %s", err)
Expand All @@ -153,13 +181,15 @@ func main() {
}

if url2 != "" {
db2, err := sql.Open("postgres", url2)
db2, err = sql.Open("postgres", url2)
if err != nil {
log.Printf("db2 open: %s", err)
return
}
defer db2.Close()
}

if db2 != nil {
rows2, err := runQuery(db2, schema2, table2, key)
if err != nil {
log.Printf("db2 query: %s", err)
Expand Down
13 changes: 0 additions & 13 deletions lock.json

This file was deleted.

7 changes: 0 additions & 7 deletions manifest.json

This file was deleted.

127 changes: 127 additions & 0 deletions unsorted_csv.go
@@ -0,0 +1,127 @@
package difftable

import (
"database/sql"
"encoding/csv"
"fmt"
"io"
"strings"

_ "github.com/mattn/go-sqlite3"
)

const (
// indexName is the name of the index that newDb creates over the key
// columns of the table holding the loaded CSV rows.
indexName = "results_key_index"
)

// newDb opens an in-memory SQLite database and creates an empty table named
// table with one TEXT column per entry in head, plus an index named indexName
// over the key columns.
//
// The cr parameter is not used; it is kept so existing callers continue to
// compile.
//
// Table and column names are interpolated into the DDL as-is (column names are
// wrapped in backticks but not escaped), so they must be valid SQLite
// identifiers.
//
// On success the caller owns the returned handle and must Close it. On failure
// the handle is closed here and nil is returned along with the error.
func newDb(cr *csv.Reader, table string, head, key []string) (*sql.DB, error) {
	db, err := sql.Open("sqlite3", ":memory:")
	if err != nil {
		return nil, err
	}

	// database/sql hands out connections from a pool, and every new SQLite
	// connection to ":memory:" gets its own private, empty database. Pinning
	// the pool to a single connection guarantees that the table created below,
	// the rows inserted later and the final query all see the same database.
	db.SetMaxOpenConns(1)

	cols := make([]string, len(head))
	for i, col := range head {
		cols[i] = fmt.Sprintf("`%s` TEXT", col)
	}

	keyCols := make([]string, len(key))
	for i, col := range key {
		keyCols[i] = fmt.Sprintf("`%s`", col)
	}

	stmts := []string{
		fmt.Sprintf(`CREATE TABLE %s (%s)`, table, strings.Join(cols, ",\n")),
		fmt.Sprintf(`CREATE INDEX %s ON %s (%s)`, indexName, table, strings.Join(keyCols, ",")),
	}

	for _, stmt := range stmts {
		if _, err = db.Exec(stmt); err != nil {
			db.Close()
			return nil, err
		}
	}

	return db, nil
}

// insertStmt returns a parameterized INSERT statement for table that lists
// every column in head (each wrapped in backticks) and supplies one "?"
// placeholder per column.
func insertStmt(table string, head []string) string {
	const format = `
INSERT INTO %s (%s)
VALUES (%s)
`

	n := len(head)
	quoted := make([]string, 0, n)
	marks := make([]string, 0, n)

	for _, name := range head {
		quoted = append(quoted, "`"+name+"`")
		marks = append(marks, "?")
	}

	return fmt.Sprintf(format, table, strings.Join(quoted, ","), strings.Join(marks, ","))
}

// CsvDB loads the CSV data read from cr into a new SQLite database and returns
// the open handle.
//
// The first record read from cr is taken as the header and supplies the column
// names. A table named table is created with those columns and an index over
// the key columns, and every remaining record is inserted inside a single
// transaction.
//
// Every data record must have exactly as many fields as the header; a record
// with a different count aborts the load with an error.
//
// On success the caller owns the returned handle and must Close it. On any
// failure the transaction is rolled back, the database is closed and nil is
// returned along with the error.
func CsvDB(cr *csv.Reader, table string, key []string) (*sql.DB, error) {
	head, err := cr.Read()
	if err != nil {
		return nil, err
	}

	db, err := newDb(cr, table, head, key)
	if err != nil {
		return nil, err
	}

	if err := insertCSVRows(db, cr, table, head); err != nil {
		db.Close()
		return nil, err
	}

	return db, nil
}

// insertCSVRows reads the remaining records from cr and inserts them into
// table inside one transaction, committing only if every record was stored.
func insertCSVRows(db *sql.DB, cr *csv.Reader, table string, head []string) error {
	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit this Rollback is a no-op that returns
	// sql.ErrTxDone, so it only takes effect on the error paths.
	defer tx.Rollback()

	stmt, err := tx.Prepare(insertStmt(table, head))
	if err != nil {
		return err
	}
	defer stmt.Close()

	// One reusable argument slice; every slot is overwritten for each record.
	vals := make([]interface{}, len(head))

	// The header was record 1, so data starts at record 2.
	for rec := 2; ; rec++ {
		row, err := cr.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}

		// A reader configured to allow variable field counts could otherwise
		// index past vals (long row) or leave values from the previous record
		// in the trailing slots (short row).
		if len(row) != len(vals) {
			return fmt.Errorf("csv record %d has %d fields, expected %d", rec, len(row), len(vals))
		}

		for i, s := range row {
			vals[i] = s
		}

		if _, err := stmt.Exec(vals...); err != nil {
			return err
		}
	}

	return tx.Commit()
}

0 comments on commit 22f3cd2

Please sign in to comment.