Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Seqhash functions #6

Merged
merged 13 commits into from
Jun 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 192 additions & 32 deletions commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ package main
import (
"bufio"
"bytes"
"crypto"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"

"github.com/urfave/cli/v2"
Expand Down Expand Up @@ -40,16 +42,7 @@ parse them, and then spit out a similiarly named file with the .json extension.
func convert(c *cli.Context) error {
if isPipe() {

var annotatedSequence AnnotatedSequence

// logic for determining input format, then parses accordingly.
if c.String("i") == "json" {
json.Unmarshal([]byte(stdinToString(os.Stdin)), &annotatedSequence)
} else if c.String("i") == "gbk" || c.String("i") == "gb" {
annotatedSequence = ParseGbk(stdinToString(os.Stdin))
} else if c.String("i") == "gff" {
annotatedSequence = ParseGff(stdinToString(os.Stdin))
}
annotatedSequence := parseStdin(c)

var output []byte

Expand All @@ -66,16 +59,8 @@ func convert(c *cli.Context) error {
//
} else {

var matches []string

//take all args and get their pattern matches.
for argIndex := 0; argIndex < c.Args().Len(); argIndex++ {
match, _ := filepath.Glob(c.Args().Get(argIndex))
matches = append(matches, match...)
}

//filtering pattern matches for duplicates.
matches = uniqueNonEmptyElementsOf(matches)
// gets glob pattern matches to determine which files to use.
matches := getMatches(c)

// TODO write basic check to see if input flag or all paths have accepted file extensions.

Expand All @@ -93,18 +78,7 @@ func convert(c *cli.Context) error {
// executing Go routine.
go func(match string) {
extension := filepath.Ext(match)
var annotatedSequence AnnotatedSequence

// determining which reader to use and parse into AnnotatedSequence struct.
if extension == ".gff" || c.String("i") == "gff" {
annotatedSequence = ReadGff(match)
} else if extension == ".gbk" || extension == ".gb" || c.String("i") == "gbk" || c.String("i") == "gb" {
annotatedSequence = ReadGbk(match)
} else if extension == ".json" || c.String("i") == "json" {
annotatedSequence = ReadJSON(match)
} else {
// TODO put default error handling here.
}
annotatedSequence := fileParser(c, match)

// determining output format and name, then writing out to name.
outputPath := match[0 : len(match)-len(extension)]
Expand All @@ -130,6 +104,95 @@ func convert(c *cli.Context) error {
return nil
}

/******************************************************************************

hash currently has two modes. Pipe and fileio.

The function isPipe() detects if input is coming from a pipe like:

	cat data/bsub.gbk | poly hash -i gbk -o json > test.json

In this case the output goes directly to standard out and can be redirected
into a file.

Without the "-o json" flag only the hash is printed to stdout.

If not from a pipe, hash checks args for file patterns to find, then iterates
over each matched file concurrently, parses it and hashes it.

For example:

	poly hash -o json *.gbk *.gff

will read all files in a directory with ".gbk" or ".gff" as their extension,
parse them, and then write out a similarly named file with the .json extension
that has the hash and the upper-cased hash function name filled in.

With "-o json", add --stdout to print that JSON to stdout instead of writing
files. Without "-o json" each hash is printed on its own line.

The hash function is selected by the "t" flag (see flagSwitchHash). JSON
encoding failures are reported on stderr and that input is skipped.

******************************************************************************/
func hash(c *cli.Context) {

	if isPipe() {
		annotatedSequence := parseStdin(c)
		sequenceHash := flagSwitchHash(c, annotatedSequence)

		if c.String("o") != "json" {
			fmt.Print(sequenceHash)
			return
		}

		annotatedSequence.Sequence.Hash = sequenceHash
		annotatedSequence.Sequence.HashFunction = strings.ToUpper(c.String("t"))

		output, err := json.MarshalIndent(annotatedSequence, "", " ")
		if err != nil {
			fmt.Fprintf(os.Stderr, "encoding stdin sequence as json: %v\n", err)
			return
		}
		fmt.Print(string(output))
		return
	}

	// gets glob pattern matches to determine which files to use.
	matches := getMatches(c)

	// declaring wait group outside loop
	var wg sync.WaitGroup

	// concurrently iterate through each pattern match, read the file, hash it.
	for _, match := range matches {

		// incrementing wait group for Go routine
		wg.Add(1)

		// match is passed as an argument so each Go routine gets its own copy.
		go func(match string) {
			// deferred so the wait group is released on every exit path.
			defer wg.Done()

			annotatedSequence := fileParser(c, match)
			sequenceHash := flagSwitchHash(c, annotatedSequence)

			if c.String("o") != "json" {
				fmt.Println(sequenceHash)
				return
			}

			annotatedSequence.Sequence.Hash = sequenceHash
			annotatedSequence.Sequence.HashFunction = strings.ToUpper(c.String("t"))

			if c.Bool("stdout") {
				output, err := json.MarshalIndent(annotatedSequence, "", " ")
				if err != nil {
					fmt.Fprintf(os.Stderr, "encoding %q as json: %v\n", match, err)
					return
				}
				fmt.Print(string(output))
				return
			}

			// swap the input extension for ".json" to name the output file.
			extension := filepath.Ext(match)
			outputPath := match[0 : len(match)-len(extension)]
			WriteJSON(annotatedSequence, outputPath+".json")
		}(match)
	}

	// waiting outside for loop for Go routines so they can run concurrently.
	wg.Wait()
}

// a simple helper function to convert an *os.File type into a string.
func stdinToString(file *os.File) string {
var stringBuffer bytes.Buffer
Expand Down Expand Up @@ -173,3 +236,100 @@ func isPipe() bool {
}
return flag
}

// parseStdin converts os.Stdin to a string with stdinToString and parses it
// into an AnnotatedSequence using the format named by the "i" flag: "json",
// "gbk"/"gb" or "gff". For any other flag value stdin is not read and the
// zero AnnotatedSequence is returned.
//
// A JSON decoding failure is reported on stderr; whatever was decoded before
// the failure is still returned, as before.
func parseStdin(c *cli.Context) AnnotatedSequence {
	var annotatedSequence AnnotatedSequence

	// logic for determining input format, then parses accordingly.
	switch c.String("i") {
	case "json":
		if err := json.Unmarshal([]byte(stdinToString(os.Stdin)), &annotatedSequence); err != nil {
			fmt.Fprintf(os.Stderr, "parsing json from stdin: %v\n", err)
		}
	case "gbk", "gb":
		annotatedSequence = ParseGbk(stdinToString(os.Stdin))
	case "gff":
		annotatedSequence = ParseGff(stdinToString(os.Stdin))
	}
	return annotatedSequence
}

// flagSwitchHash hashes annotatedSequence with the algorithm named by the "t"
// flag and returns the resulting hash string.
//
// The flag value is upper-cased before matching, so algorithm names are
// case-insensitive on the command line and every case label below must be
// written in upper case. "SHA244" is kept as an alias of "SHA224" because
// that misspelling was the only label that used to match. An unrecognized
// name returns the empty string.
func flagSwitchHash(c *cli.Context, annotatedSequence AnnotatedSequence) string {
	switch strings.ToUpper(c.String("t")) {
	case "MD5":
		return annotatedSequence.hash(crypto.MD5)
	case "SHA1":
		return annotatedSequence.hash(crypto.SHA1)
	case "SHA224", "SHA244":
		return annotatedSequence.hash(crypto.SHA224)
	case "SHA256":
		return annotatedSequence.hash(crypto.SHA256)
	case "SHA384":
		return annotatedSequence.hash(crypto.SHA384)
	case "SHA512":
		return annotatedSequence.hash(crypto.SHA512)
	case "RIPEMD160":
		return annotatedSequence.hash(crypto.RIPEMD160)
	case "SHA3_224":
		return annotatedSequence.hash(crypto.SHA3_224)
	case "SHA3_256":
		return annotatedSequence.hash(crypto.SHA3_256)
	case "SHA3_384":
		return annotatedSequence.hash(crypto.SHA3_384)
	case "SHA3_512":
		return annotatedSequence.hash(crypto.SHA3_512)
	case "SHA512_224":
		return annotatedSequence.hash(crypto.SHA512_224)
	case "SHA512_256":
		return annotatedSequence.hash(crypto.SHA512_256)
	// The BLAKE2 labels were previously mixed case ("BLAKE2s_256") and so
	// could never equal the upper-cased flag value.
	case "BLAKE2S_256":
		return annotatedSequence.hash(crypto.BLAKE2s_256)
	case "BLAKE2B_256":
		return annotatedSequence.hash(crypto.BLAKE2b_256)
	case "BLAKE2B_384":
		return annotatedSequence.hash(crypto.BLAKE2b_384)
	case "BLAKE2B_512":
		return annotatedSequence.hash(crypto.BLAKE2b_512)
	case "BLAKE3":
		return annotatedSequence.blake3Hash()
	default:
		return ""
	}
}

// getMatches treats every positional argument in c as a glob pattern and
// returns the combined matches after filtering them through
// uniqueNonEmptyElementsOf.
//
// A malformed pattern is reported on stderr and skipped; it contributes no
// matches, exactly as before, but no longer fails silently.
func getMatches(c *cli.Context) []string {
	var matches []string

	// take all args and get their pattern matches.
	args := c.Args()
	for argIndex := 0; argIndex < args.Len(); argIndex++ {
		pattern := args.Get(argIndex)

		found, err := filepath.Glob(pattern)
		if err != nil {
			fmt.Fprintf(os.Stderr, "skipping pattern %q: %v\n", pattern, err)
			continue
		}
		matches = append(matches, found...)
	}

	// filtering pattern matches for duplicates.
	return uniqueNonEmptyElementsOf(matches)
}

// fileParser reads the file at match into an AnnotatedSequence. The reader is
// chosen from the file extension or the "i" flag, tested in this order: gff,
// then gbk/gb, then json. The first rule that matches on either the extension
// or the flag wins. When nothing matches, the zero AnnotatedSequence is
// returned.
func fileParser(c *cli.Context, match string) AnnotatedSequence {
	var parsed AnnotatedSequence

	ext := filepath.Ext(match)
	inputFormat := c.String("i")

	// pick the reader that fills the AnnotatedSequence struct.
	switch {
	case ext == ".gff" || inputFormat == "gff":
		parsed = ReadGff(match)
	case ext == ".gbk" || ext == ".gb" || inputFormat == "gbk" || inputFormat == "gb":
		parsed = ReadGbk(match)
	case ext == ".json" || inputFormat == "json":
		parsed = ReadJSON(match)
	default:
		// TODO put default error handling here.
	}

	return parsed
}
35 changes: 35 additions & 0 deletions commands_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"os"
"os/exec"
"strings"
"testing"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -58,3 +59,37 @@ func TestConvert(t *testing.T) {
}

}

// TestHash runs the poly binary through bash and checks that hashing
// data/puc19.gbk yields the expected hash in three modes: piped input, a file
// argument, and JSON file output. No "-t" flag is passed, so the expected
// value is whatever the command's default hash function produces; the
// variable name indicates that is BLAKE3.
//
// Command failures are reported explicitly so that a missing binary or a
// non-zero exit is not mistaken for a plain hash mismatch.
func TestHash(t *testing.T) {

	puc19GbkBlake3Hash := "4031e1971acc8ff1bf0aa4ed623bc58beefc15e043075866a0854d592d80b28b"

	// runShell executes command with bash and returns its trimmed stdout,
	// flagging (but not aborting on) a failed execution.
	runShell := func(command string) string {
		output, err := exec.Command("bash", "-c", command).Output()
		if err != nil {
			t.Errorf("running %q: %v", command, err)
		}
		return strings.TrimSpace(string(output))
	}

	// testing pipe input
	hashOutputString := runShell("cat data/puc19.gbk | poly hash -i gbk")

	if hashOutputString != puc19GbkBlake3Hash {
		t.Errorf("TestHash for piped input has failed. Returned %q, want %q", hashOutputString, puc19GbkBlake3Hash)
	}

	// testing regular input
	hashOutputString = runShell("poly hash data/puc19.gbk")

	if hashOutputString != puc19GbkBlake3Hash {
		t.Errorf("TestHash for regular input has failed. Returned %q, want %q", hashOutputString, puc19GbkBlake3Hash)
	}

	// testing json write output; stdout is not inspected here because the
	// result is read back from the file the command writes.
	runShell("poly hash -o json data/puc19.gbk")
	hashOutputString = ReadJSON("data/puc19.json").Sequence.Hash
	os.Remove("data/puc19.json")

	if hashOutputString != puc19GbkBlake3Hash {
		t.Errorf("TestHash for json write output has failed. Returned %q, want %q", hashOutputString, puc19GbkBlake3Hash)
	}

}
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@ require (
github.com/PuerkitoBio/goquery v1.5.1
github.com/google/go-cmp v0.4.1
github.com/pmezard/go-difflib v1.0.0
github.com/sergi/go-diff v1.1.0
github.com/urfave/cli/v2 v2.2.0
golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9
lukechampine.com/blake3 v1.0.0
)
24 changes: 24 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,47 @@ github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5z
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.4.1 h1:/exdXoGamhu5ONeUJH0deniYLWYvQwW66yvlfiiKTu0=
github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/urfave/cli/v2 v2.2.0 h1:JTTnM6wKzdA0Jqodd966MVj4vWbbquZykeX1sKbe2C4=
github.com/urfave/cli/v2 v2.2.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9 h1:vEg9joUBmeBcK9iSJftGNf3coIG4HqZElCPehJsfAYM=
golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d h1:+R4KGOnez64A81RvjARKc4UT5/tI9ujCIVX+P5KiHuI=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
lukechampine.com/blake3 v1.0.0 h1:dNj1NVD7SLgkU7dykKjmmOSOTTx7ZmxnDyUyvxnQP2Q=
lukechampine.com/blake3 v1.0.0/go.mod h1:e0XQzEQp6LtbXBhzYxRoh6s3kcmX+fMMg8sC9VgWloQ=
Loading