Skip to content

Commit

Permalink
Merge pull request #6 from Koeng101/prime
Browse files Browse the repository at this point in the history
Added Seqhash functions
  • Loading branch information
Koeng101 authored Jun 18, 2020
2 parents c782475 + 0c75e93 commit b134e47
Show file tree
Hide file tree
Showing 8 changed files with 465 additions and 34 deletions.
224 changes: 192 additions & 32 deletions commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ package main
import (
"bufio"
"bytes"
"crypto"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"

"github.com/urfave/cli/v2"
Expand Down Expand Up @@ -40,16 +42,7 @@ parse them, and then spit out a similarly named file with the .json extension.
func convert(c *cli.Context) error {
if isPipe() {

var annotatedSequence AnnotatedSequence

// logic for determining input format, then parses accordingly.
if c.String("i") == "json" {
json.Unmarshal([]byte(stdinToString(os.Stdin)), &annotatedSequence)
} else if c.String("i") == "gbk" || c.String("i") == "gb" {
annotatedSequence = ParseGbk(stdinToString(os.Stdin))
} else if c.String("i") == "gff" {
annotatedSequence = ParseGff(stdinToString(os.Stdin))
}
annotatedSequence := parseStdin(c)

var output []byte

Expand All @@ -66,16 +59,8 @@ func convert(c *cli.Context) error {
//
} else {

var matches []string

//take all args and get their pattern matches.
for argIndex := 0; argIndex < c.Args().Len(); argIndex++ {
match, _ := filepath.Glob(c.Args().Get(argIndex))
matches = append(matches, match...)
}

//filtering pattern matches for duplicates.
matches = uniqueNonEmptyElementsOf(matches)
// gets glob pattern matches to determine which files to use.
matches := getMatches(c)

// TODO write basic check to see if input flag or all paths have accepted file extensions.

Expand All @@ -93,18 +78,7 @@ func convert(c *cli.Context) error {
// executing Go routine.
go func(match string) {
extension := filepath.Ext(match)
var annotatedSequence AnnotatedSequence

// determining which reader to use and parse into AnnotatedSequence struct.
if extension == ".gff" || c.String("i") == "gff" {
annotatedSequence = ReadGff(match)
} else if extension == ".gbk" || extension == ".gb" || c.String("i") == "gbk" || c.String("i") == "gb" {
annotatedSequence = ReadGbk(match)
} else if extension == ".json" || c.String("i") == "json" {
annotatedSequence = ReadJSON(match)
} else {
// TODO put default error handling here.
}
annotatedSequence := fileParser(c, match)

// determining output format and name, then writing out to name.
outputPath := match[0 : len(match)-len(extension)]
Expand All @@ -130,6 +104,95 @@ func convert(c *cli.Context) error {
return nil
}

/******************************************************************************
hash currently has two modes. Pipe and fileio.

The function isPipe() detects if input is coming from a pipe like:

	cat data/bsub.gbk | poly hash -i gbk -o json > test.json

In this case the output goes directly to standard out and can be redirected
into a file.

Without the "-o json" flag only the hash is printed to stdout.
To force all output to go to stdout use --stdout.

If not from a pipe, hash checks args for file patterns to find, then iterates
over each matched file to read it in, hash it, and spit out the desired
output.

For example:

	poly hash -o json *.gbk *.gff

will read all files in a directory with ".gbk" or ".gff" as their extension,
parse them, and then spit out a similarly named file with the .json extension
that includes the hash and the name of the hash function used.

JSON encoding failures are reported on stderr instead of being dropped.
******************************************************************************/
func hash(c *cli.Context) {

	if isPipe() {
		annotatedSequence := parseStdin(c)
		sequenceHash := flagSwitchHash(c, annotatedSequence)

		// without "-o json" only the bare hash is emitted (no trailing newline).
		if c.String("o") != "json" {
			fmt.Print(sequenceHash)
			return
		}

		annotatedSequence.Sequence.Hash = sequenceHash
		annotatedSequence.Sequence.HashFunction = strings.ToUpper(c.String("t"))
		output, err := json.MarshalIndent(annotatedSequence, "", " ")
		if err != nil {
			fmt.Fprintf(os.Stderr, "marshaling piped sequence to json: %v\n", err)
			return
		}
		fmt.Print(string(output))
		return
	}

	// gets glob pattern matches to determine which files to use.
	matches := getMatches(c)

	// declaring wait group outside loop
	var wg sync.WaitGroup

	// concurrently iterate through each pattern match, read the file, hash it,
	// then output in the requested format.
	for _, match := range matches {

		// incrementing wait group for Go routine
		wg.Add(1)

		// executing Go routine.
		go func(match string) {
			// deferred so the wait group is released on every exit path.
			defer wg.Done()

			annotatedSequence := fileParser(c, match)
			sequenceHash := flagSwitchHash(c, annotatedSequence)

			if c.String("o") != "json" {
				fmt.Println(sequenceHash)
				return
			}

			annotatedSequence.Sequence.Hash = sequenceHash
			annotatedSequence.Sequence.HashFunction = strings.ToUpper(c.String("t"))

			if c.Bool("stdout") {
				output, err := json.MarshalIndent(annotatedSequence, "", " ")
				if err != nil {
					fmt.Fprintf(os.Stderr, "marshaling %q to json: %v\n", match, err)
					return
				}
				fmt.Print(string(output))
				return
			}

			// swap the input extension for ".json" to build the output path.
			extension := filepath.Ext(match)
			outputPath := match[0 : len(match)-len(extension)]
			WriteJSON(annotatedSequence, outputPath+".json")

		}(match) // passing match to Go routine anonymous function.
	}

	// waiting outside for loop for Go routines so they can run concurrently.
	wg.Wait()
}

// a simple helper function to convert an *os.File type into a string.
func stdinToString(file *os.File) string {
var stringBuffer bytes.Buffer
Expand Down Expand Up @@ -173,3 +236,100 @@ func isPipe() bool {
}
return flag
}

// parseStdin reads piped standard input and parses it into an
// AnnotatedSequence using the format named by the "i" flag ("json", "gbk",
// "gb" or "gff"). Any other flag value leaves stdin unread and returns the
// zero-value AnnotatedSequence.
func parseStdin(c *cli.Context) AnnotatedSequence {
	var parsed AnnotatedSequence

	switch c.String("i") {
	case "json":
		// decode errors are not reported here; parsed is returned as decoded so far.
		_ = json.Unmarshal([]byte(stdinToString(os.Stdin)), &parsed)
	case "gbk", "gb":
		parsed = ParseGbk(stdinToString(os.Stdin))
	case "gff":
		parsed = ParseGff(stdinToString(os.Stdin))
	}

	return parsed
}

// flagSwitchHash hashes annotatedSequence with the algorithm named by the "t"
// flag and returns the string produced by the selected hash method.
//
// The flag value is upper-cased before matching, so algorithm names are
// case-insensitive and every case label below must itself be upper case.
// "SHA244" is kept as an alias of "SHA224" because it was the spelling this
// function originally accepted. An unrecognized name returns an empty string.
func flagSwitchHash(c *cli.Context, annotatedSequence AnnotatedSequence) string {

	var hashString string
	switch strings.ToUpper(c.String("t")) {
	case "MD5":
		hashString = annotatedSequence.hash(crypto.MD5)
	case "SHA1":
		hashString = annotatedSequence.hash(crypto.SHA1)
	case "SHA224", "SHA244":
		hashString = annotatedSequence.hash(crypto.SHA224)
	case "SHA256":
		hashString = annotatedSequence.hash(crypto.SHA256)
	case "SHA384":
		hashString = annotatedSequence.hash(crypto.SHA384)
	case "SHA512":
		hashString = annotatedSequence.hash(crypto.SHA512)
	case "RIPEMD160":
		hashString = annotatedSequence.hash(crypto.RIPEMD160)
	case "SHA3_224":
		hashString = annotatedSequence.hash(crypto.SHA3_224)
	case "SHA3_256":
		hashString = annotatedSequence.hash(crypto.SHA3_256)
	case "SHA3_384":
		hashString = annotatedSequence.hash(crypto.SHA3_384)
	case "SHA3_512":
		hashString = annotatedSequence.hash(crypto.SHA3_512)
	case "SHA512_224":
		hashString = annotatedSequence.hash(crypto.SHA512_224)
	case "SHA512_256":
		hashString = annotatedSequence.hash(crypto.SHA512_256)
	case "BLAKE2S_256":
		hashString = annotatedSequence.hash(crypto.BLAKE2s_256)
	case "BLAKE2B_256":
		hashString = annotatedSequence.hash(crypto.BLAKE2b_256)
	case "BLAKE2B_384":
		hashString = annotatedSequence.hash(crypto.BLAKE2b_384)
	case "BLAKE2B_512":
		hashString = annotatedSequence.hash(crypto.BLAKE2b_512)
	case "BLAKE3":
		hashString = annotatedSequence.blake3Hash()
	}
	return hashString
}

// getMatches expands every positional argument of c as a glob pattern and
// returns the combined paths after passing them through
// uniqueNonEmptyElementsOf.
func getMatches(c *cli.Context) []string {
	args := c.Args()

	var globbed []string
	for i, total := 0, args.Len(); i < total; i++ {
		// the Glob error is ignored; a pattern that yields no paths adds nothing.
		paths, _ := filepath.Glob(args.Get(i))
		globbed = append(globbed, paths...)
	}

	return uniqueNonEmptyElementsOf(globbed)
}

// fileParser reads the file at match into an AnnotatedSequence. The reader is
// picked from the file extension or the "i" flag, checked in this order: gff,
// then gbk/gb, then json. When nothing matches, the zero-value
// AnnotatedSequence is returned.
func fileParser(c *cli.Context, match string) AnnotatedSequence {
	ext := filepath.Ext(match)
	inputFormat := c.String("i")

	switch {
	case ext == ".gff" || inputFormat == "gff":
		return ReadGff(match)
	case ext == ".gbk" || ext == ".gb" || inputFormat == "gbk" || inputFormat == "gb":
		return ReadGbk(match)
	case ext == ".json" || inputFormat == "json":
		return ReadJSON(match)
	default:
		// TODO put default error handling here.
		return AnnotatedSequence{}
	}
}
35 changes: 35 additions & 0 deletions commands_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"os"
"os/exec"
"strings"
"testing"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -58,3 +59,37 @@ func TestConvert(t *testing.T) {
}

}

// TestHash runs the poly binary through bash and checks that hashing
// data/puc19.gbk yields the expected hash for piped input, for a file
// argument, and for JSON file output.
func TestHash(t *testing.T) {

	const puc19GbkBlake3Hash = "4031e1971acc8ff1bf0aa4ed623bc58beefc15e043075866a0854d592d80b28b"

	// runShell executes command via bash and returns its trimmed stdout. A
	// failure to run is reported but does not abort the test, so the hash
	// comparison that follows still runs.
	runShell := func(command string) string {
		output, err := exec.Command("bash", "-c", command).Output()
		if err != nil {
			t.Errorf("running %q: %v", command, err)
		}
		return strings.TrimSpace(string(output))
	}

	// testing pipe input
	hashOutputString := runShell("cat data/puc19.gbk | poly hash -i gbk")
	if hashOutputString != puc19GbkBlake3Hash {
		t.Errorf("TestHash for piped input has failed. Returned %q, want %q", hashOutputString, puc19GbkBlake3Hash)
	}

	// testing regular input
	hashOutputString = runShell("poly hash data/puc19.gbk")
	if hashOutputString != puc19GbkBlake3Hash {
		t.Errorf("TestHash for regular input has failed. Returned %q, want %q", hashOutputString, puc19GbkBlake3Hash)
	}

	// testing json write output: the command writes data/puc19.json, which is
	// read back and then removed.
	runShell("poly hash -o json data/puc19.gbk")
	hashOutputString = ReadJSON("data/puc19.json").Sequence.Hash
	os.Remove("data/puc19.json")

	if hashOutputString != puc19GbkBlake3Hash {
		t.Errorf("TestHash for json write output has failed. Returned %q, want %q", hashOutputString, puc19GbkBlake3Hash)
	}

}
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@ require (
github.com/PuerkitoBio/goquery v1.5.1
github.com/google/go-cmp v0.4.1
github.com/pmezard/go-difflib v1.0.0
github.com/sergi/go-diff v1.1.0
github.com/urfave/cli/v2 v2.2.0
golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9
lukechampine.com/blake3 v1.0.0
)
24 changes: 24 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,47 @@ github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5z
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.4.1 h1:/exdXoGamhu5ONeUJH0deniYLWYvQwW66yvlfiiKTu0=
github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/urfave/cli/v2 v2.2.0 h1:JTTnM6wKzdA0Jqodd966MVj4vWbbquZykeX1sKbe2C4=
github.com/urfave/cli/v2 v2.2.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9 h1:vEg9joUBmeBcK9iSJftGNf3coIG4HqZElCPehJsfAYM=
golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d h1:+R4KGOnez64A81RvjARKc4UT5/tI9ujCIVX+P5KiHuI=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
lukechampine.com/blake3 v1.0.0 h1:dNj1NVD7SLgkU7dykKjmmOSOTTx7ZmxnDyUyvxnQP2Q=
lukechampine.com/blake3 v1.0.0/go.mod h1:e0XQzEQp6LtbXBhzYxRoh6s3kcmX+fMMg8sC9VgWloQ=
Loading

0 comments on commit b134e47

Please sign in to comment.