Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
aa6445c
commit a5879e3
Showing
9 changed files
with
347 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
package cmd | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
|
||
"github.com/evolbioinfo/goalign/align" | ||
"github.com/evolbioinfo/goalign/io" | ||
"github.com/spf13/cobra" | ||
) | ||
|
||
var compressOutput string | ||
var compressWeightOutput string | ||
|
||
var compressCmd = &cobra.Command{ | ||
Use: "compress", | ||
Short: "Removes identical patterns/sites from an input alignment", | ||
Long: `Removes identical patterns/sites from an input alignment | ||
And prints in the weight file the number of occurence of each pattern | ||
Example: | ||
ali.phy | ||
1 GGGGGGGGGGGGGGGGGGGG | ||
2 TTTTTTTTTTTTTTTTTTTT | ||
3 GGGGGGGGGGCCCCCCCCCC | ||
4 AAAAAAAAAAAAAAAAAAAA | ||
goalign compress -i ali.phy will produce: | ||
1 GG | ||
2 TT | ||
3 GC | ||
4 AA | ||
and weight file: | ||
10 | ||
10 | ||
`, | ||
RunE: func(cmd *cobra.Command, args []string) (err error) { | ||
var aligns *align.AlignChannel | ||
var f, wf *os.File | ||
|
||
if aligns, err = readalign(infile); err != nil { | ||
io.LogError(err) | ||
return | ||
} | ||
if f, err = openWriteFile(compressOutput); err != nil { | ||
io.LogError(err) | ||
return | ||
} | ||
defer closeWriteFile(f, compressOutput) | ||
|
||
if wf, err = openWriteFile(compressWeightOutput); err != nil { | ||
io.LogError(err) | ||
return | ||
} | ||
defer closeWriteFile(f, compressWeightOutput) | ||
|
||
for al := range aligns.Achan { | ||
var w []int | ||
if w = al.Compress(); err != nil { | ||
io.LogError(err) | ||
return | ||
} else { | ||
writeAlign(al, f) | ||
writeWeights(w, wf) | ||
} | ||
} | ||
|
||
if aligns.Err != nil { | ||
err = aligns.Err | ||
io.LogError(err) | ||
} | ||
return | ||
}, | ||
} | ||
|
||
func init() { | ||
compressCmd.PersistentFlags().StringVarP(&compressOutput, "output", "o", "stdout", "Compressed output alignment file") | ||
compressCmd.PersistentFlags().StringVar(&compressWeightOutput, "weight-out", "none", "Pattern weight output file") | ||
RootCmd.AddCommand(compressCmd) | ||
} | ||
|
||
// writeWeights prints every value of weights to f in decimal, one value per
// line, in slice order. Write errors reported by fmt.Fprintf are not checked.
func writeWeights(weights []int, f *os.File) {
	for i := 0; i < len(weights); i++ {
		fmt.Fprintf(f, "%d\n", weights[i])
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Goalign: toolkit and api for alignment manipulation | ||
|
||
## API | ||
|
||
### compress | ||
|
||
Remove identical patterns/sites | ||
|
||
```go | ||
package main | ||
|
||
import ( | ||
"bufio" | ||
"fmt" | ||
"io" | ||
|
||
"github.com/evolbioinfo/goalign/align" | ||
"github.com/evolbioinfo/goalign/io/fasta" | ||
"github.com/evolbioinfo/goalign/io/utils" | ||
) | ||
|
||
func main() { | ||
var fi io.Closer | ||
var r *bufio.Reader | ||
var err error | ||
var al align.Alignment | ||
var weights []int | ||
|
||
/* Get reader (plain text or gzip) */ | ||
fi, r, err = utils.GetReader("align.fa") | ||
if err != nil { | ||
panic(err) | ||
} | ||
|
||
/* Parse Fasta */ | ||
al, err = fasta.NewParser(r).Parse() | ||
if err != nil { | ||
panic(err) | ||
} | ||
fi.Close() | ||
|
||
/* Compress */ | ||
weights = al.Compress() | ||
fmt.Println(fasta.WriteAlignment(al)) | ||
for _, w := range weights { | ||
fmt.Println(w) | ||
} | ||
} | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# Goalign: toolkit and api for alignment manipulation | ||
|
||
## Commands | ||
|
||
### compress | ||
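This command removes identical patterns/sites from an input alignment and writes the number of occurrences of each remaining pattern to the weight file given by `--weight-out`. | ||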
|
||
#### Usage | ||
``` | ||
Usage: | ||
goalign compress [flags] | ||
Flags: | ||
-o, --output string Compressed output alignment file (default "stdout") | ||
--weight-out string Pattern weight output file (default "none") | ||
Global Flags: | ||
-i, --align string Alignment input file (default "stdin") | ||
--auto-detect Auto detects input format (overrides -p, -x and -u) | ||
-u, --clustal Alignment is in clustal? default fasta | ||
--input-strict Strict phylip input format (only used with -p) | ||
-x, --nexus Alignment is in nexus? default fasta | ||
--no-block Write Phylip sequences without space separated blocks (only used with -p) | ||
--one-line Write Phylip sequences on 1 line (only used with -p) | ||
--output-strict Strict phylip output format (only used with -p) | ||
-p, --phylip Alignment is in phylip? default fasta | ||
``` | ||
|
||
#### Examples | ||
|
||
``` | ||
cat > input <<EOF | ||
4 20 | ||
1 GGGGGGGGGGGGGGGGGGGG | ||
2 TTTTTTTTTTTTTTTTTTTT | ||
3 GGGGGGGGGGCCCCTTTTTT | ||
4 AAAAAAAAAAAAAAAAAAAA | ||
EOF | ||
``` | ||
|
||
goalign compress -i input -p --weight-out wres | ||
|
||
should output: | ||
``` | ||
4 3 | ||
1 GGG | ||
2 TTT | ||
3 CGT | ||
4 AAA | ||
``` | ||
|
||
and produce a weight file like: | ||
``` | ||
4 | ||
10 | ||
6 | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.