Skip to content

Commit

Permalink
Merge branch 'master' of github.com:hng/BiomolecularStructures
Browse files Browse the repository at this point in the history
  • Loading branch information
gp0 committed Mar 29, 2015
2 parents 18f3cb3 + d3436c1 commit 35dccfa
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 8 deletions.
25 changes: 25 additions & 0 deletions docs/mafft.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,31 @@ Group-to-group alignments with input in FastaIO format

*group1* and *group2* must both be alignments in FastaIO format.

### Helper functions for aligned FASTA
This module also includes a few helper functions for the FastaIO data format (the format returned by the mafft functions of this module).

```julia
alignment_length(fasta)
```
Returns the length of the alignment.

*fasta*: A FastaIO data format object

```julia
to_aminoacids(fasta)
```
Converts a FastaIO-formatted array into an array of BioSeq AminoAcid.

*fasta*: A FastaIO data format object

```julia
print_fasta(fasta)
```
Prints a FastaIO object to the screen in FASTA layout: for each entry, a `>`-prefixed description line followed by the sequence on its own line.

*fasta*: A FastaIO data format object


## Supported pre-configurations (strategies)

The following MAFFT strategies are available as built-in preconfigurations. To use one, pass the corresponding symbol (shown in parentheses) to the function call.
Expand Down
8 changes: 3 additions & 5 deletions src/MAFFT/mafft.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#= Julia Wrapper for MAFFT (http://mafft.cbrc.jp/alignment/software/)
=#
module Mafft
export mafft, mafft_from_string, mafft_from_fasta, mafft_profile, mafft_profile_from_string, mafft_profile_from_fasta, print_aligned_fasta, alignment_length, to_aminoacids
export mafft, mafft_from_string, mafft_from_fasta, mafft_profile, mafft_profile_from_string, mafft_profile_from_fasta, print_fasta, alignment_length, to_aminoacids

using FastaIO
using BioSeq
Expand Down Expand Up @@ -125,13 +125,11 @@ export mafft, mafft_from_string, mafft_from_fasta, mafft_profile, mafft_profile_

# helper methods for aligned FASTA

function print_aligned_fasta(fasta)
aligned = String[]
function print_fasta(fasta)
for f in fasta
push!(aligned, f[2])
println(">", f[1])
println(f[2])
end
return aligned
end

# returns the length of the alignment (FastaIO-format as input)
Expand Down
6 changes: 3 additions & 3 deletions test/mafft.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@ push!(expected_amino_acids, aminoacid("MSNLGRILCAVLGLFHLLSANPVPSSKLQIAIEEIISELVN

@test mafft_from_fasta(readfasta(Pkg.dir("BiomolecularStructures","examples/fasta","il4.fasta"))) == mafft_expected

expected_aligned_output = String["MSNLGRILCAVLGLFHLLSANPVPSSKLQIAIEEIISELVNNKITHKKCFVPTPYDDEEEASVEEISCRAFKSLKHVCASERKNLRLLNASLITMFSENVECSINNDEQKDLISVIEDLLTFFRAQMRKLVMNPKH","MGLTSQLIPTLVCLLACTSNFVHGHKCDITLQEIIKTLNILTARKNSCMELPVTDVFAAPENTTEKETFCRASTVLRHIYRHHTCMKSLLSGLDRNLSSMANMTCSVHEAKKSTLKDFLERLKTIMKEKYSKC","MSSSLPTLLALLVLLAGPGAVPTLCLQLSVPLMESIRIVNDIQGEVSCVKMNVTDIFADNKTNNKTELLCKASTIVWESQHCHKNLQGLFLNMRQLLNASSTSLKAPCPTAAGNTTSMEKFLADLRTFFHQLAKNK","MGLTSQLIPTLVCLLALTSTFVHGHNFNITIKEIIKMLNILTARNDSCMELTVKDVFTAPKNTSDKEIFCRAATVLRQIYTHNCSNRYLRGLYRNLSSMANKTCSMNEIKKSTLKDFLERLKVIMQKKYYRH","MGLSPHLAVTLFCFLICTGNGIHGCNDSPLREIINTLNQVTEKGTPCTEMFVPDVLTATRNTTENELICRASRVLRKFYFPRDVPPCLKNKSGVLGELRKLCRGVSGLNSLRSCTVNESTLTTLKDFLESLKSILRGKYLQSCTSMS","MGLTSQLLPPLFFLLACAGNFVHGHKCDITLQEIIKTLNSLTEQKNTTEKETFCRAATVLRQFYSHHEKDTRCLGATAQQFHRHKQLIRFLKRLDRNLWGLAGLNSCPVKEANQSTLENFLERLKTIMREKYSKCSS","MDLTSQLIPALVCLLAFTSTFVHGQNFNNTLKEIIKTLNILTARNDSCMELTVMDVLAAPKNTSDKEIFCRATTVLRQIYTHHNCSTKFLKGLDRNLSSMANRTCSVNEVKKCTLKDFLERLKAIMQKKYSKH","HKCDITLQEIIKTLNSLTEQKTLCTELTVTDIFAASKNTTEKETFCRAATVLRQFYSHHEKDTRCLGATAQQFHRHKQLIRFLKRLDRNLWGLAGLNSCPVKEANQSTLENFLERLKTIMREKYSKCSS"]
#expected_aligned_output = String["MSNLGRILCAVLGLFHLLSANPVPSSKLQIAIEEIISELVNNKITHKKCFVPTPYDDEEEASVEEISCRAFKSLKHVCASERKNLRLLNASLITMFSENVECSINNDEQKDLISVIEDLLTFFRAQMRKLVMNPKH","MGLTSQLIPTLVCLLACTSNFVHGHKCDITLQEIIKTLNILTARKNSCMELPVTDVFAAPENTTEKETFCRASTVLRHIYRHHTCMKSLLSGLDRNLSSMANMTCSVHEAKKSTLKDFLERLKTIMKEKYSKC","MSSSLPTLLALLVLLAGPGAVPTLCLQLSVPLMESIRIVNDIQGEVSCVKMNVTDIFADNKTNNKTELLCKASTIVWESQHCHKNLQGLFLNMRQLLNASSTSLKAPCPTAAGNTTSMEKFLADLRTFFHQLAKNK","MGLTSQLIPTLVCLLALTSTFVHGHNFNITIKEIIKMLNILTARNDSCMELTVKDVFTAPKNTSDKEIFCRAATVLRQIYTHNCSNRYLRGLYRNLSSMANKTCSMNEIKKSTLKDFLERLKVIMQKKYYRH","MGLSPHLAVTLFCFLICTGNGIHGCNDSPLREIINTLNQVTEKGTPCTEMFVPDVLTATRNTTENELICRASRVLRKFYFPRDVPPCLKNKSGVLGELRKLCRGVSGLNSLRSCTVNESTLTTLKDFLESLKSILRGKYLQSCTSMS","MGLTSQLLPPLFFLLACAGNFVHGHKCDITLQEIIKTLNSLTEQKNTTEKETFCRAATVLRQFYSHHEKDTRCLGATAQQFHRHKQLIRFLKRLDRNLWGLAGLNSCPVKEANQSTLENFLERLKTIMREKYSKCSS","MDLTSQLIPALVCLLAFTSTFVHGQNFNNTLKEIIKTLNILTARNDSCMELTVMDVLAAPKNTSDKEIFCRATTVLRQIYTHHNCSTKFLKGLDRNLSSMANRTCSVNEVKKCTLKDFLERLKAIMQKKYSKH","HKCDITLQEIIKTLNSLTEQKTLCTELTVTDIFAASKNTTEKETFCRAATVLRQFYSHHEKDTRCLGATAQQFHRHKQLIRFLKRLDRNLWGLAGLNSCPVKEANQSTLENFLERLKTIMREKYSKCSS"]

aligned = readfasta(Pkg.dir("BiomolecularStructures","examples/fasta","il4.fasta"))
#aligned = readfasta(Pkg.dir("BiomolecularStructures","examples/fasta","il4.fasta"))

@test print_aligned_fasta(aligned) == expected_aligned_output
#@test print_fasta(aligned) == expected_aligned_output

mafft_profile_expected = {("_seed_3UGM:A|PDBID|CHAIN|SEQUENCE","MASSHHHHHHSSGLVPRGSSGSSMAARPPRAKPAPRRRAAQPSDASPAAQVDLRTLGYSQQQQEKIKPKVRSTVAQHHEALVGHGFTHAHIVALSQHPAALGTVAVTYQHIITALPEATHEDIVGVGKQWSGARALEALLTDAGELRGPPLQLDTGQLVKIAKRGGVTAMEAVHASRNALTGA---PLNLTPAQVVAIASNNGGKQALETVQRLLPVLCQAHGLTPAQVVAIASHDGGKQALETMQRLLPVLCQAHGLPPDQVVAIASNIGGKQALETVQRLLPVLCQAHGLTPDQVVAIASHGGGKQALETVQRLLPVLCQAHGLTPDQVVAIASHDGGKQALETVQRLLPVLCQAHGLTPDQVVAIASNGGGKQALETVQRLLPVLCQAHGLTPDQVVAIASNGGKQALETVQRLLPVLCQAHGLTPDQVVAIASHDGGKQALETVQRLLPVLCQTHGLTPAQVVAIASHDGGKQALETVQQLLPVLCQAHGLTPDQVVAIASNIGGKQALATVQRLLPVLCQAHGLTPDQVVAIASNGGGKQALETVQRLLPVLCQAHGLTPDQVVAIASNGGGKQALETVQRLLPVLCQAHGLTQVQVVAIASNIGGKQALETVQRLLPVLCQAHGLTPAQVVAIASHDGGKQALETVQRLLPVLCQAHGLTPDQVVAIASNGGGKQALETVQRLLPVLCQAHGLTQEQVVAIASNNGGKQALETVQRLLPVLCQAHGLTPDQVVAIASNGGGKQALETVQRLLPVLCQAHGLTPAQVVAIASNIGGKQALETVQRLLPVLCQDHGLTLAQVVAIASNIGGKQALETVQRLLPVLCQAHGLTQDQVVAIASNIGGKQALETVQRLLPVLCQDHGLTPDQVVAIASNIGGKQALETVQRLLPVLCQDHGLTLDQVVAIASNGGKQALETVQRLLPVLCQDHGLTPDQVVAIASNSGGKQALETVQRLLPVLCQDHGLTPNQVVAIASNGGKQALESIVAQLSRPDPALAALTNDHLVALACLGGRPAMDAVKKGLPHAPELIRRVN-RRIGERTSHRV"),("_seed_3UGM:B|PDBID|CHAIN|SEQUENCE","T-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AGATATGCATCTCCCCCTA--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CTG---TACACCACCAAAAG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T"),("_seed_3UGM:C|PDBID|CHAIN|SEQUENCE","TTTT----------------------------------------------------------------------------------------------------------------------------------------------------------------GGT-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GGTGTACAGTAGGGGGAGATGCATATCTAAC--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"),("_seed_gi|166158130|ref|NP_001107279.1| interleukin 4 [Xenopus (Silurana) 
tropicalis]","MSNL------GRILCA----------------------------------------------------------------------VLGLFHLLSANPVPSSKLQIAIEEIISELV------------------------------------------------------------NNKITHK-----KCFVPTPYDDEEEASVEE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ISCRA-----------------------FKSLKHVCASER----------------------------------------------------------------------------------------------------------------------KNLRLLNASLITMFSE----------------------------NVECSI-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NNDEQKDLISVIEDLL-------------------------------------TFFRAQM-RKLVMNP--KH"),("_seed_gi|55742622|ref|NP_999288.1| interleukin-4 precursor [Sus 
scrofa]","M-GL------TSQLIP----------------------------------------------------------------------TLVCLLACTSNFVHGHKCDITLQEIIKTL--------------------------------------------------------------NILTARKNSCMELPVTDVFAAPENTTEKE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TFCRA-----------------------STVLRHIYRHHT-----------------------------CMK--------------------------------------------------------------------------------------SLLSGLDRNLSSMAN------------------------------MTCSV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------HEAKKSTLKDFLERLK-------------------------------------TIMKEKY-SKC--------"),("_seed_gi|55741696|ref|NP_001007080.1| interleukin 4 [Gallus 
gallus]","MSSS------LPTLLA----------------------------------------------------------------------LLVLLAGPGAVPTLCLQLSVPLMESIRIV--------------------------------------------------------------NDIQGE-VSCVKMNVTDIFADNKTNNKTE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LLCKA-----------------------STI---VWESQH-----------------------------CH---------------------------------------------------------------------------------------KNLQGLFLNMRQLLNASSTS-----------------------LKAPCPT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AAGNTTSMEKFLADLR-------------------------------------TFFHQLAKNK---------"),("_seed_gi|50978886|ref|NP_001003159.1| interleukin-4 precursor [Canis lupus 
familiaris]","M-GL------TSQLIP----------------------------------------------------------------------TLVCLLALTSTFVHGHNFNITIKEIIKML--------------------------------------------------------------NILTARNDSCMELTVKDVFTAPKNTSDKE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------IFCRA-----------------------ATVLRQIYTHN------------------------------CSN--------------------------------------------------------------------------------------RYLRGLYRNLSSMAN------------------------------KTCSM-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NEIKKSTLKDFLERLK-------------------------------------VIMQKKY-YRH--------"),("_seed_gi|42627877|ref|NP_958427.1| interleukin-4 precursor [Rattus 
norvegicus]","M-GL------SPHLAV----------------------------------------------------------------------TLFCFLICTGNGIHGCN-DSPLREIINTL--------------------------------------------------------------NQVTEKGTPCTEMFVPDVLTATRNTTENE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LICRA-----------------------SRVLRKFYFPRDVPP--------------------------CLK--------------------------------------------------------------------------------NKSGVLGELRKLCRGVSGLNS-----------------------------LRSCTV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NESTLTTLKDFLESLK-------------------------------------SILRGKYLQSCTSM----S"),("_seed_gi|58743333|ref|NP_001008993.1| interleukin-4 isoform delta2 [Pan 
troglodytes]","M-GL------TSQLLP----------------------------------------------------------------------PLFFLLACAGNFVHGHKCDITLQEIIKTL--------------------------------------------------------------NSLTEQ----------------KNTTEKE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TFCRA-----------------------ATVLRQFYSHHEKDTR-------------------------CLG-------------------------------------------------------ATAQ-------QFH-----------RHKQLIRFLKRLDRNLWGLAG-----------------------------LNSCPV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------KEANQSTLENFLERLK-------------------------------------TIMREKY-SKCSS------"),("_seed_gi|112807223|ref|NP_001036804.1| interleukin-4 precursor [Felis 
catus]","M-DL------TSQLIP----------------------------------------------------------------------ALVCLLAFTSTFVHGQNFNNTLKEIIKTL--------------------------------------------------------------NILTARNDSCMELTVMDVLAAPKNTSDKE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------IFCRA-----------------------TTVLRQIYTHHN-----------------------------CST--------------------------------------------------------------------------------------KFLKGLDRNLSSMAN------------------------------RTCSV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NEVKKCTLKDFLERLK-------------------------------------AIMQKKY-SKH--------"),("_seed_2B8U:A|PDBID|CHAIN|SEQUENCE|HOMO 
SAPIENS","-----------------------------------------------------------------------------------------------------HKCDITLQEIIKTL--------------------------------------------------------------NSLTEQKTLCTELTVTDIFAASKNTTEKE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TFCRA-----------------------ATVLRQFYSHHEKDTR-------------------------CLG-------------------------------------------------------ATAQ-------QFH-----------RHKQLIRFLKRLDRNLWGLAG-----------------------------LNSCPV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------KEANQSTLENFLERLK-------------------------------------TIMREKY-SKCSS------")}
@test mafft_profile(Pkg.dir("BiomolecularStructures", "examples", "fasta", "3UGM.ali"),Pkg.dir("BiomolecularStructures", "examples", "fasta", "il4.ali")) == mafft_profile_expected
Expand Down

0 comments on commit 35dccfa

Please sign in to comment.