Skip to content

Commit

Permalink
Merge branch 'mafft'
Browse files Browse the repository at this point in the history
  • Loading branch information
hng committed Mar 28, 2015
2 parents 7c9d80c + 19c05a2 commit 41b8fae
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 4 deletions.
19 changes: 17 additions & 2 deletions docs/mafft.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ Runs mafft with the provided fasta file and returns the alignment in FastaIO dat
*preconfiguration*: optional commandline arguments for MAFFT (array of strings)

```julia
mafft_from_string(fasta_in::String, preconfiguration=:default)
mafft_from_string(fasta_in::String, preconfiguration=:default)
```

Calls MAFFT with the given FASTA string as input and returns the aligned FASTA in the FastaIO data format.
Expand All @@ -53,7 +53,7 @@ Calls MAFFT with the given FASTA string as input and returns aligned FASTA in th
*preconfiguration*: optional commandline arguments for MAFFT (array of strings)

```julia
mafft_from_fasta(fasta_in, preconfiguration=:default)
mafft_from_fasta(fasta_in, preconfiguration=:default)
```

Calls MAFFT with the given FASTA in FastaIO format.
Expand All @@ -69,6 +69,21 @@ Group-to-group alignments

*group1* and *group2* have to be paths to files containing alignments. Returns the aligned FASTA in the FastaIO data format.

```julia
mafft_profile_from_string(group1::String, group2::String)
```
Group-to-group alignments with input strings in FASTA format.

*group1* and *group2* have to be strings containing alignments in FASTA format. Returns the aligned FASTA in the FastaIO data format.

```julia
mafft_profile_from_fasta(group1, group2)
```

Group-to-group alignments with input in FastaIO format.

*group1* and *group2* have to be alignments in FastaIO format. Returns the aligned FASTA in the FastaIO data format.

## Supported pre-configurations (strategies)

The following MAFFT strategies are available as built-in preconfigurations. To use one, pass the corresponding symbol (given in parentheses) as the *preconfiguration* argument of the function call.
Expand Down
33 changes: 31 additions & 2 deletions src/MAFFT/mafft.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#= Julia Wrapper for MAFFT (http://mafft.cbrc.jp/alignment/software/)
=#
module Mafft
export mafft, mafft_from_string, mafft_from_fasta, mafft_profile, print_aligned_fasta, alignment_length, to_aminoacids
export mafft, mafft_from_string, mafft_from_fasta, mafft_profile, mafft_profile_from_string, mafft_profile_from_fasta, print_aligned_fasta, alignment_length, to_aminoacids

using FastaIO
using BioSeq
Expand Down Expand Up @@ -80,7 +80,7 @@ export mafft, mafft_from_string, mafft_from_fasta, mafft_profile, print_aligned_
return mafft_from_string(takebuf_string(io), preconfiguration)
end

#= Group-to-group alignments
#= Group-to-group alignments with input as paths to FASTA files
group1 and group2 have to be files with alignments
=#
function mafft_profile(group1::String, group2::String)
Expand All @@ -94,6 +94,35 @@ export mafft, mafft_from_string, mafft_from_fasta, mafft_profile, print_aligned_
return fr
end

#= Group-to-group alignments with input strings in FASTA format
group1 and group2 have to be strings with alignments in FASTA format.
Both strings are written to temporary files, which are then handed to
mafft_profile; its result is returned unchanged.
The temporary files are removed again before this function returns, also
when writing them or running the alignment throws.
=#
function mafft_profile_from_string(group1::String, group2::String)
    # write to tempfiles because mafft can not read from stdin
    tempfile1_path, tempfile1_io = mktemp()
    tempfile2_path, tempfile2_io = mktemp()

    try
        try
            write(tempfile1_io, group1)
            write(tempfile2_io, group2)
        finally
            # close both handles before the files are passed on by path,
            # even if one of the writes failed
            close(tempfile1_io)
            close(tempfile2_io)
        end

        # NOTE(review): assumes mafft_profile is done with the input files
        # once it returns, so they can be deleted right afterwards
        return mafft_profile(tempfile1_path, tempfile2_path)
    finally
        # remove the files here instead of leaving two of them behind per call
        rm(tempfile1_path)
        rm(tempfile2_path)
    end
end

#= Group-to-group alignments with input in FastaIO format
group1 and group2 have to be in FastaIO format and have to be alignments.
Each group is serialized to a FASTA string with writefasta; the two strings
are then passed to mafft_profile_from_string, whose result is returned.
=#
function mafft_profile_from_fasta(group1, group2)
    buffer = IOBuffer()

    # serialize one group into the shared buffer and take the text out again;
    # takebuf_string resets the buffer, so it can be reused for the next group
    function as_fasta_string(group)
        writefasta(buffer, group)
        return takebuf_string(buffer)
    end

    first_fasta = as_fasta_string(group1)
    second_fasta = as_fasta_string(group2)

    return mafft_profile_from_string(first_fasta, second_fasta)
end

# helper methods for aligned FASTA

function print_aligned_fasta(fasta)
Expand Down
13 changes: 13 additions & 0 deletions test/mafft.jl
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,16 @@ aligned = readfasta(Pkg.dir("BiomolecularStructures","examples/fasta","il4.fasta
mafft_profile_expected = {("_seed_3UGM:A|PDBID|CHAIN|SEQUENCE","MASSHHHHHHSSGLVPRGSSGSSMAARPPRAKPAPRRRAAQPSDASPAAQVDLRTLGYSQQQQEKIKPKVRSTVAQHHEALVGHGFTHAHIVALSQHPAALGTVAVTYQHIITALPEATHEDIVGVGKQWSGARALEALLTDAGELRGPPLQLDTGQLVKIAKRGGVTAMEAVHASRNALTGA---PLNLTPAQVVAIASNNGGKQALETVQRLLPVLCQAHGLTPAQVVAIASHDGGKQALETMQRLLPVLCQAHGLPPDQVVAIASNIGGKQALETVQRLLPVLCQAHGLTPDQVVAIASHGGGKQALETVQRLLPVLCQAHGLTPDQVVAIASHDGGKQALETVQRLLPVLCQAHGLTPDQVVAIASNGGGKQALETVQRLLPVLCQAHGLTPDQVVAIASNGGKQALETVQRLLPVLCQAHGLTPDQVVAIASHDGGKQALETVQRLLPVLCQTHGLTPAQVVAIASHDGGKQALETVQQLLPVLCQAHGLTPDQVVAIASNIGGKQALATVQRLLPVLCQAHGLTPDQVVAIASNGGGKQALETVQRLLPVLCQAHGLTPDQVVAIASNGGGKQALETVQRLLPVLCQAHGLTQVQVVAIASNIGGKQALETVQRLLPVLCQAHGLTPAQVVAIASHDGGKQALETVQRLLPVLCQAHGLTPDQVVAIASNGGGKQALETVQRLLPVLCQAHGLTQEQVVAIASNNGGKQALETVQRLLPVLCQAHGLTPDQVVAIASNGGGKQALETVQRLLPVLCQAHGLTPAQVVAIASNIGGKQALETVQRLLPVLCQDHGLTLAQVVAIASNIGGKQALETVQRLLPVLCQAHGLTQDQVVAIASNIGGKQALETVQRLLPVLCQDHGLTPDQVVAIASNIGGKQALETVQRLLPVLCQDHGLTLDQVVAIASNGGKQALETVQRLLPVLCQDHGLTPDQVVAIASNSGGKQALETVQRLLPVLCQDHGLTPNQVVAIASNGGKQALESIVAQLSRPDPALAALTNDHLVALACLGGRPAMDAVKKGLPHAPELIRRVN-RRIGERTSHRV"),("_seed_3UGM:B|PDBID|CHAIN|SEQUENCE","T-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AGATATGCATCTCCCCCTA--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CTG---TACACCACCAAAAG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T"),("_seed_3UGM:C|PDBID|CHAIN|SEQUENCE","TTTT----------------------------------------------------------------------------------------------------------------------------------------------------------------GGT-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GGTGTACAGTAGGGGGAGATGCATATCTAAC--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"),("_seed_gi|166158130|ref|NP_001107279.1| interleukin 4 [Xenopus (Silurana) 
tropicalis]","MSNL------GRILCA----------------------------------------------------------------------VLGLFHLLSANPVPSSKLQIAIEEIISELV------------------------------------------------------------NNKITHK-----KCFVPTPYDDEEEASVEE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ISCRA-----------------------FKSLKHVCASER----------------------------------------------------------------------------------------------------------------------KNLRLLNASLITMFSE----------------------------NVECSI-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NNDEQKDLISVIEDLL-------------------------------------TFFRAQM-RKLVMNP--KH"),("_seed_gi|55742622|ref|NP_999288.1| interleukin-4 precursor [Sus 
scrofa]","M-GL------TSQLIP----------------------------------------------------------------------TLVCLLACTSNFVHGHKCDITLQEIIKTL--------------------------------------------------------------NILTARKNSCMELPVTDVFAAPENTTEKE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TFCRA-----------------------STVLRHIYRHHT-----------------------------CMK--------------------------------------------------------------------------------------SLLSGLDRNLSSMAN------------------------------MTCSV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------HEAKKSTLKDFLERLK-------------------------------------TIMKEKY-SKC--------"),("_seed_gi|55741696|ref|NP_001007080.1| interleukin 4 [Gallus 
gallus]","MSSS------LPTLLA----------------------------------------------------------------------LLVLLAGPGAVPTLCLQLSVPLMESIRIV--------------------------------------------------------------NDIQGE-VSCVKMNVTDIFADNKTNNKTE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LLCKA-----------------------STI---VWESQH-----------------------------CH---------------------------------------------------------------------------------------KNLQGLFLNMRQLLNASSTS-----------------------LKAPCPT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AAGNTTSMEKFLADLR-------------------------------------TFFHQLAKNK---------"),("_seed_gi|50978886|ref|NP_001003159.1| interleukin-4 precursor [Canis lupus 
familiaris]","M-GL------TSQLIP----------------------------------------------------------------------TLVCLLALTSTFVHGHNFNITIKEIIKML--------------------------------------------------------------NILTARNDSCMELTVKDVFTAPKNTSDKE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------IFCRA-----------------------ATVLRQIYTHN------------------------------CSN--------------------------------------------------------------------------------------RYLRGLYRNLSSMAN------------------------------KTCSM-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NEIKKSTLKDFLERLK-------------------------------------VIMQKKY-YRH--------"),("_seed_gi|42627877|ref|NP_958427.1| interleukin-4 precursor [Rattus 
norvegicus]","M-GL------SPHLAV----------------------------------------------------------------------TLFCFLICTGNGIHGCN-DSPLREIINTL--------------------------------------------------------------NQVTEKGTPCTEMFVPDVLTATRNTTENE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LICRA-----------------------SRVLRKFYFPRDVPP--------------------------CLK--------------------------------------------------------------------------------NKSGVLGELRKLCRGVSGLNS-----------------------------LRSCTV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NESTLTTLKDFLESLK-------------------------------------SILRGKYLQSCTSM----S"),("_seed_gi|58743333|ref|NP_001008993.1| interleukin-4 isoform delta2 [Pan 
troglodytes]","M-GL------TSQLLP----------------------------------------------------------------------PLFFLLACAGNFVHGHKCDITLQEIIKTL--------------------------------------------------------------NSLTEQ----------------KNTTEKE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TFCRA-----------------------ATVLRQFYSHHEKDTR-------------------------CLG-------------------------------------------------------ATAQ-------QFH-----------RHKQLIRFLKRLDRNLWGLAG-----------------------------LNSCPV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------KEANQSTLENFLERLK-------------------------------------TIMREKY-SKCSS------"),("_seed_gi|112807223|ref|NP_001036804.1| interleukin-4 precursor [Felis 
catus]","M-DL------TSQLIP----------------------------------------------------------------------ALVCLLAFTSTFVHGQNFNNTLKEIIKTL--------------------------------------------------------------NILTARNDSCMELTVMDVLAAPKNTSDKE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------IFCRA-----------------------TTVLRQIYTHHN-----------------------------CST--------------------------------------------------------------------------------------KFLKGLDRNLSSMAN------------------------------RTCSV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NEVKKCTLKDFLERLK-------------------------------------AIMQKKY-SKH--------"),("_seed_2B8U:A|PDBID|CHAIN|SEQUENCE|HOMO 
SAPIENS","-----------------------------------------------------------------------------------------------------HKCDITLQEIIKTL--------------------------------------------------------------NSLTEQKTLCTELTVTDIFAASKNTTEKE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TFCRA-----------------------ATVLRQFYSHHEKDTR-------------------------CLG-------------------------------------------------------ATAQ-------QFH-----------RHKQLIRFLKRLDRNLWGLAG-----------------------------LNSCPV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------KEANQSTLENFLERLK-------------------------------------TIMREKY-SKCSS------")}
# profile alignment with both groups given as paths to alignment files
@test mafft_profile(Pkg.dir("BiomolecularStructures", "examples", "fasta", "3UGM.ali"),Pkg.dir("BiomolecularStructures", "examples", "fasta", "il4.ali")) == mafft_profile_expected

# same alignment with both groups given as FASTA strings;
# open(readall, path) closes the file handle again after reading,
# whereas readall(open(path)) would leave it open
group1 = open(readall, Pkg.dir("BiomolecularStructures", "examples", "fasta", "3UGM.ali"))

group2 = open(readall, Pkg.dir("BiomolecularStructures", "examples", "fasta", "il4.ali"))

@test mafft_profile_from_string(group1, group2) == mafft_profile_expected

# same alignment with both groups read into FastaIO data first
group1 = readfasta(Pkg.dir("BiomolecularStructures", "examples", "fasta", "3UGM.ali"))

group2 = readfasta(Pkg.dir("BiomolecularStructures", "examples", "fasta", "il4.ali"))

@test mafft_profile_from_fasta(group1, group2) == mafft_profile_expected


0 comments on commit 41b8fae

Please sign in to comment.