Skip to content

Commit

Permalink
updated pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
gp0 committed Mar 31, 2015
1 parent ea2d013 commit ddfe670
Showing 1 changed file with 43 additions and 31 deletions.
74 changes: 43 additions & 31 deletions src/pipeline.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,58 @@ using Requests
using BiomolecularStructures.PDB
# 1. search for S_0 in PDB

# NOTE(review): this span appears to interleave removed and added lines of the
# commit without +/- markers, so adjacent statements may belong to different
# revisions of the script — confirm against the repository before relying on it.

# Query sequence and cutoff; both are passed to the BLAST calls further down.
seq = "MNQLQQLQNPGESPPVHPFVAPLSYLLGTWRGQGEGEYPTIPSFRYGEEIRFSHSGKPVIAY"
# presumably a BLAST E-value cutoff — TODO confirm
threshold = 1.0e-30
#seq = "MNQLQQLQNPGESPPVHPFVAPLSYLLGTWRGQGEGEYPTIPSFRYGEEIRFSHSGKPVIAY"

# Log the sequence that is about to be searched.
info("Searching for sequence: $(seq)")
# Alternative query sequence, disabled by the surrounding block comment.
#=seq = "MKKQLKYCFF SLFVSLSSIL SSCGSTTFVL ANFESYISPL LLERVQEKHP LTFLTYPSNE
KLINGFANNT YSVAVASTYA VSELIERDLL SPIDWSQFNL KKSSSSSDKV NNASDAKDLF
IDSIKEISQQ TKDSKNNELL HWAVPYFLQN LVFVYRGEKI SELEQENVSW TDVIKAIVKH
KDRFNDNRLV FIDDARTIFS LANIVNTNNN SADVNPKEDG IGYFTNVYES FQRLGLTKSN
LDSIFVNSDS NIVINELASG RRQGGIVYNG DAVYAALGGD LRDELSEEQI PDGNNFHIVQ
PKISPVALDL LVINKQQSNF QKEAHEIIFD LALDGADQTK EQLIKTDEEL GTDDEDFYLK
GAMQNFSYVN YVSPLKVISD PSTGIVSSKK NNAEMKSKQM STDQMTSEKE FDYYTETLKA
LLEKEDSAEL NENEKKLVET IKKAYTIEKD SSIRWNQLVE KPISPLQRSN LSLSWLDFKL
HWW" =#

# Submit the query. `rid` is passed to ncbi_blast_search_info and
# ncbi_blast_get_results below; `rtoe` is not used in the visible lines.
rid, rtoe = ncbi_blast_put(seq)
info("rid: $(rid)")
# NOTE(review): no `end` closing this `if` appears in the visible lines.
if ncbi_blast_search_info(rid)
# `seq` is reassigned here. The literal spans several lines, so the string
# value contains newline characters — NOTE(review): confirm the consumer
# (webblast below) tolerates them.
seq = "QIKDLLVSSSTDLDTTLVLVNAIYFKGMWKTAFNAEDTREMPFHVTKQESKPVQMMCMNNSFNVATLPAE
KMKILELPFASGDLSMLVLLPDEVSDLERIEKTINFEKLTEWTNPNTMEKRRVKVYLPQMKIEEKYNLTS
VLMALGMTDLFIPSANLTGISSAESLKISQAVHGAFMELSEDGIEMAGSTGVIEDIKHSPESEQFRADHP
FLFLIKHNPTNTIVYFGRYWSP"

# Fetch the hits for the submitted request, using the cutoff set above.
results = ncbi_blast_get_results(rid, threshold)

# Collected as pairs of strings, filled from the split accession in the loop below.
pdbs = (String,String)[]
# `threshold` is reassigned; statements after this point see 0.005.
threshold = 0.005

# Accumulates one ">header\nsequence\n" record per result.
fastastring = ""
for result in results
# fasta[1] is used as the record header and fasta[2] as the sequence line below.
fasta = fastarepresentation(result)

# Split the accession on "_"; indexing part 2 below assumes an underscore is present.
accession = split(result.accession, "_")

push!(pdbs, (convert(String,accession[1]), convert(String,accession[2])))
# NOTE(review): as rendered, this reassigns `results` inside the loop that is
# iterating over it — likely an artifact of the interleaved diff; confirm.
results = webblast("ncbi", seq, threshold, true)

fastastring = string(fastastring, ">", fasta[1], "\n", fasta[2], "\n")
end

# NOTE(review): this span appears to interleave removed and added lines of the
# commit without +/- markers, so adjacent statements may belong to different
# revisions of the script — confirm against the repository before relying on it.

# 2. Run MAFFT on results
# PDB IDs and Chain IDs
pdbs = (String,String)[]

# Print whatever mafft_from_string returns for the FASTA text built so far.
println(mafft_from_string(fastastring))
# Reset the accumulator; the loop below appends one record per result.
fastastring = ""
for result in results
# fasta[1] is used as the record header and fasta[2] as the sequence line below.
fasta = fastarepresentation(result)

# Split the accession on "_"; parts 1 and 2 are stored as the (PDB ID, chain ID) pair.
accession = split(result.accession, "_")

# 3. Get PDBs
for pdb in pdbs
# NOTE(review): this tests the bare ID as a path, while the download below is
# written under the package's .pdbCache directory — confirm which is intended.
if !isreadable(pdb[1])
data = get(string("http://www.rcsb.org/pdb/files/", pdb[1]))
# The mode has to be the string "w": a Char literal 'w' does not match any
# method of open. The do-block form also closes the file once the write is
# done, even if write throws, so the handle is no longer leaked.
open(Pkg.dir("BiomolecularStructures", ".pdbCache", pdb[1]), "w") do f
write(f, data.data)
end
end
println(get_structure(pdb[1]))

end
push!(pdbs, (convert(String,accession[1]), convert(String,accession[2])))

# 4. Superimpose PDBs (Best Hit as reference?)
fastastring = string(fastastring, ">", fasta[1], "\n", fasta[2], "\n")
end


# 2. Run MAFFT on results
# Skip the alignment entirely when the search produced no hits.
length(results) > 0 && print_fasta(mafft_from_string(fastastring))

# 3. Get PDBs
# For every collected (PDB ID, chain ID) pair, pass the ID through
# get_remote_pdb and hand its result to get_structure. The element type is
# kept as Any, so the container accepts whatever get_structure returns.
structures = Any[get_structure(get_remote_pdb(entry[1])) for entry in pdbs]

# 4. Do something with the PDBs (Superimpose, etc)
# For now only the dimensions of each structure's matrix form are printed.
for structure in structures
    println(size(structure_to_matrix(structure)))
end

0 comments on commit ddfe670

Please sign in to comment.