Skip to content

Commit

Permalink
Mapping columns to residues
Browse files Browse the repository at this point in the history
  • Loading branch information
diegozea committed Dec 28, 2015
1 parent 6ff29f6 commit c3dacbb
Showing 1 changed file with 44 additions and 1 deletion.
45 changes: 44 additions & 1 deletion src/Pfam/pdb.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,16 @@

# Matches a PDB cross-reference of the form "PDB; <code> <chain>; <from>-<to>;".
# Capture 1 is the PDB code (`\w+`), capture 2 is the single-character chain (`\w`);
# the trailing `<from>-<to>` range is required by the pattern but not captured.
const _regex_PDB_from_GS = r"PDB;\s+(\w+)\s+(\w);\s+\w+-\w+;" # e.g.: "PDB; 2VQC A; 4-73;\n"

"Generates from a Pfam `msa` a `Dict{ASCIIString, Vector{Tuple{ASCIIString,ASCIIString}}}` with sequence ID as key and list of tuples of PDB code and chain as values."
"""Generates from a Pfam `msa` a `Dict{ASCIIString, Vector{Tuple{ASCIIString,ASCIIString}}}`.
Keys are sequence IDs and each value is a list of tuples containing PDB code and chain.
```
julia> getseq2pdb(msa)
Dict{ASCIIString,Array{Tuple{ASCIIString,ASCIIString},1}} with 1 entry:
"F112_SSV1/3-112" => [("2VQC","A")]
```
"""
function getseq2pdb(msa::AnnotatedMultipleSequenceAlignment)
dict = Dict{ASCIIString, Vector{Tuple{ASCIIString,ASCIIString}}}()
for (k, v) in getannotsequence(msa)
Expand All @@ -21,3 +30,37 @@ function getseq2pdb(msa::AnnotatedMultipleSequenceAlignment)
end
sizehint!(dict, length(dict))
end

# Mapping PDB/Pfam
# ================

"""
    getcol2res(seqid, pdbid, chain, pfamid, msa, siftsfile)

Returns a `Dict{Int,ASCIIString}` with one entry per element of `getcolumnmapping(msa)`.
The key is that element; the value is obtained by looking up the matching element of
`getsequencemapping(msa, seqid)` in the result of `siftsmapping` (called for `pfamid` and
the lowercased `pdbid`, restricted to `chain`, with `missings=false`).
Positions without an entry in the SIFTS mapping get an empty string.
"""
function getcol2res(seqid::ASCIIString,
                    pdbid::ASCIIString,
                    chain::ASCIIString,
                    pfamid::ASCIIString,
                    msa::AnnotatedMultipleSequenceAlignment,
                    siftsfile::ASCIIString)
    pfam2pdb = siftsmapping(siftsfile, dbPfam, pfamid, dbPDB, lowercase(pdbid),
                            chain=chain, missings=false)
    seqpositions = getsequencemapping(msa, seqid)
    columns = getcolumnmapping(msa)
    col2res = Dict{Int,ASCIIString}()
    sizehint!(col2res, length(columns))
    # `seqpositions` is indexed (not zipped) so that a mapping shorter than `columns`
    # still raises a BoundsError instead of being silently truncated.
    for (i, column) in enumerate(columns)
        col2res[column] = get(pfam2pdb, seqpositions[i], "")
    end
    return col2res
end

# Convenience method: the Pfam id is not given, so it is taken from the "AC" file
# annotation of `msa`, keeping only the text before the first '.'.
function getcol2res(seqid::ASCIIString, pdbid::ASCIIString, chain::ASCIIString,
                    msa::AnnotatedMultipleSequenceAlignment, siftsfile::ASCIIString)
    accession = getannotfile(msa, "AC")
    pfamid = ascii(split(accession, '.')[1])
    return getcol2res(seqid, pdbid, chain, pfamid, msa, siftsfile)
end

# Convenience method: no SIFTS file is given, so the result of `downloadsifts(pdbid)`
# is passed as the `siftsfile` argument of the six-argument method.
function getcol2res(seqid::ASCIIString, pdbid::ASCIIString, chain::ASCIIString,
                    pfamid::ASCIIString, msa::AnnotatedMultipleSequenceAlignment)
    siftsfile = downloadsifts(pdbid)
    return getcol2res(seqid, pdbid, chain, pfamid, msa, siftsfile)
end

# Convenience method: neither the Pfam id nor a SIFTS file is given. The Pfam id is
# taken from the "AC" file annotation of `msa` (text before the first '.'), and the
# call is forwarded to the method that takes `pfamid` but no SIFTS file.
function getcol2res(seqid::ASCIIString, pdbid::ASCIIString, chain::ASCIIString,
                    msa::AnnotatedMultipleSequenceAlignment)
    accession = getannotfile(msa, "AC")
    pfamid = ascii(split(accession, '.')[1])
    return getcol2res(seqid, pdbid, chain, pfamid, msa)
end

0 comments on commit c3dacbb

Please sign in to comment.