In [1]:
# load Biopython PDB packages

# PDBList to download PDBs
from Bio.PDB.PDBList import PDBList
pdbl = PDBList()

# PDBParser to load and work with files
from Bio.PDB.PDBParser import PDBParser
parser = PDBParser()

import json
import urllib2
import uuid

In [2]:
# Download PDB entry 2VFA through the PDBList instance and keep the returned
# value (presumably the local file path: it is later handed to
# parser.get_structure and to open() inside PDBViewer).
pdb_file_path = pdbl.retrieve_pdb_file('2VFA')

Structure exists: '/Users/LAURENCE/Desktop/Senior Design/vf/pdb2vfa.ent' 


In [None]:
# Parse the downloaded file into a Biopython structure object;
# 'someprotein' is the structure ID handed to the parser.
structure = parser.get_structure('someprotein', pdb_file_path)

In [None]:
# Collect the names of all non-water hetero residues (the ligands) for display.
# Adapted from: http://stackoverflow.com/questions/25718201/remove-heteroatoms-from-pdb
#
# residue.get_full_id() is (Structure ID, Model ID, Chain ID, Residue ID) and
# Residue ID is itself (Hetero Field, Residue ID, Insertion Code).
# The hetero field is " " for ordinary residues and "W" for waters; any other
# flag marks a hetero residue, whose name is the part after the "_".
hetero_flags = [res.get_full_id()[3][0] for res in structure.get_residues()]
ligands = [
    flag.split('_')[1].strip()
    for flag in hetero_flags
    if flag not in (" ", "W")
]

print(ligands)

### Do we want to display ligands?

In [None]:
class PDBViewer(object):
    '''
    Inline notebook viewer that renders a PDB file with the PV JavaScript
    library: the structure as a cartoon and selected ligands as balls and sticks.

    Contributed by: Ali Ebrahim

    :param f: path of the PDB file to display; it is read once, at construction
    :param ligand_names: optional iterable of residue names to draw as balls
        and sticks. When omitted, the module-level ``ligands`` list is looked
        up at render time (the original behaviour); if no such name exists,
        no ligands are drawn.
    '''

    def __init__(self, f, ligand_names=None):
        # Read eagerly and close the handle straight away instead of leaking it.
        with open(f) as pdb_handle:
            self.pdb = pdb_handle.read()
        self.ligand_names = ligand_names

    @staticmethod
    def _js_literal(value):
        '''Serialise a Python value as a JavaScript literal safe inside <script>.'''
        # json.dumps escapes quotes, backslashes and control characters; the
        # extra replace stops a literal "</script>" from closing the tag early.
        return json.dumps(value).replace("</", "<\\/")

    def _repr_html_(self):
        '''Return the HTML/JavaScript snippet the notebook displays for this object.'''
        div_id = str(uuid.uuid4())

        ligand_names = self.ligand_names
        if ligand_names is None:
            # Backward compatible fallback to the notebook-level `ligands` list.
            ligand_names = globals().get('ligands', [])

        return """<div id="%s" style="width: 800px; height: 600px"></div>
        <!--script src="//biasmv.github.io/pv/js/pv.min.js"></script-->
        <script>
        require.config({paths: {"pv": "//biasmv.github.io/pv/js/pv.min"}});
        require(["pv"], function (pv) {
            var pdb = %s;
            var structure = pv.io.pdb(pdb);
            var viewer = pv.Viewer(document.getElementById('%s'),
                               {quality : 'medium', width: 'auto', height : 'auto',
                                antialias : true, outline : true});
            viewer.fitParent();
            var ligandNames = %s;
            if (ligandNames.length > 0) {
                var ligand = structure.select({rnames : ligandNames});
                viewer.ballsAndSticks('ligand', ligand);
            }
            viewer.cartoon('molecule', structure);
            viewer.centerOn(structure);

        });
        </script>
        """ % (div_id,
               self._js_literal(self.pdb),
               div_id,
               self._js_literal(list(ligand_names)))

In [None]:
# Display the downloaded structure; PDBViewer reads the module-level `ligands`
# list when it builds its HTML, so the ligand cell must have been run first.
PDBViewer(pdb_file_path)

In [None]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC
from Bio.PDB import Polypeptide

In [None]:
def get_pdb_seq(structure):
    '''
    Build a one-letter sequence string for every chain in the first model of
    a Biopython structure.

    Only standard amino acids contribute a letter. Wherever the residue
    numbering jumps (missing residues, skipped non-standard residues such as
    selenomethionine, or a chain that does not start at 1) the gap is filled
    with 'X' characters.

    :param structure: Biopython structure object
    :return: Dictionary of sequence strings with chain IDs as the key
    '''

    sequences_by_chain = {}

    for chain in structure[0]:
        pieces = []
        last_number = 0

        for residue in chain.get_residues():
            # anything that is not a standard amino acid is skipped here and
            # shows up as an 'X' once the next standard residue is reached
            if not Polypeptide.is_aa(residue, standard=True):
                continue

            residue_id = residue.get_full_id()[3]
            number = residue_id[1]
            insertion_code = residue_id[2]
            letter = Polypeptide.three_to_one(residue.get_resname())

            follows_previous = (number == last_number + 1)

            if not follows_previous and insertion_code != ' ':
                # residue carrying an insertion code: emit it without padding
                # and move the counter one past its number
                pieces.append(letter)
                last_number = number + 1
                continue

            if not follows_previous:
                # a negative repeat count yields '', so overlaps add nothing
                pieces.append('X' * (number - last_number - 1))

            pieces.append(letter)
            last_number = number

        sequences_by_chain[chain.get_id()] = ''.join(pieces)

    return sequences_by_chain

In [None]:
def get_pdb_seq2(structure):
    '''
    Build, for every chain in the first model of a Biopython structure, a list
    of (one-letter code, residue number) tuples.

    Only standard amino acids contribute a real letter. Wherever the residue
    numbering jumps (missing residues, skipped non-standard residues such as
    selenomethionine, or a chain that does not start at 1) the gap is filled
    with ('X', number) placeholders. Each placeholder carries the residue
    number of the position it stands for, so it can never be mistaken for the
    real residue that follows the gap.

    TODO: non-standard residues at the very end of a chain are not represented,
    because no later standard residue triggers the gap filling.

    :param structure: Biopython structure object
    :return: Dictionary mapping chain ID to a list of (letter, residue number)
             tuples
    '''

    structure_seqs = {}

    # loop over each chain of the first model
    for chain in structure[0]:

        chain_seq = []
        tracker = 0

        for res in chain.get_residues():
            # non-standard residues are skipped here and appear as an 'X'
            # once the next standard residue is reached
            if not Polypeptide.is_aa(res, standard=True):
                continue

            res_id = res.get_full_id()[3]
            res_num = res_id[1]
            i_code = res_id[2]
            aa = Polypeptide.three_to_one(res.get_resname())

            if res_num != (tracker + 1):
                if i_code != ' ':
                    # residue carrying an insertion code: emit it without
                    # padding and move the tracker one past its number
                    chain_seq.append((aa, res_num))
                    tracker = res_num + 1
                    continue

                # One placeholder per skipped position, numbered individually.
                # range() is empty when res_num <= tracker, which matches the
                # previous 'X' * negative == '' behaviour.
                chain_seq.extend(
                    ('X', missing_num) for missing_num in range(tracker + 1, res_num)
                )

            chain_seq.append((aa, res_num))
            tracker = res_num

        structure_seqs[chain.get_id()] = chain_seq

    return structure_seqs

In [None]:
# Per-chain lists of (one-letter code, residue number) tuples for `structure`
my_structure_sequence = get_pdb_seq2(structure)

In [None]:
from Bio.PDB import Selection

In [None]:
# let's say after aligning, this is the residue that matches the structure
# The 'A' key is the chain ID (get_pdb_seq2 keys its result by chain.get_id());
# 26 is the zero-based position within that chain's list.
my_structure_sequence['A'][26]

In [None]:
# so we want to look at residue number 27:
# element [1] of the tuple is the residue number stored for that position
my_mutation_resnum = my_structure_sequence['A'][26][1]
# function-call form of print, as used for `print(ligands)`; the bare print
# statement is a syntax error on Python 3
print(my_mutation_resnum)

In [None]:
# let's get the info from the structure: model 0, chain 'A', then the residue
# with the number looked up above
my_mutation_residue = structure[0]['A'][my_mutation_resnum]
# function-call form of print, as used for `print(ligands)`; the bare print
# statement is a syntax error on Python 3
print(my_mutation_residue)

In [None]:
# we can use the Selection class to select all atoms of this residue
# 'A' here stands for ATOM, the target level -- it is not the chain ID
# (http://biopython.org/DIST/docs/api/Bio.PDB.Selection-module.html)
atom_list = Selection.unfold_entities(my_mutation_residue, 'A')
atom_list

In [None]:
# then you can format this information for PV as
# "<chain ID>.<residue number>.<atom name>".
# The chain ID and residue number are read from each atom's parent residue
# rather than hard-coded, and .format() is applied to the string itself so the
# line also works when print is a function.
for a in atom_list:
    parent_residue = a.get_parent()
    parent_chain = parent_residue.get_parent()
    print('{}.{}.{}'.format(parent_chain.get_id(), parent_residue.id[1], a.id))

### Let's do a case study with DNMT3A

#### Got the homology from Nathan's filter

In [None]:
from Bio.PDB.PDBParser import PDBParser

In [None]:
# Parse the DNMT3A model; the file name is a relative path, so the file is
# expected next to the notebook's working directory. 'DNMT3A' is the
# structure ID handed to the parser.
DNMT3A_struc = parser.get_structure('DNMT3A', 'NP_783329.1_model1_fix.pdb')

In [None]:
# Collect the names of all non-water hetero residues (the ligands) of the
# DNMT3A model. This rebinds the module-level `ligands` list that PDBViewer
# reads when it renders.
# Adapted from: http://stackoverflow.com/questions/25718201/remove-heteroatoms-from-pdb
ligands = []

for residue in DNMT3A_struc.get_residues():
    # full id is (Structure ID, Model ID, Chain ID, Residue ID) and Residue ID
    # is (Hetero Field, Residue ID, Insertion Code); only the hetero field matters
    hetero_flag = residue.get_full_id()[3][0]

    # " " marks an ordinary residue and "W" a water: neither is a ligand
    if hetero_flag in (" ", "W"):
        continue

    # the residue name is the part of the flag after the "_"
    ligands.append(hetero_flag.split('_')[1].strip())

print(ligands)

In [None]:
# Display the DNMT3A model; PDBViewer reads the module-level `ligands` list
# when it builds its HTML, so the DNMT3A ligand cell must have been run first.
PDBViewer('NP_783329.1_model1_fix.pdb')

In [None]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC
from Bio.PDB import Polypeptide

In [None]:
from Bio.PDB import Selection

In [None]:
# Per-chain lists of (one-letter code, residue number) tuples for the DNMT3A model
DNMT3A_structure_sequence = get_pdb_seq2(DNMT3A_struc)

In [None]:
# the trailing ';' keeps the notebook from echoing the (long) value
DNMT3A_structure_sequence;

In [None]:
# let's say after aligning, this is the residue that matches the structure
# The 'X' key is the chain ID (get_pdb_seq2 keys its result by chain.get_id());
# 26 is the zero-based position within that chain's list.
DNMT3A_structure_sequence['X'][26]

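### ^Not sure what to use for the first ['letter'] key, because the chain ID differs between structures (sometimes it's chain A, chain B, chain X, ...). How to automate? One option: the dictionary keys are the chain IDs actually present, so loop over `DNMT3A_structure_sequence.keys()` instead of hard-coding a letter.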

In [None]:
# so we want to look at the residue number stored at position 26 of chain 'X'
# (element [1] of the tuple is the residue number)
DNMT3A_mutation_resnum = DNMT3A_structure_sequence['X'][26][1]
# print the DNMT3A value just computed, not the 2VFA variable
# `my_mutation_resnum` from the earlier cell
print(DNMT3A_mutation_resnum)

In [None]:
# let's get the info from the DNMT3A structure, using the same chain ('X') the
# residue number was read from -- not chain 'A' of the 2VFA `structure`
DNMT3A_mutation_residue = DNMT3A_struc[0]['X'][DNMT3A_mutation_resnum]
# print the DNMT3A residue, not `my_mutation_residue` from the earlier cell
print(DNMT3A_mutation_residue)

In [None]:
# we can use the Selection class to select all atoms of this residue
# 'A' here stands for ATOM, the target level -- it is not the chain ID
# (http://biopython.org/DIST/docs/api/Bio.PDB.Selection-module.html)
# NOTE: this rebinds the `atom_list` name already used for the 2VFA residue.
atom_list = Selection.unfold_entities(DNMT3A_mutation_residue, 'A')
atom_list

In [None]:
# then you can format this information for PV as
# "<chain ID>.<residue number>.<atom name>".
# The chain ID and residue number are read from each atom's parent residue
# rather than hard-coded as 'A' and 27, and .format() is applied to the string
# itself so the line also works when print is a function.
for a in atom_list:
    parent_residue = a.get_parent()
    parent_chain = parent_residue.get_parent()
    print('{}.{}.{}'.format(parent_chain.get_id(), parent_residue.id[1], a.id))

In [None]:
class PDBViewer2(object):
    '''
    Inline notebook viewer that renders a PDB file as a cartoon with the PV
    JavaScript library and puts a text label on one atom.

    Contributed by: Ali Ebrahim

    :param f: path of the PDB file to display; it is read once, at construction
    :param atom_name: PV atom name "<chain>.<residue number>.<atom>" of the
        atom to label (default 'A.27.N', the atom labelled originally)
    '''

    def __init__(self, f, atom_name='A.27.N'):
        # Read eagerly and close the handle straight away instead of leaking it.
        with open(f) as pdb_handle:
            self.pdb = pdb_handle.read()
        self.atom_name = atom_name

    @staticmethod
    def _js_literal(value):
        '''Serialise a Python value as a JavaScript literal safe inside <script>.'''
        # json.dumps escapes quotes, backslashes and control characters; the
        # extra replace stops a literal "</script>" from closing the tag early.
        return json.dumps(value).replace("</", "<\\/")

    def _repr_html_(self):
        '''Return the HTML/JavaScript snippet the notebook displays for this object.'''
        div_id = str(uuid.uuid4())

        # Changes to the script compared with the first draft:
        #  * the label code uses `structure`, the variable that actually holds
        #    the parsed PDB (it referred to an undefined `s`);
        #  * the second viewer.label() call outside the viewerReady callback
        #    was dropped, since it used variables local to that callback;
        #  * the structure is drawn once, as 'molecule'.
        return """<div id="%s" style="width: 800px; height: 600px"></div>
        <!--script src="//biasmv.github.io/pv/js/pv.min.js"></script-->
        <script>
        require.config({paths: {"pv": "//biasmv.github.io/pv/js/pv.min"}});
        require(["pv"], function (pv) {
            var pdb = %s;
            var structure = pv.io.pdb(pdb);
            var viewer = pv.Viewer(document.getElementById('%s'),
                               {quality : 'medium', width: 'auto', height : 'auto',
                                antialias : true, outline : true});
            viewer.fitParent();

            viewer.cartoon('molecule', structure);
            viewer.centerOn(structure);

            viewer.on('viewerReady', function() {
                var labelAtom = structure.atom(%s);
                if (!labelAtom) {
                    return;
                }
                var options = {
                    fontSize : 16, fontColor: '#f22', backgroundAlpha : 0.4
                };
                viewer.label('label', labelAtom.qualifiedName(),
                             labelAtom.pos(), options);
                viewer.autoZoom();
            });
        });
        </script>
        """ % (div_id,
               self._js_literal(self.pdb),
               div_id,
               self._js_literal(self.atom_name))

In [None]:
# Display the downloaded structure with the labelling viewer
PDBViewer2(pdb_file_path)

In [3]:
class PDBViewer3(object):
    '''
    Inline notebook viewer that lets the PV JavaScript library fetch a PDB
    file by URL, draws it as a cartoon and labels one atom.

    Contributed by: Ali Ebrahim

    :param pdb_url: URL the browser fetches the PDB file from
        (default '_static/2vfa.pdb', the path used originally)
    :param atom_name: PV atom name "<chain>.<residue number>.<atom>" of the
        atom to label (default 'A.27.CA')
    '''

    def __init__(self, pdb_url='_static/2vfa.pdb', atom_name='A.27.CA'):
        self.pdb_url = pdb_url
        self.atom_name = atom_name

    @staticmethod
    def _js_literal(value):
        '''Serialise a Python value as a JavaScript literal safe inside <script>.'''
        # json.dumps escapes quotes, backslashes and control characters; the
        # extra replace stops a literal "</script>" from closing the tag early.
        return json.dumps(value).replace("</", "<\\/")

    def _repr_html_(self):
        '''Return the HTML/JavaScript snippet the notebook displays for this object.'''
        div_id = str(uuid.uuid4())

        # Changes to the script compared with the first draft:
        #  * the viewer is attached to the <div> generated here (the script
        #    looked up an element with id 'viewer', which this HTML never
        #    creates);
        #  * PV is loaded through require(), like the other viewer classes;
        #    the direct <script src> include is commented out, so nothing
        #    else in this snippet defines `pv`.
        return """<div id="%s" style="width: 800px; height: 600px"></div>
        <!--script src="//biasmv.github.io/pv/js/pv.min.js"></script-->
        <script>
        require.config({paths: {"pv": "//biasmv.github.io/pv/js/pv.min"}});
        require(["pv"], function (pv) {
            var parent = document.getElementById('%s');
            var viewer = pv.Viewer(parent, {
                width : '300', height: '300', antialias : true,
                outline : true, quality : 'medium', style : 'hemilight',
                background : 'white', animateTime: 500,
                selectionColor : '#f00'
            });

            pv.io.fetchPdb(%s, function(s) {
              viewer.on('viewerReady', function() {
                viewer.cartoon('crambin', s);
                var carbonAlpha = s.atom(%s);
                if (!carbonAlpha) {
                    return;
                }
                // override a few default options to show their effect
                var options = {
                 fontSize : 16, fontColor: '#f22', backgroundAlpha : 0.4
                };
                viewer.label('label', carbonAlpha.qualifiedName(),
                             carbonAlpha.pos(), options);
                viewer.autoZoom();
              });
            });
        });
        </script>
        """ % (div_id,
               div_id,
               self._js_literal(self.pdb_url),
               self._js_literal(self.atom_name))

In [4]:
# Display the viewer that fetches its PDB file by URL
PDBViewer3()