Demo for accessing PDB, visualizing, and parsing 
Sinduja K. Marx Nov-15-2017

TO DO: fix ipywidgets
currently the cells need to be run for the pretty widgets to show
TO DO: annotate formats and types

### PyPDB: A python API for Protein Data Bank
installation instructions
https://github.com/williamgilpin/pypdb


### Description 
A Python 3 toolkit for performing searches with the RCSB Protein Data Bank (PDB) using its XML-based API. This can be used to perform advanced searches for PDB IDs matching various criteria, as well as to look up information associated with specific PDB IDs. This tool allows standard operations that can be performed from within the PDB website (BLAST, PFAM lookup, etc.) to be performed within Python scripts, allowing it to supplement existing tools (e.g. Biopython) that are designed for manipulating .pdb files.

In [4]:
%pylab inline
from IPython.display import HTML
# from pypdb.pypdb import *
from pypdb import *

import pprint

Populating the interactive namespace from numpy and matplotlib


In [None]:
## search channels in the protein data bank

In [12]:
# Keyword search of the PDB for "channels": build the query, then run it.
search_dict = make_query('channels')
found_pdbs = do_search(search_dict)
# Preview the first three and the last three hits, one list per line.
print(found_pdbs[:3], found_pdbs[-3:], sep='\n')

['1A11', '1A3Q', '1A68']
['6PRN', '7PRN', '8PRN']


In [15]:
# Entity-level record for entry 1A11 as returned by pypdb (a nested dict).
all_info = get_all_info('1A11')
# Pretty-print it: a plain print() puts the whole nested dict on one very long
# line. pprint is the module imported in the first cell.
pprint.pprint(all_info)

{'polymer': {'@entityNr': '1', '@length': '25', '@type': 'protein', '@weight': '2666.14', 'chain': {'@id': 'A'}, 'Taxonomy': {'@name': 'Rattus norvegicus', '@id': '10116'}, 'synonym': {'@name': 'ACHR M2'}, 'macroMolecule': {'@name': 'Acetylcholine receptor subunit delta', 'accession': {'@id': 'P25110'}}, 'polymerDescription': {'@description': 'ACETYLCHOLINE RECEPTOR M2'}}, 'id': '1A11'}


In [18]:
# Summary metadata for entry 1A11 (title, authors, experimental method, dates, ...).
# As the last expression of the cell, the returned dict is shown as the cell output.
describe_pdb('1A11')

{'citation_authors': 'Opella, S.J., Marassi, F.M., Gesell, J.J., Valente, A.P., Kim, Y., Oblatt-Montal, M., Montal, M.',
 'deposition_date': '1997-12-19',
 'expMethod': 'SOLUTION NMR',
 'keywords': 'ACETYLCHOLINE RECEPTOR',
 'last_modification_date': '2011-07-13',
 'nr_atoms': '186',
 'nr_entities': '1',
 'nr_residues': '25',
 'pubmedCentralId': 'PMC3282055',
 'pubmedId': '10201407',
 'release_date': '1998-04-08',
 'status': 'CURRENT',
 'structureId': '1A11',
 'structure_authors': 'Gesell, J.J., Sun, W., Montal, M., Opella, S.J.',
 'title': 'NMR STRUCTURE OF MEMBRANE SPANNING SEGMENT 2 OF THE ACETYLCHOLINE RECEPTOR IN DPC MICELLES, 10 STRUCTURES'}

In [None]:
## search pores in the PDB

In [16]:
# Keyword search of the PDB for "pores": build the query, then run it.
search_dict = make_query('pores')
found_pdbs = do_search(search_dict)
# Preview the first three and the last three hits, one list per line.
print(found_pdbs[:3], found_pdbs[-3:], sep='\n')

['1A0L', '1A5R', '1A87']
['7AHL', '7PRN', '8PRN']


In [22]:
# Entity-level record for entry 7PRN as returned by pypdb (a nested dict).
all_info = get_all_info('7PRN')
# Pretty-print it: a plain print() puts the whole nested dict on one very long
# line. pprint is the module imported in the first cell.
pprint.pprint(all_info)
# Summary metadata for the same entry; as the last expression of the cell the
# returned dict is shown as the cell output.
describe_pdb('7PRN')

{'polymer': {'@entityNr': '1', '@length': '289', '@type': 'protein', '@weight': '30508.9', 'chain': {'@id': 'A'}, 'Taxonomy': {'@name': 'Rhodobacter blasticus', '@id': '1075'}, 'macroMolecule': {'@name': 'Porin', 'accession': {'@id': 'P39767'}}, 'polymerDescription': {'@description': 'PORIN'}, 'mutation': {'@desc': 'D97A, E99A'}}, 'id': '7PRN'}


{'citation_authors': 'Schmid, B., Maveyraud, L., Kromer, M., Schulz, G.E.',
 'deposition_date': '1998-06-12',
 'expMethod': 'X-RAY DIFFRACTION',
 'keywords': 'MEMBRANE PROTEIN',
 'last_modification_date': '2011-07-13',
 'nr_atoms': '2177',
 'nr_entities': '1',
 'nr_residues': '289',
 'pubmedCentralId': 'PMC2144057',
 'pubmedId': '9684893',
 'release_date': '1998-08-12',
 'resolution': '2.25',
 'status': 'CURRENT',
 'structureId': '7PRN',
 'structure_authors': 'Maveyraud, L., Schmid, B., Schulz, G.E.',
 'title': 'E1M, D97A, E99A MUTANT OF RH. BLASTICA PORIN'}

In [61]:
# Fetch entry 7PRN as PDB-format text with pypdb,
# then preview only the first 1000 characters of it.
pdb_file = get_pdb_file('7PRN', filetype='pdb')
print(pdb_file[:1000])

HEADER    MEMBRANE PROTEIN                        12-JUN-98   7PRN              
TITLE     E1M, D97A, E99A MUTANT OF RH. BLASTICA PORIN                          
COMPND    MOL_ID: 1;                                                            
COMPND   2 MOLECULE: PORIN;                                                     
COMPND   3 CHAIN: A;                                                            
COMPND   4 ENGINEERED: YES;                                                     
COMPND   5 MUTATION: YES                                                        
SOURCE    MOL_ID: 1;                                                            
SOURCE   2 ORGANISM_SCIENTIFIC: RHODOBACTER BLASTICUS;                          
SOURCE   3 ORGANISM_TAXID: 1075;                                                
SOURCE   4 CELL_LINE: BL21;                                                     
SOURCE   5 CELLULAR_LOCATION: OUTER MEMBRANE;                                   
SOURCE   6 EXPRESSION_SYSTEM

### View the structure via nglview package

In [13]:
import nglview as nv

In [23]:
# Build an nglview widget for PDB entry 7PRN, looked up by its PDB ID.
view = nv.show_pdbid("7PRN")
# A bare last expression makes Jupyter render the widget as the cell output.
view

A Jupyter Widget

### Parse PDB file using biopython (say get chain A)

In [58]:
from Bio.PDB import PDBParser 
from Bio.PDB import MMCIFParser
from Bio.PDB import *

In [62]:
# Create a Biopython PDB parser.
parser = PDBParser(PERMISSIVE=1)
# parser = MMCIFParser()  # alternative parser for mmCIF input (kept for reference)

# Read the structure from a local .pdb file that must sit next to this notebook; 'ex' is the
# structure id. (TO DO: pass in the pypdb object to Biopython to parse? or maybe Biopython can fetch by itself?)
structure = parser.get_structure('ex', 'Hairpin_HBs_3_C6_0009_0001_0001.pdb')

# Take the first model of the structure, then chain 'A' from that model.
# Both names are notebook-level variables used by the following visualization cell.
model = structure[0]
chain = model['A']

In [101]:
# Render a Biopython entity (here the notebook-level `chain`) with nglview.
# NOTE(review): the execution counts show the cells were run out of order; make sure
# `chain` still refers to chain 'A' selected in the parsing cell above before running this.
view = nv.show_biopython(chain)
# A bare last expression makes Jupyter render the widget as the cell output.
view

A Jupyter Widget

In [85]:
# Download entry 7PRN from the Protein Data Bank with Biopython's PDBList.
# The file is requested in PDB format and written to the current directory;
# the returned path is shown as the cell output.
pdbl = PDBList()
pdbl.retrieve_pdb_file('7PRN', pdir='.', file_format='pdb')

Downloading PDB structure '7PRN'...


'./pdb7prn.ent'

In [100]:
# parsing ideas
# still working on this.

# Initiate a parser and parse the 7PRN file written by the PDBList download cell
# (saved in the current directory as pdb7prn.ent).
p = PDBParser()
triple_beta_b = p.get_structure('7prn', 'pdb7prn.ent')  # NOTE: holds the 7PRN entry despite its name

# Iterate over entities: structure -> model -> chain -> residue.
# Walk the structure parsed in this cell, not the notebook-level `structure`
# from the earlier parsing cell. The loop variables have their own names so the
# notebook-level `model` / `chain` selected earlier are not overwritten.
for porin_model in triple_beta_b:
    for porin_chain in porin_model:
        for porin_residue in porin_chain:
            # print(porin_residue)
            pass

# # selection shortcuts
# res_list = Selection.unfold_entities(porin_chain, 'R')
# # print(res_list)

# # method shortcuts
# res_list = Selection.unfold_entities(triple_beta_b, 'R')

# reference : http://biopython.org/wiki/The_Biopython_Structural_Bioinformatics_FAQ

<generator object Chain.get_residues at 0x10d8948e0>


In [86]:
%%bash
# List the working directory, e.g. to confirm that the downloaded pdb7prn.ent is present.
ls

Demo Exploring Proteins (PyPDB+nglview+biopython).ipynb
Demo Visualizing Proteins in Jupyter Notebook.ipynb
Hairpin_HBs_3_C6_0009_0001_0001.pdb
Hairpin_HBs_3_C6_0009_0001_0001A.pdb
obsolete
pdb7prn.ent
show_rosetta.ipynb
