In [1]:
import pypdb
from pprint import pprint

Let's query an arbitrary complex from the PDB and get familiar with the information that is returned.

In [2]:
# Fetch the summary description of PDB entry 1AM9 and pretty-print the returned mapping.
pdb_1am9_desc = pypdb.describe_pdb("1am9")
pprint(pdb_1am9_desc)

{'citation_authors': "Parraga, A., Bellsolell, L., Ferre-D'Amare, A.R., "
                     'Burley, S.K.',
 'deposition_date': '1997-06-25',
 'expMethod': 'X-RAY DIFFRACTION',
 'keywords': 'TRANSCRIPTION/DNA',
 'last_modification_date': '2011-07-13',
 'nr_atoms': '3989',
 'nr_entities': '3',
 'nr_residues': '404',
 'pubmedId': '9634703',
 'release_date': '1998-07-10',
 'resolution': '2.30',
 'status': 'CURRENT',
 'structureId': '1AM9',
 'structure_authors': 'Parraga, A., Burley, S.K.',
 'title': 'HUMAN SREBP-1A BOUND TO LDL RECEPTOR PROMOTER'}


In [3]:
# Fetch the more detailed record for the same entry (it includes per-polymer data such as
# entity number, length, type, weight and chain ids) and pretty-print it.
pdb_1am9_desc_all = pypdb.get_all_info('1am9')
pprint(pdb_1am9_desc_all)

{'id': '1AM9',
 'polymer': [{'@entityNr': '1',
              '@length': '17',
              '@type': 'dna',
              '@weight': '5289.42',
              'chain': [{'@id': 'E'}, {'@id': 'G'}],
              'polymerDescription': {'@description': 'DNA '
                                                     "(5'-D(*TP*TP*GP*CP*AP*GP*TP*GP*GP*GP*GP*TP*GP*AP*TP*CP*T "
                                                     ")-3')"}},
             {'@entityNr': '2',
              '@length': '21',
              '@type': 'dna',
              '@weight': '6361.14',
              'chain': [{'@id': 'F'}, {'@id': 'H'}],
              'polymerDescription': {'@description': 'DNA '
                                                     "(5'-D(*CP*AP*TP*GP*AP*GP*AP*TP*CP*AP*CP*CP*CP*CP*AP*CP*T "
                                                     "P*GP*CP*AP*A)-3')"}},
             {'@entityNr': '3',
              '@length': '82',
              '@type': 'protein',
              '@weight': '9492.99',
  

Let's fetch the PDB file for this complex.

In [4]:
# Take the entry id from the detailed record and fetch the matching PDB file contents.
entry_id = pdb_1am9_desc_all["id"]
pdb_1am9 = pypdb.get_pdb_file(entry_id)

Now, let's parse and visualize this pdb file

In [5]:
import nglview as nv
from Bio import PDB
import tempfile

_ColormakerRegistry()

Create an empty NGL Widget

In [6]:
# Create the viewer widget with nothing loaded yet; later cells add components to it.
nv_view = nv.NGLWidget()
# Bare last expression so the notebook renders the widget.
nv_view

NGLWidget()

Parse the PDB data using Biopython's PDBParser

In [7]:
# The parser is handed an open file handle, so the fetched text is first written to a
# named temporary ".pdb" file; the file is removed automatically when the block exits.
pdb_parser = PDB.PDBParser()
with tempfile.NamedTemporaryFile(suffix=".pdb", mode="a+") as tmp_pdb:
    tmp_pdb.write(pdb_1am9)
    tmp_pdb.seek(0)  # rewind so the parser reads from the beginning
    print(tmp_pdb.name)
    structure_1am9 = pdb_parser.get_structure("SREBP-1A", tmp_pdb)
# Bare last expression so the notebook shows the parsed structure's repr.
structure_1am9

/var/folders/46/9pp5st7d2mvdf79xv9pdbhfm0000gn/T/tmp1s7sgcf0.pdb




<Structure id=SREBP-1A>

Uncomment the line below to view the entire complex

In [8]:
# Optional: uncomment to load the whole parsed structure into the viewer as one component.
#nv_view.add_structure(nv.adaptor.BiopythonStructure(structure_1am9))

<nglview.component.ComponentViewer at 0x117560a30>

Add only the chains of interest (A–D) to the visualization, leaving out the DNA chains (E–H)

In [13]:
# Add the selected chains of model 0 to the viewer, one component per chain.
model_0 = structure_1am9[0]
for chain_id in ("A", "B", "C", "D"):
    chain_adaptor = nv.adaptor.BiopythonStructure(model_0[chain_id])
    nv_view.add_structure(chain_adaptor)
    

Extracting sequence data from structure

In [17]:
# Build the polypeptides for chain A and keep the sequence of the first one only.
chain_a = structure_1am9[0]["A"]
chain_a_peptides = PDB.PPBuilder().build_peptides(chain_a)
seq_chainA = chain_a_peptides[0].get_sequence()
seq_chainA

Seq('QSRGEKRTAHNAIEKRYRSSINDKIIELKDLVVGTEAKLNKSAVLRKAIDYIRF...SLK', ProteinAlphabet())

In [21]:
# Count how many times the one-letter code "A" occurs in the chain A sequence.
seq_chainA.count("A")

6

Checking the type of the molecule

In [34]:
# Compare the first space-delimited word of compound "3"'s molecule name with "protein".
molecule_name = structure_1am9.header["compound"]["3"]["molecule"]
first_word = molecule_name.partition(" ")[0]
first_word == "protein"

True