In [12]:
!pip install biopython



In [13]:
from Bio.PDB import PDBParser

In [20]:
# Initialize the PDB parser
parser = PDBParser()

# Load the structure from the PDB file
structure = parser.get_structure('9jer', file='./data/9jer.pdb')



#### Information about Biopython Structure
* *Model*: A structure can have multiple models (though most have just one)
* *Chain*: Each model contains one or more chains (A, B, C, etc.)
* *Residue*: Chains are composed of amino acid residues
* *Atom*: Each residue has multiple atoms

In [15]:
print("Number of models:", len(structure))

Number of models: 1


In [21]:
structure[0]['A']

<Chain id=A>

In [16]:
# Lets check to see if the N, CA, and C backbones are really avaiable for each residue?
from dataclasses import dataclass
import numpy as np

class Backbone:
   
    def __init__(self):
        self.N: np.ndarray = None
        self.CA: np.ndarray = None
        self.C: np.ndarray = None

In [24]:
# Lets iterate over the model
model = structure[0]['A']
residue_dict = dict()

# Iterate over chains
for residue in model:
    # Lets first get the residue information
    residue_id = residue.get_id()
    residue_name = residue.get_resname()
    
    # Now lets iterate over each atom and get their locations
    print(residue_name)

VAL
GLU
ASN
PRO
VAL
GLU
THR
PHE
ARG
LYS
LEU
ILE
GLU
ASN
ASP
SER
THR
LEU
TYR
MET
LEU
ALA
HIS
SER
MET
PHE
ASP
GLU
VAL
PRO
GLU
LYS
ALA
PRO
TYR
ASP
ARG
ASP
PRO
THR
THR
LEU
LYS
LYS
GLN
VAL
ARG
ASN
TYR
LYS
THR
MET
LEU
TYR
LEU
PHE
ASN
THR
LEU
LEU
THR
GLU
VAL
PRO
GLU
TYR
PHE
LEU
ARG
ASP
ASN
PRO
ASN
VAL
PRO
SER
GLY
LEU
ILE
GLY
PHE
PRO
PHE
ASN
ILE
ILE
VAL
ASP
TRP
PRO
MET
GLY
THR
PRO
SER
GLY
ARG
GLN
PHE
PHE
LEU
ASP
THR
ARG
VAL
ASN
LYS
CYS
LEU
LYS
ASP
ILE
LEU
ASN
LYS
TRP
ASN
GLU
PHE
LEU
LYS
ASP
PRO
THR
ALA
GLN
GLY
ASN
GLY
ASN
LYS
GLY
GLY
ASN
GLN
ALA
LEU
ILE
ASP
ALA
GLY
TRP
SER
SER
ASP
ALA
ALA
VAL
GLU
GLN
LEU
VAL
ASN
LYS
ALA
ASN
GLU
SER
THR
THR
ASP
LYS
LYS
LYS
THR
PHE
SER
GLU
ILE
PHE
GLN
HIS
PRO
ALA
ASN
GLY
THR
GLN
GLU
ASN
PHE
PHE
ASN
TYR
ALA
CYS
TRP
ASP
ASN
PHE
PHE
THR
ARG
ARG
PHE
LYS
ASP
GLY
VAL
ARG
PRO
VAL
ALA
ASP
ALA
ALA
VAL
VAL
ASN
ALA
CYS
GLU
SER
PHE
PRO
LEU
SER
PHE
ASP
THR
ASP
VAL
SER
ARG
ARG
ASN
THR
PHE
TRP
LEU
LYS
GLY
THR
PRO
TYR
SER
LEU
HIS
ASP
MET
LEU
GLY
ALA
THR
GLN
ASP
GLU
ARG
VAL
ALA


In [18]:
for residue_name in residue_dict:
    assert residue_dict[residue_name].N is not None
    assert residue_dict[residue_name].CA is not None
    assert residue_dict[residue_name].C is not None
print('All residues have a backbone!!!!')

AssertionError: 