In [2]:
!pip install biopython

Collecting biopython
  Using cached biopython-1.84-cp39-cp39-macosx_11_0_arm64.whl (2.7 MB)
Installing collected packages: biopython
Successfully installed biopython-1.84


In [3]:
from Bio.PDB import PDBParser

In [4]:
# Create a PDB parser with default settings.
parser = PDBParser()

# Parse ./data/8z0f.pdb into a Structure object labelled '8z0f'.
structure = parser.get_structure(id='8z0f', file='./data/8z0f.pdb')



#### Information about the Biopython Structure hierarchy
* *Model*: A structure can have multiple models (though most have just one)
* *Chain*: Each model contains one or more chains (A, B, C, etc.)
* *Residue*: Chains are composed of residues (typically amino acids; a chain may also hold waters or ligands)
* *Atom*: Each residue has multiple atoms

In [5]:
# len() of a Structure gives the number of models it contains.
model_count = len(structure)
print("Number of models:", model_count)

Number of models: 1


In [6]:
# Let's check whether the N, CA, and C backbone atoms are really available for each residue.
from dataclasses import dataclass
import numpy as np

from typing import Optional


@dataclass(eq=False)
class Backbone:
    """Coordinates of the three backbone atoms (N, CA, C) of a residue.

    Every field starts as ``None`` and is expected to be replaced by the
    atom's coordinate array once that atom has been found. Defaulting to
    ``None`` (rather than a zero vector) keeps "atom never seen"
    distinguishable from a real coordinate, so an ``is not None`` check
    is meaningful.

    Attributes:
        N:  coordinates of the backbone nitrogen, or None if not set.
        CA: coordinates of the alpha carbon, or None if not set.
        C:  coordinates of the carbonyl carbon, or None if not set.
    """

    # eq=False keeps identity-based comparison: a generated __eq__ would
    # compare NumPy arrays element-wise and fail when truth-tested.
    N: Optional[np.ndarray] = None
    CA: Optional[np.ndarray] = None
    C: Optional[np.ndarray] = None

In [7]:
# Walk the first model and record the backbone atom coordinates of each residue.
model = structure[0]
residue_dict = dict()

# Atom names that make up the protein backbone; they match Backbone's fields.
backbone_atom_names = ('N', 'CA', 'C')

# Iterate over chains
for chain in model:
    chain_id = chain.get_id()

    # Iterate over residues
    for residue in chain:
        # First get the residue information
        residue_id = residue.get_id()
        residue_name = residue.get_resname()

        # Key by (chain id, residue id) so each residue gets its own entry.
        # Keying by residue name would merge every residue of the same type
        # (e.g. all ARG) into a single Backbone.
        residue_key = (chain_id, residue_id)

        # The first field of the residue id is blank for standard residues
        # (see the printed ids, e.g. (' ', 56, ' ')); it is assumed to be
        # non-blank for waters/hetero groups, which have no N/CA/C backbone
        # and are therefore not recorded.
        is_standard = residue_id[0] == ' '
        if is_standard:
            # Create the entry once per residue, before looking at atoms, so a
            # residue with no backbone atoms at all is still recorded.
            residue_dict[residue_key] = Backbone()

        # Now iterate over each atom and get its location
        for atom in residue:
            # Get atom information (name, coordinates)
            atom_name = atom.get_name()
            atom_coord = atom.get_coord()

            print(f"Residue: {residue_name} {residue_id}, Atom: {atom_name}, Coordinates: {atom_coord}")

            if is_standard and atom_name in backbone_atom_names:
                setattr(residue_dict[residue_key], atom_name, atom_coord)

Residue: ARG (' ', 56, ' '), Atom: N, Coordinates: [ 85.737 102.217  94.224]
Residue: ARG (' ', 56, ' '), Atom: CA, Coordinates: [ 85.335 101.387  95.353]
Residue: ARG (' ', 56, ' '), Atom: C, Coordinates: [ 86.553 100.798  96.053]
Residue: ARG (' ', 56, ' '), Atom: O, Coordinates: [87.281 99.991 95.479]
Residue: ARG (' ', 56, ' '), Atom: CB, Coordinates: [ 84.403 100.266  94.888]
Residue: ARG (' ', 56, ' '), Atom: CG, Coordinates: [ 82.984 100.723  94.586]
Residue: ARG (' ', 56, ' '), Atom: CD, Coordinates: [ 82.822 101.115  93.126]
Residue: ARG (' ', 56, ' '), Atom: NE, Coordinates: [ 81.734 102.065  92.931]
Residue: ARG (' ', 56, ' '), Atom: CZ, Coordinates: [ 81.282 102.455  91.747]
Residue: ARG (' ', 56, ' '), Atom: NH1, Coordinates: [ 81.804 101.996  90.621]
Residue: ARG (' ', 56, ' '), Atom: NH2, Coordinates: [ 80.28  103.329  91.69 ]
Residue: ARG (' ', 56, ' '), Atom: HA, Coordinates: [ 84.853 101.934  95.993]
Residue: ARG (' ', 56, ' '), Atom: HB2, Coordinates: [84.766 99.875 

In [8]:
# Verify that every entry in residue_dict received N, CA and C coordinates.
# A coordinate counts as missing when it is None or an all-zero placeholder
# vector, so the check is meaningful whichever "unset" value Backbone uses.
missing_backbone = {}
for residue_key, backbone in residue_dict.items():
    absent_atoms = []
    for atom_name in ('N', 'CA', 'C'):
        coord = getattr(backbone, atom_name)
        if coord is None or not np.any(coord):
            absent_atoms.append(atom_name)
    if absent_atoms:
        missing_backbone[residue_key] = absent_atoms

# Report exactly which entries are incomplete instead of a bare AssertionError.
assert not missing_backbone, f"Entries missing backbone atoms: {missing_backbone}"
print('All residues have a backbone!!!!')

All residues have a backbone!!!!
