In [3]:
import os
import subprocess
import numpy as np
from Bio import PDB
from Bio.PDB.Superimposer import Superimposer


In [15]:
# Cell 1: Import necessary libraries
import os
import subprocess
import numpy as np
from Bio import PDB
from Bio.PDB.PDBList import PDBList
from Bio.PDB.Superimposer import Superimposer
import warnings
from Bio.PDB.PDBExceptions import PDBConstructionWarning



In [18]:
# Cell 2: Custom warning handler
def custom_warning_handler(message, category, filename, lineno, file=None, line=None):
    print(f"Warning: {message}")

warnings.showwarning = custom_warning_handler


In [19]:
# Cell 2: Process PDB files with mark_sur
#def run_mark_sur(input_pdb, output_pdb):
#    subprocess.run([mark_sur_path, input_pdb, output_pdb], check=True)

# Cell 2: Download PDB files
def download_pdb(pdb_id):
    pdbl = PDBList()
    filename = pdbl.retrieve_pdb_file(pdb_id, pdir='.', file_format='pdb')
    return filename
# Download the crystal structure
crystal_file = download_pdb('1ACB')


Structure exists: './pdb1acb.ent' 


In [20]:
# Cell 3: Run ZDOCK
#def run_zdock(receptor, ligand, output):
#    subprocess.run([zdock_path, "-R", receptor, "-L", ligand, "-o", output], check=True)
# Cell 3: Load structures
def load_structure(filename):
    parser = PDB.PDBParser()
    structure = parser.get_structure('structure', filename)
    return structure

crystal_structure = load_structure(crystal_file)




In [21]:
# Cell 4: Create complex structures
#def create_complexes(zdock_out):
#    subprocess.run([create_pl_path, zdock_out], check=True)
# Cell 4: Load structures with additional information
def load_structure(filename):
    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure('structure', filename)
    
    # Analyze chain continuity
    for model in structure:
        for chain in model:
            residues = list(chain.get_residues())
            for i in range(len(residues) - 1):
                if residues[i].id[1] + 1 != residues[i+1].id[1]:
                    print(f"Gap detected in chain {chain.id} between residues {residues[i].id[1]} and {residues[i+1].id[1]}")
    
    return structure


In [22]:
# Cell 4a : Load PDB structure
#def load_structure(filename):
#    parser = PDB.PDBParser()
#    structure = parser.get_structure('structure', filename)
#    return structure
# Load the crystal structure
with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter("always")
    crystal_structure = load_structure(crystal_file)
    for warning in w:
        print(f"Warning details: {warning}")


Gap detected in chain E between residues 13 and 16
Gap detected in chain E between residues 146 and 149
Gap detected in chain E between residues 245 and 406
Gap detected in chain E between residues 421 and 423
Gap detected in chain E between residues 425 and 427
Gap detected in chain E between residues 446 and 448
Gap detected in chain E between residues 459 and 463
Gap detected in chain E between residues 463 and 469
Gap detected in chain E between residues 469 and 501
Gap detected in chain E between residues 504 and 516
Gap detected in chain E between residues 518 and 523
Gap detected in chain E between residues 525 and 527
Gap detected in chain E between residues 528 and 532
Gap detected in chain E between residues 532 and 535
Gap detected in chain E between residues 535 and 538
Gap detected in chain E between residues 538 and 601
Gap detected in chain E between residues 601 and 603
Gap detected in chain E between residues 604 and 606
Gap detected in chain E between residues 608 and

In [23]:
# make sure you have pymol installed in order to visualize the protein and the gaps detected in the previous cell
!pymol pdb1acb.ent

 PyMOL(TM) Molecular Graphics System, Version 3.0.0.
 Copyright (c) Schrodinger, LLC.
 All Rights Reserved.
 
    Created by Warren L. DeLano, Ph.D. 
 
    PyMOL is user-supported open-source software.  Although some versions
    are freely available, PyMOL is not in the public domain.
 
    If PyMOL is helpful in your work or study, then please volunteer 
    support for our ongoing efforts to create open and affordable scientific
    software by purchasing a PyMOL Maintenance and/or Support subscription.

    More information can be found at "http://www.pymol.org".
 
    Enter "help" for a list of commands.
    Enter "help <command-name>" for information on a specific command.

 Hit ESC anytime to toggle between text and graphics.

 Detected OpenGL version 2.1. Shaders available.
 Tessellation shaders not available
 Detected GLSL version 1.20.
 OpenGL graphics engine:
  GL_VENDOR:   Apple
  GL_RENDERER: Apple M2 Pro
  GL_VERSION:  2.1 Metal - 88.1
 Detected 10 CPU cores.  Enabled mul

In [24]:
# Cell 6: Compare models
#def compare_models(models):
#    rmsd_matrix = np.zeros((len(models), len(models)))
#    for i in range(len(models)):
#        for j in range(i+1, len(models)):
#            sup = Superimposer()
#            atoms_1 = list(models[i].get_atoms())
#            atoms_2 = list(models[j].get_atoms())
#            sup.set_atoms(atoms_1, atoms_2)
#            rmsd_matrix[i,j] = rmsd_matrix[j,i] = sup.rms
#    return rmsd_matrix

# Cell 5: Analyze structure
def analyze_structure(structure):
    print(f"Structure ID: {structure.id}")
    print(f"Number of models: {len(structure)}")
    for model in structure:
        print(f"  Model {model.id}:")
        for chain in model:
            residues = list(chain.get_residues())
            print(f"    Chain {chain.id}: {len(residues)} residues")
            print(f"      First residue: {residues[0].resname} {residues[0].id[1]}")
            print(f"      Last residue: {residues[-1].resname} {residues[-1].id[1]}")

print("Analysis of crystal structure:")
analyze_structure(crystal_structure)



Analysis of crystal structure:
Structure ID: structure
Number of models: 1
  Model 0:
    Chain E: 354 residues
      First residue: CYS 1
      Last residue: HOH 726
    Chain I: 92 residues
      First residue: LYS 8
      Last residue: HOH 725


# In the following section of the notebook we will create a script that provides a comprehensive workflow for comparing protein structures, preparing them for docking, running ZDOCK, and analyzing the results. 

In [25]:
# Cell 1: Import necessary libraries
import os
from Bio import PDB
from Bio.PDB.PDBList import PDBList
from Bio.PDB.Superimposer import Superimposer
import numpy as np


In [26]:
# Cell 2: Download and load PDB structures
def download_and_load_pdb(pdb_id):
    pdbl = PDBList()
    filename = pdbl.retrieve_pdb_file(pdb_id, pdir='.', file_format='pdb')
    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure(pdb_id, filename)
    return structure

complex_structure = download_and_load_pdb('1ACB')
receptor_structure = download_and_load_pdb('5CHA')
ligand_structure = download_and_load_pdb('1CSE')


Structure exists: './pdb1acb.ent' 
Downloading PDB structure '5cha'...
Downloading PDB structure '1cse'...


In [29]:
# Cell 3: Function to calculate RMSD between two structures
def calculate_rmsd(struct1, struct2, chain1, chain2):
    atoms1 = [atom for atom in struct1[0][chain1].get_atoms() if atom.name == 'CA']
    atoms2 = [atom for atom in struct2[0][chain2].get_atoms() if atom.name == 'CA']
    
    len1, len2 = len(atoms1), len(atoms2)
    if len1 != len2:
        return None, f"Atom count mismatch: {len1} vs {len2}"
    
    sup = Superimposer()
    sup.set_atoms(atoms1, atoms2)
    return sup.rms, None


In [30]:
# Cell 4: Compare complex structure with individual structures
def compare_structures(struct1, struct2, chain1, chain2, name1, name2):
    rmsd, error = calculate_rmsd(struct1, struct2, chain1, chain2)
    if rmsd is not None:
        print(f"RMSD between {name1} (chain {chain1}) and {name2} (chain {chain2}): {rmsd:.2f} Å")
    else:
        print(f"Could not calculate RMSD between {name1} (chain {chain1}) and {name2} (chain {chain2}): {error}")
    
    # Print additional information about the chains
    print(f"  {name1} (chain {chain1}) atom count: {len(list(struct1[0][chain1].get_atoms()))}")
    print(f"  {name2} (chain {chain2}) atom count: {len(list(struct2[0][chain2].get_atoms()))}")
    print(f"  {name1} (chain {chain1}) residue count: {len(list(struct1[0][chain1].get_residues()))}")
    print(f"  {name2} (chain {chain2}) residue count: {len(list(struct2[0][chain2].get_residues()))}")
    print()

compare_structures(complex_structure, receptor_structure, 'E', 'A', '1ACB', '5CHA')
compare_structures(complex_structure, ligand_structure, 'I', 'I', '1ACB', '1CSE')

Could not calculate RMSD between 1ACB (chain E) and 5CHA (chain A): Atom count mismatch: 241 vs 8
  1ACB (chain E) atom count: 1880
  5CHA (chain A) atom count: 63
  1ACB (chain E) residue count: 354
  5CHA (chain A) residue count: 18

RMSD between 1ACB (chain I) and 1CSE (chain I): 0.63 Å
  1ACB (chain I) atom count: 551
  1CSE (chain I) atom count: 642
  1ACB (chain I) residue count: 92
  1CSE (chain I) residue count: 183



In [31]:
# Cell 5: Detailed structure analysis
def analyze_structure(structure, structure_id):
    print(f"Analyzing {structure_id}:")
    for model in structure:
        for chain in model:
            print(f"  Chain {chain.id}:")
            residues = list(chain.get_residues())
            print(f"    Number of residues: {len(residues)}")
            print(f"    First residue: {residues[0].resname} {residues[0].id[1]}")
            print(f"    Last residue: {residues[-1].resname} {residues[-1].id[1]}")
            print(f"    Number of atoms: {len(list(chain.get_atoms()))}")
    print()

analyze_structure(complex_structure, "1ACB (Complex)")
analyze_structure(receptor_structure, "5CHA (Receptor)")
analyze_structure(ligand_structure, "1CSE (Ligand)")

Analyzing 1ACB (Complex):
  Chain E:
    Number of residues: 354
    First residue: CYS 1
    Last residue: HOH 726
    Number of atoms: 1880
  Chain I:
    Number of residues: 92
    First residue: LYS 8
    Last residue: HOH 725
    Number of atoms: 551

Analyzing 5CHA (Receptor):
  Chain A:
    Number of residues: 18
    First residue: CYS 1
    Last residue: HOH 741
    Number of atoms: 63
  Chain B:
    Number of residues: 206
    First residue: ILE 16
    Last residue: HOH 742
    Number of atoms: 1055
  Chain C:
    Number of residues: 154
    First residue: ALA 149
    Last residue: HOH 739
    Number of atoms: 759
  Chain E:
    Number of residues: 12
    First residue: CYS 1
    Last residue: HOH 693
    Number of atoms: 57
  Chain F:
    Number of residues: 190
    First residue: ILE 16
    Last residue: HOH 738
    Number of atoms: 1039
  Chain G:
    Number of residues: 141
    First residue: ALA 149
    Last residue: HOH 735
    Number of atoms: 746

Analyzing 1CSE (Ligan

In [32]:
# Cell 6: Re-download and analyze 5CHA
def redownload_and_analyze(pdb_id):
    pdbl = PDBList()
    filename = pdbl.retrieve_pdb_file(pdb_id, pdir='.', file_format='pdb', overwrite=True)
    structure = PDB.PDBParser(QUIET=True).get_structure(pdb_id, filename)
    analyze_structure(structure, f"{pdb_id} (Re-downloaded)")

redownload_and_analyze("")

Downloading PDB structure '5cha'...
Analyzing 5CHA (Re-downloaded):
  Chain A:
    Number of residues: 18
    First residue: CYS 1
    Last residue: HOH 741
    Number of atoms: 63
  Chain B:
    Number of residues: 206
    First residue: ILE 16
    Last residue: HOH 742
    Number of atoms: 1055
  Chain C:
    Number of residues: 154
    First residue: ALA 149
    Last residue: HOH 739
    Number of atoms: 759
  Chain E:
    Number of residues: 12
    First residue: CYS 1
    Last residue: HOH 693
    Number of atoms: 57
  Chain F:
    Number of residues: 190
    First residue: ILE 16
    Last residue: HOH 738
    Number of atoms: 1039
  Chain G:
    Number of residues: 141
    First residue: ALA 149
    Last residue: HOH 735
    Number of atoms: 746



In [33]:
# Cell 7: Updated structure preparation for docking
def prepare_structure_for_docking(structure, chain_id, start_residue=None, end_residue=None):
    new_structure = PDB.Structure.Structure('prepared')
    model = PDB.Model.Model(0)
    new_structure.add(model)
    
    for chain in structure[0]:
        if chain.id == chain_id:
            new_chain = PDB.Chain.Chain(chain_id)
            for residue in chain:
                if residue.id[0] == ' ':  # Check if it's a standard amino acid
                    if (start_residue is None or residue.id[1] >= start_residue) and \
                       (end_residue is None or residue.id[1] <= end_residue):
                        new_chain.add(residue)
            model.add(new_chain)
    
    return new_structure

# Prepare receptor (5CHA, chain B)
prepared_receptor = prepare_structure_for_docking(receptor_structure, 'B', start_residue=16)

# Prepare ligand (1CSE, chain I)
prepared_ligand = prepare_structure_for_docking(ligand_structure, 'I', start_residue=8)


In [34]:
# Cell 8: Save prepared structures
io = PDB.PDBIO()

io.set_structure(prepared_receptor)
io.save("prepared_receptor.pdb")

io.set_structure(prepared_ligand)
io.save("prepared_ligand.pdb")

print("Prepared structures saved as 'prepared_receptor.pdb' and 'prepared_ligand.pdb'")

# Cell 9: Verify prepared structures
def verify_structure(structure, structure_name):
    print(f"Verifying {structure_name}:")
    for model in structure:
        for chain in model:
            residues = list(chain.get_residues())
            print(f"  Chain {chain.id}:")
            print(f"    Number of residues: {len(residues)}")
            print(f"    First residue: {residues[0].resname} {residues[0].id[1]}")
            print(f"    Last residue: {residues[-1].resname} {residues[-1].id[1]}")
            print(f"    Number of atoms: {len(list(chain.get_atoms()))}")
    print()

verify_structure(prepared_receptor, "Prepared Receptor (5CHA, chain B)")
verify_structure(prepared_ligand, "Prepared Ligand (1CSE, chain I)")

Prepared structures saved as 'prepared_receptor.pdb' and 'prepared_ligand.pdb'
Verifying Prepared Receptor (5CHA, chain B):
  Chain B:
    Number of residues: 131
    First residue: ILE 16
    Last residue: TYR 146
    Number of atoms: 980

Verifying Prepared Ligand (1CSE, chain I):
  Chain I:
    Number of residues: 63
    First residue: LYS 8
    Last residue: GLY 70
    Number of atoms: 522



In [41]:
# Cell 10: Run ZDOCK
import subprocess
import os

def run_zdock(receptor_file, ligand_file, output_file):
    zdock_path = "zdock"  # Adjust this if ZDOCK is not in your PATH
    command = f"{zdock_path} -R {receptor_file} -L {ligand_file} -o {output_file}"
    try:
        subprocess.run(command, shell=True, check=True)
        print(f"ZDOCK completed successfully. Output saved to {output_file}")
    except subprocess.CalledProcessError as e:
        print(f"Error running ZDOCK: {e}")

# Uncomment the following line to run ZDOCK
#run_zdock("prepared_receptor.pdb", "prepared_ligand.pdb", "zdock_output.txt")


In [42]:
# Cell 11: Analyze ZDOCK results
def analyze_zdock_results(zdock_output_file, top_n=10):
    results = []
    with open(zdock_output_file, 'r') as f:
        lines = f.readlines()
    
    for line in lines[6:6+top_n]:  # ZDOCK output format: first 6 lines are header
        parts = line.split()
        results.append({
            'rank': int(parts[0]),
            'score': float(parts[1]),
            'rotation': [float(parts[2]), float(parts[3]), float(parts[4])],
            'translation': [float(parts[5]), float(parts[6]), float(parts[7])]
        })
    
    return results


In [43]:
# Cell 12: Compare top ZDOCK result with crystal structure
from Bio.PDB.Superimposer import Superimposer

def compare_docked_to_crystal(docked_complex, crystal_complex, receptor_chain, ligand_chain):
    # Extract CA atoms from receptor and ligand in both structures
    docked_receptor_cas = [atom for atom in docked_complex[0][receptor_chain].get_atoms() if atom.name == 'CA']
    docked_ligand_cas = [atom for atom in docked_complex[0][ligand_chain].get_atoms() if atom.name == 'CA']
    crystal_receptor_cas = [atom for atom in crystal_complex[0][receptor_chain].get_atoms() if atom.name == 'CA']
    crystal_ligand_cas = [atom for atom in crystal_complex[0][ligand_chain].get_atoms() if atom.name == 'CA']

    # Superimpose receptor
    sup_receptor = Superimposer()
    sup_receptor.set_atoms(crystal_receptor_cas, docked_receptor_cas)
    sup_receptor.apply(docked_complex[0][ligand_chain].get_atoms())

    # Calculate RMSD for ligand
    sup_ligand = Superimposer()
    sup_ligand.set_atoms(crystal_ligand_cas, docked_ligand_cas)
    
    return sup_ligand.rms


In [44]:
# Cell 13: Main execution
def main():
    # Run ZDOCK
    run_zdock("prepared_receptor.pdb", "prepared_ligand.pdb", "zdock_output.txt")

    # Analyze ZDOCK results
    results = analyze_zdock_results("zdock_output.txt")
    print("Top 10 ZDOCK predictions:")
    for result in results:
        print(f"Rank: {result['rank']}, Score: {result['score']}")

    # Compare top result with crystal structure
    # Note: You'll need to create the docked complex structure using ZDOCK's create.pl script
    # and then load it here. For now, we'll use a placeholder.
    docked_complex = PDB.PDBParser().get_structure("docked", "docked_complex.pdb")
    crystal_complex = PDB.PDBParser().get_structure("crystal", "1ACB.pdb")
    
    rmsd = compare_docked_to_crystal(docked_complex, crystal_complex, 'B', 'I')
    print(f"RMSD between top docked model and crystal structure: {rmsd:.2f} Å")

# Uncomment the following line to run the main function
# main()

print("ZDOCK execution and analysis script prepared. Uncomment the last line to run the full analysis.")

ZDOCK execution and analysis script prepared. Uncomment the last line to run the full analysis.


The following code fragment creates a script that provides a Python-based molecular docking workflow using AutoDock Vina and PyMOL.

In [45]:
# Cell 1: Check for available docking tools
def check_tool_availability(tool_name):
    try:
        subprocess.run([tool_name, '--help'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        return True
    except FileNotFoundError:
        return False

vina_available = check_tool_availability('vina')
pymol_available = check_tool_availability('pymol')

print(f"AutoDock Vina available: {vina_available}")
print(f"PyMOL available: {pymol_available}")


AutoDock Vina available: False
PyMOL available: True
