In [1]:
import os
import pandas as pd
import pdb
import matplotlib.pyplot as plt
import pdb
import subprocess
import logomaker as lm
import matplotlib.pyplot as plt
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Bio import SeqIO
from enum import Enum

Matplotlib is building the font cache; this may take a moment.


In [6]:
import os
import sys
import urllib.request


import Bio
import Bio.PDB
import Bio.SeqRecord

def download_read_pdb(pdbcode, datadir, keepfile=True):
    """
    Fetches a PDB entry via `download_pdb`, stores it in `datadir`
    and parses the stored file via `read_pdb`.
    :param pdbcode: The standard PDB ID e.g. '3ICB'
    :param datadir: The directory the downloaded file is written to
    :param keepfile: when False, the downloaded file is removed again after parsing
        (default: the file stays on disk)
    :return: whatever `read_pdb` returns for the downloaded file (a Bio.PDB Structure
        object or None), or None if the download itself failed
    """
    local_path = download_pdb(pdbcode, datadir)
    if local_path is None:
        # download failed; download_pdb has already reported the reason
        return None
    parsed = read_pdb(pdbcode, local_path)
    if keepfile:
        return parsed
    # caller does not want the file: delete it, still hand back the parse result
    os.remove(local_path)
    return parsed
        
def download_pdb(pdbcode, datadir, downloadurl="http://files.rcsb.org/download/"):
    """
    Downloads a PDB file from the Internet and saves it in a data directory.
    The data directory (including missing parent directories) is created
    if it does not exist yet.
    :param pdbcode: The standard PDB ID e.g. '3ICB' or '3icb'
    :param datadir: The directory where the downloaded file will be saved;
        an empty string means the current working directory
    :param downloadurl: The base PDB download URL, cf.
        `https://www.rcsb.org/pages/download/http#structures` for details
        Note that the unencrypted HTTP protocol is used by default
        to avoid spurious OpenSSL errors...
    :return: the full path to the downloaded PDB file or None if something went wrong
    """
    pdbfn = pdbcode + ".pdb"
    url = downloadurl + pdbfn
    outfnm = os.path.join(datadir, pdbfn)
    try:
        # urlretrieve() opens `outfnm` for writing but never creates directories,
        # so a fresh `datadir` used to make every download fail.
        # os.makedirs("") raises, hence the guard for the "current directory" case.
        if datadir:
            os.makedirs(datadir, exist_ok=True)
        urllib.request.urlretrieve(url, outfnm)
        return outfnm
    except Exception as err:
        # all sorts of things could have gone wrong (network, bad PDB ID,
        # unwritable directory...): report on stderr and signal failure with None
        print(str(err), file=sys.stderr)
        return None

def read_pdb(pdbcode, pdbfilenm):
    """
    Parses a PDB file into a structure object.
    :param pdbcode: A PDB ID string, passed to the parser as the structure ID
    :param pdbfilenm: The PDB file to parse
    :return: a Bio.PDB.Structure object, or None if anything raised while parsing
        (the error text is printed to stderr)
    """
    try:
        # QUIET=True suppresses PDBConstructionWarning
        return Bio.PDB.PDBParser(QUIET=True).get_structure(pdbcode, pdbfilenm)
    except Exception as failure:
        print(str(failure), file=sys.stderr)
    return None

def extract_seqrecords(pdbcode, struct):
    """
    Extracts the sequence records from a Bio.PDB structure.
    One record is produced per chain for which at least one polypeptide can be built.
    Chains that yield no polypeptide are skipped with a message on stderr
    (previously they aborted the whole extraction with an IndexError).
    Only the first polypeptide of each chain is used, so for a chain that is
    split into several polypeptides the later fragments are not part of the record.
    :param pdbcode: the PDB ID of the structure, needed to add a sequence ID to the result
    :param struct: a Bio.PDB.Structure object
    :return: a list of Bio.SeqRecord objects, possibly empty
    """
    ppb = Bio.PDB.PPBuilder()
    seqrecords = []
    for i, chain in enumerate(struct.get_chains()):
        # extract and store sequences as list of SeqRecord objects
        pps = ppb.build_peptides(chain)    # polypeptides
        seqid = pdbcode + chain.id
        if not pps:
            # presumably a chain without amino-acid residues (ligands, waters, ...);
            # there is no pps[0] to take a sequence from
            print("No polypeptide found in chain {}, skipped".format(seqid), file=sys.stderr)
            continue
        seq = pps[0].get_sequence() # just take the first, hope there's no chain break
        # `i` counts all chains, including skipped ones, so numbering is unchanged
        seqrec = Bio.SeqRecord.SeqRecord(seq, id=seqid,
            description="Sequence #{}, {}".format(i+1, seqid))
        seqrecords.append(seqrec)
    return seqrecords

def get_calphas(struct):
    """
    Collects the C-alpha atoms of a PDB structure.
    An atom is selected when its full (space-padded) name is exactly " CA ".
    :param struct: A Bio.PDB.Structure object.
    :return: A list of the matching Bio.PDB.Atom objects, in the order
        in which `struct.get_atoms()` yields them.
    """
    selected = []
    for candidate in struct.get_atoms():
        if candidate.get_fullname() != " CA ":
            continue
        selected.append(candidate)
    return selected


In [28]:
# Input PDB files used by the cells below.
# NOTE(review): these are absolute, machine-specific paths; the notebook will not
# run elsewhere until they are pointed at a local copy of the files.
FULL_PDB = '/Users/yitongtseo/Documents/GitHub/ethical_necromancy/nanobody_design_scratch/myosin_2_pdb_files/6ysy_just_myosin.pdb'
# NOTE(review): this file name starts with '6sys' while the other files use '6ysy'
# -- confirm it matches the file on disk.
SELECTED_RESIDUES = '/Users/yitongtseo/Documents/GitHub/ethical_necromancy/nanobody_design_scratch/myosin_2_pdb_files/6ysy_chunks/6sys_selected_residues.pdb'

# read_pdb() only prints the error to stderr and returns None on failure.
# Stop here with a clear message instead of failing later on `None.get_residues()`.
selected_resi_pdb = read_pdb('selected_residues', SELECTED_RESIDUES)
if selected_resi_pdb is None:
    raise RuntimeError(f'Could not read the selected-residues PDB file: {SELECTED_RESIDUES}')

In [None]:
# from Bio.PDB import PDBParser, DSSP
# from Bio.PDB.DSSP import DSSP, ss_to_index


# # Step 1: Load the PDB structure
# parser = PDBParser()
# structure = parser.get_structure('full_protein', FULL_PDB)

# # Step 2: Calculate or load DSSP information
# model = structure[0]  # Assuming the structure has only one model
# dssp = DSSP(model, FULL_PDB,dssp='mkdssp')


# # Step 3: Access secondary structure information
# # For example, to iterate over residues and identify alpha helix or beta sheet residues:
# alpha_helices = []
# beta_sheets = []

# for residue in structure.get_residues():
#     residue_id = residue.id[1]
#     ss = dssp[residue_id][2]  # Get secondary structure assignment (H for alpha helix, E for beta sheet)
    
#     if ss == 'H':
#         alpha_helices.append(residue)
#     elif ss == 'E':
#         beta_sheets.append(residue)

# # Step 4: Use alpha_helices and beta_sheets lists for further analysis
# print("Residues in Alpha Helices:", [residue.id for residue in alpha_helices])
# print("Residues in Beta Sheets:", [residue.id for residue in beta_sheets])


## HADDOCK https://rascar.science.uu.nl/haddock2.4/submit/1 
6ysy_chunk1
Nanobody residues: 
26,27,28,29,30,31,32,33,51,52,53,54,55,56,57,58,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120

In [44]:
# All residues of the selected-residues structure parsed in an earlier cell.
target_residues = set(selected_resi_pdb.get_residues())
print(len(target_residues))

# for chunk_idx in range(1, 11):
chunk_idx = 10
chunk_pdb_name = f'/Users/yitongtseo/Documents/GitHub/ethical_necromancy/nanobody_design_scratch/myosin_2_pdb_files/6ysy_chunks/6ysy_chunk{chunk_idx}.pdb'
# The chunk is parsed under the same structure ID ('selected_residues') as the selection.
# NOTE(review): presumably required so that residues coming from the two separately
# parsed structures compare equal in the set intersection below -- confirm before renaming.
chunk_pdb = read_pdb('selected_residues', chunk_pdb_name)
if chunk_pdb is None:
    # read_pdb() swallows the error and returns None; fail here with a clear message
    raise RuntimeError(f'Could not read the chunk PDB file: {chunk_pdb_name}')
chunk_residues = set(chunk_pdb.get_residues())

# Residues present both in the selection and in this chunk (computed once, used twice).
shared_residues = target_residues.intersection(chunk_residues)
feature_residues = [r for r in shared_residues if not r.is_disordered()]
print('chunk_residues ', len(feature_residues))

# NOTE(review): the count above excludes disordered residues, the ID list below does not.
print(sorted(([residue.id[1] for residue in shared_residues])))


483
chunk_residues  50
[121, 144, 145, 146, 147, 148, 149, 158, 161, 163, 164, 165, 167, 168, 169, 170, 171, 172, 178, 180, 184, 187, 190, 191, 194, 199, 239, 240, 241, 242, 243, 252, 256, 257, 258, 260, 261, 267, 268, 269, 270, 271, 455, 456, 457, 458, 459, 585, 586, 669]
