In [8]:
from pathlib import Path
import IMP
import IMP.atom


In [9]:
# Configuration
# Destination for the processed PDB files, relative to the notebook's
# working directory.
output_dir = Path("../data/pdbs")
# parents=True: also create ../data when it is missing instead of raising
# FileNotFoundError on a fresh checkout.
output_dir.mkdir(parents=True, exist_ok=True)

# Multi-model PDB file to process (from pdbs_1.ipynb)
multi_model_pdb = Path(Path.home(), "Documents/mtorc2/data/models/submission_1/models/89_2462_1_8525/all_akt_clusters.pdb")
# Chain ID selected from every model of the multi-model file.
extract_chain = "I"

# Directories with individual PDB files (from pdbs_2.ipynb).
# Anchored at the home directory, like multi_model_pdb above, rather than at
# a hard-coded /Users/<name> prefix.
_akt_atp_dir = Path(Path.home(), "Documents/mtorc2_ms/submission_1/figures/akt_atp")
pdb_dirs = [
    _akt_atp_dir / "akt",
    _akt_atp_dir / "akt_A",
    _akt_atp_dir / "akt_A_rand",
]

# Target chain ID for all files
target_chain = "E"

# Residue index offset (added to every residue index in the written files)
residue_offset = 145

# Count existing files to continue numbering.
# NOTE: this is the index of the next file to write; it only avoids
# overwriting when the existing files are named 0.pdb .. (N-1).pdb.
model_count = len(list(output_dir.glob("*.pdb")))
print(f"Starting from: {model_count}.pdb")


Starting from: 0.pdb


In [None]:
# Split the multi-model PDB into one file per model containing only chain
# `extract_chain`, then relabel the chain and shift the residue indices of
# each written file in place.
if multi_model_pdb.exists():
    # Read multi-model PDB
    m = IMP.Model()
    hierarchies = IMP.atom.read_multimodel_pdb(str(multi_model_pdb), m, IMP.atom.AllPDBSelector())
    print(f"Processing {len(hierarchies)} models from multi-model file")

    # Pass 1: write the selected chain of every model to its own sequentially
    # numbered file, remembering each path for the second pass.
    extracted_files = []
    for h in hierarchies:
        # Extract specific chain
        chain_sel = IMP.atom.Selection(h, chain_id=extract_chain)
        pdb_file = f"{output_dir}/{model_count}.pdb"
        IMP.atom.write_pdb(chain_sel, pdb_file)
        extracted_files.append(pdb_file)
        model_count += 1

    # Pass 2: update chain IDs and residue indices for the extracted files.
    # Iterating over the recorded paths avoids reconstructing the file names
    # from model_count arithmetic.
    for pdb_file in extracted_files:
        m = IMP.Model()
        h = IMP.atom.read_pdb(pdb_file, m)

        # Update chain ID
        for chain in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE):
            IMP.atom.Chain(chain).set_id(target_chain)

        # Add offset to residue indices
        for residue in IMP.atom.get_by_type(h, IMP.atom.RESIDUE_TYPE):
            res = IMP.atom.Residue(residue)
            res.set_index(res.get_index() + residue_offset)

        # Overwrite the extracted file with the relabelled structure.
        IMP.atom.write_pdb(h, pdb_file)
else:
    # Make the skip visible: without this, a missing input silently shifts
    # the numbering of every file written by later cells.
    print(f"Multi-model PDB not found, skipping: {multi_model_pdb}")


Processing 10 models from multi-model file


In [None]:
# Process all directories: read every *.pdb file, set its chain ID to
# `target_chain`, add `residue_offset` to every residue index, and write the
# result to output_dir under the next sequential number.
for directory in pdb_dirs:
    if not directory.exists():
        # Report the skip rather than silently producing fewer files.
        print(f"\nSkipping missing directory: {directory}")
        continue

    # glob() yields entries in arbitrary order; sort so that the mapping from
    # input file to output number is the same on every run and machine.
    pdb_files = sorted(directory.glob("*.pdb"))
    print(f"\nProcessing {len(pdb_files)} files from {directory.name}")

    for pdb_file in pdb_files:
        # Read PDB
        m = IMP.Model()
        h = IMP.atom.read_pdb(str(pdb_file), m)

        # Update chain ID
        for chain in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE):
            IMP.atom.Chain(chain).set_id(target_chain)

        # Add offset to residue indices
        for residue in IMP.atom.get_by_type(h, IMP.atom.RESIDUE_TYPE):
            res = IMP.atom.Residue(residue)
            res.set_index(res.get_index() + residue_offset)

        # Save with sequential numbering
        IMP.atom.write_pdb(h, f"{output_dir}/{model_count}.pdb")
        model_count += 1

# NOTE: model_count is the running output index, so this total also includes
# any *.pdb files that were already in output_dir when the configuration cell ran.
print(f"\nTotal files processed: {model_count}")
print(f"Residue indices increased by {residue_offset}")



Processing 3 files from akt

Processing 25 files from akt_A

Processing 25 files from akt_A_rand

Total files processed: 63
Residue indices increased by 145
