In [17]:
from openmm.app import PDBFile, Modeller
from pdbfixer import PDBFixer
import pandas as pd
from collections import defaultdict

In [19]:
# Load the per-student mutation assignments. Each row is one combination; every
# column other than 'Student' becomes one key/value pair of that combination.
df = pd.read_csv('peptide_mutants_students.csv')

# Map each student to the list of combinations assigned to them.
# A single groupby pass replaces re-filtering the whole frame once per student,
# and sort=False keeps the students in order of first appearance in the CSV
# rather than in the arbitrary iteration order of a set.
student_combs = defaultdict(list)
for student, student_rows in df.groupby('Student', sort=False):
    for _, row in student_rows.iterrows():
        student_combs[student].append(
            {column: value for column, value in row.items() if column != 'Student'}
        )

In [23]:
# Check for duplicates: report every combination that has been assigned to two
# different students. Combinations repeated within one student are not checked.
assignments = list(student_combs.items())
for idx, (student, data) in enumerate(assignments):
    # Compare only against students later in the list, so each pair of students
    # is examined once and a clash is not reported twice.
    for student2, data2 in assignments[idx + 1:]:
        for d in data:
            for d2 in data2:
                if all(d[k] == d2[k] for k in d.keys()):
                    # Name both students and the shared combination so the
                    # clash can actually be located and fixed.
                    print(f"Duplicate found: {student} and {student2} share {d}")

In [30]:
from pathlib import Path

def get_res_name_by_id(struc:PDBFixer, id, chainid):
    """Look up a residue name from its residue id and chain id.

    Walks ``struc.topology.residues()`` in order and returns the ``name`` of
    the first residue whose ``id`` equals ``id`` (both compared after
    conversion with ``int``) and whose ``chain.id`` equals ``chainid``.

    Returns ``None`` when no residue matches.
    """
    matching_names = (
        residue.name
        for residue in struc.topology.residues()
        if int(residue.id) == int(id) and residue.chain.id == chainid
    )
    # First hit wins; the default mirrors falling off the end of a loop.
    return next(matching_names, None)

def create_mutant(fpath, resids, mut_reses, chainid):
    """Load a structure with PDBFixer, mutate residues in one chain and repair it.

    Parameters
    ----------
    fpath
        Structure file passed to ``PDBFixer``.
    resids
        Residue ids to mutate; paired position by position with ``mut_reses``.
    mut_reses
        Replacement residue names, one per entry of ``resids``.
    chainid
        Id of the chain the residues are looked up in and mutated in.

    Returns
    -------
    The ``PDBFixer`` instance after the mutations have been applied and the
    find/add missing residues, atoms and hydrogens calls have run.
    """
    fixer = PDBFixer(fpath)

    # One "<current name>-<residue id>-<new name>" string per requested change;
    # the current name is read from the freshly loaded structure.
    mutation_specs = [
        f"{get_res_name_by_id(fixer, position, chainid)}-{position}-{replacement}"
        for position, replacement in zip(resids, mut_reses)
    ]
    fixer.applyMutations(mutations=mutation_specs, chain_id=chainid)

    # Repair calls, run in this fixed order after the mutations.
    repair_steps = (
        fixer.findMissingResidues,
        fixer.findMissingAtoms,
        fixer.addMissingAtoms,
        fixer.addMissingHydrogens,
    )
    for repair_step in repair_steps:
        repair_step()

    return fixer


fpath = 'prep_complex.pdb'

# For every combination assigned to every student, create the folder
# <student>/<mutation name>/ and write the mutated structure into it as
# <mutation name>.pdb. The mutations are applied to chain 'B'.
for student, data in student_combs.items():
    for combo in data:
        positions = list(combo.keys())
        substitutions = list(combo.values())
        mut_name = '-'.join(substitutions)

        mutant_dir = Path(student, mut_name)
        mutant_dir.mkdir(parents=True, exist_ok=True)

        mutant = create_mutant(fpath, positions, substitutions, 'B')
        out_file = mutant_dir / f"{mut_name}.pdb"
        # keepIds=True writes the topology's own chain/residue ids.
        PDBFile.writeFile(mutant.topology, mutant.positions, str(out_file), keepIds=True)

In [2]:
from openmm.app import Modeller, PDBFile

from pathlib import Path

current_path = Path('.')
output_suffix = '_pept'

# For every directory in the working directory, copy each PDB file found below
# it into '<directory>_pept/<mutant folder>/<mutant folder>.pdb' with all atoms
# of chain 'A' removed.
# The listing is materialised and sorted up front because the loop creates new
# directories inside current_path; sorting also makes the order reproducible.
for parent_path in sorted(current_path.iterdir()):
    # Plain files cannot contain mutant folders. Directories written by an
    # earlier run of this cell must be skipped, otherwise re-running the cell
    # produces '<name>_pept_pept' copies of its own output.
    if not parent_path.is_dir() or parent_path.name.endswith(output_suffix):
        continue

    # .name rather than .stem: .stem would cut a directory name such as
    # 'J.Smith' down to 'J'.
    new_path = Path(f'{parent_path.name}{output_suffix}')

    for pdb_file in sorted(parent_path.rglob('*.pdb')):
        pdb_struc = PDBFile(str(pdb_file))
        modeller = Modeller(pdb_struc.topology, pdb_struc.positions)

        # Remove chain 'A' (presumably the protein; confirm against the input
        # structure) so only the remaining chains are written out.
        protein_atoms = [atom for atom in modeller.topology.atoms() if atom.residue.chain.id == 'A']
        modeller.delete(protein_atoms)

        # The output is named after the folder that holds the PDB file.
        mutant_name = pdb_file.parent.name
        out_file_dir = new_path / mutant_name
        out_file_dir.mkdir(parents=True, exist_ok=True)
        PDBFile.writeFile(modeller.topology, modeller.positions, str(out_file_dir / f"{mutant_name}.pdb"), keepIds=True)