In [5]:
import os
import pandas as pd
from Bio.PDB import PDBParser, PDBIO, Select

In [6]:
# Load the score table for this design run into a DataFrame.
csv_file = '/links/grid/scratch/lmerlicek/design/LM_CNTRpos_PMPNN_B05_p03_011/all_scores.csv'
df = pd.read_csv(csv_file)

# Order the rows by the 'catalytic_score' column (ascending, the
# sort_values default); the original frame `df` is left untouched.
df_sorted = df.sort_values('catalytic_score')

# One parser instance is reused for every structure read below.
parser = PDBParser(QUIET=True)

class LigandCatResSelect(Select):
    """Residue filter that keeps one numbered residue plus a named ligand.

    Passed as ``select=`` to ``PDBIO.save`` so that only matching residues
    are written out.

    Args:
        chain_id: ID of the chain that holds the residue to keep.
        res_id: Sequence number of the residue to keep (compared against
            ``residue.id[1]``).
        ligand_name: Residue name of the ligand; any residue with this
            ``resname`` is kept regardless of chain.
    """

    def __init__(self, chain_id, res_id, ligand_name):
        self.chain_id = chain_id
        self.res_id = res_id
        self.ligand_name = ligand_name

    def accept_residue(self, residue):
        """Return True if *residue* is the selected residue or the ligand."""
        # Same evaluation order as a two-step check: residue number first,
        # then the parent chain ID, then the ligand name as a fallback.
        is_selected_residue = (
            residue.id[1] == self.res_id and residue.parent.id == self.chain_id
        )
        # bool() keeps the return value a plain Python bool even when the
        # comparison yields a NumPy boolean (e.g. res_id read from pandas).
        return bool(is_selected_residue or residue.resname == self.ligand_name)

# Output PDB file
output_pdb = '/links/grid/scratch/lmerlicek/design/Input/cst_out/catres_sorted.pdb'
io = PDBIO()

# Directory holding the per-design structures; listed once up front instead
# of on every loop iteration.
structures_dir = '/links/grid/scratch/lmerlicek/design/LM_CNTRpos_PMPNN_B05_p03_011/best_structures'
structure_files = os.listdir(structures_dir)

# Open the output ONCE and hand the open handle to PDBIO.save. Passing the
# path string instead makes PDBIO reopen the file in write mode on every call,
# which truncates it -- only the last structure would survive.
with open(output_pdb, 'w') as combined_handle:
    # Iterate over the designs in sorted-score order
    for _, row in df_sorted.iterrows():
        # First file whose name ends with "<index>.pdb".
        # NOTE(review): a suffix match can be ambiguous (index 1 also matches
        # "...11.pdb") -- confirm the file naming scheme makes this unique.
        suffix = f'{row["index"]}.pdb'
        file_name = next(
            (name for name in structure_files if name.endswith(suffix)), None
        )
        if file_name is None:
            # No structure on disk for this row: skip it (best effort).
            continue
        pdb_file = os.path.join(structures_dir, file_name)
        structure = parser.get_structure(row["index"], pdb_file)

        # Extract ligand and catalytic residue, appending them to the
        # combined file; write_end=False so no END record is emitted
        # between consecutive structures.
        select = LigandCatResSelect(chain_id='A', res_id=row['cat_resi'], ligand_name='5TS')
        io.set_structure(structure)
        io.save(combined_handle, select=select, write_end=False)

print(f'Combined PDB file saved as {output_pdb}')

Combined PDB file saved as catres_sorted.pdb
