conda activate antibody_prediction_safe

In [2]:
!pip install freesasa


Collecting freesasa
  Using cached freesasa-2.2.1-cp310-cp310-linux_x86_64.whl
Installing collected packages: freesasa
Successfully installed freesasa-2.2.1


In [5]:
import os
import pandas as pd
import freesasa
from Bio import PDB
from tqdm import tqdm

def calculate_sasa_freesasa(pdb_path):
    """Compute per-residue solvent-accessible surface area for one PDB file.

    The file is loaded with ``freesasa.Structure`` and evaluated with
    ``freesasa.calc`` using the library defaults. Per-atom areas are then
    summed for every atom sharing the same chain label, residue name and
    residue number.

    Args:
        pdb_path: Path to the PDB file to analyse.

    Returns:
        dict: Maps ``(chain, resn, resi)`` to the summed atom areas of that
        residue. The key components are stored exactly as freesasa reports
        them; no whitespace stripping is done here.
    """
    pdb_structure = freesasa.Structure(pdb_path)
    sasa_result = freesasa.calc(pdb_structure)

    per_residue = {}
    for atom_idx in range(pdb_structure.nAtoms()):
        residue_key = (
            pdb_structure.chainLabel(atom_idx),
            pdb_structure.residueName(atom_idx),
            pdb_structure.residueNumber(atom_idx),
        )
        # Accumulate this atom's area onto its residue's running total.
        running_total = per_residue.get(residue_key, 0)
        per_residue[residue_key] = running_total + sasa_result.atomArea(atom_idx)
    return per_residue

def extract_methionine_sasa(sasa_dict, short_name, imgt_number, model_name):
    """Repack the arguments into an output record.

    Despite the name, nothing is filtered or looked up: ``sasa_dict`` is
    passed through untouched (the very same object), just moved to the end
    of the tuple.

    Args:
        sasa_dict: SASA value(s) to attach to the record.
        short_name: Molecule name.
        imgt_number: Residue position label.
        model_name: Name of the model that produced the structure.

    Returns:
        tuple: ``(short_name, imgt_number, model_name, sasa_dict)``.
    """
    record = (short_name, imgt_number, model_name, sasa_dict)
    return record

# --- Main ---
# Folders scanned for structure files, one per model run.
folders = [
    "renumbered_output/igfold_1",
    "renumbered_output/igfold_2",
    "renumbered_output/igfold_3",
    "renumbered_output/immunebuilder_1",
    "renumbered_output/immunebuilder_2",
    "renumbered_output/immunebuilder_3",
]

# data dictionary to collect:
#   (short_name, imgt_number) -> {model_name: summed SASA of that methionine}
data = {}

for folder in folders:
    # os.listdir returns entries in arbitrary order. Sorting makes the
    # insertion order of `data` (and so the row order of the final table)
    # reproducible across machines and filesystems. Filtering first also
    # means the progress bar counts only the files that are processed.
    pdb_files = sorted(
        entry for entry in os.listdir(folder) if entry.endswith(".pdb")
    )
    for pdb_file in tqdm(pdb_files):
        full_path = os.path.join(folder, pdb_file)

        # Strip the file suffix. Files lacking the "_renumbered" marker still
        # lose their ".pdb" extension, so the model name parsed below never
        # ends up as e.g. "igfold_1.pdb".
        if pdb_file.endswith("_renumbered.pdb"):
            pdb_name = pdb_file[: -len("_renumbered.pdb")]
        else:
            pdb_name = pdb_file[: -len(".pdb")]

        # File stem layout: <molecule name>_<model>_<replicate>
        # short_name is molecule name
        parts = pdb_name.split("_")
        short_name = "_".join(parts[:-2])
        model_name = parts[-2] + "_" + parts[-1]  # igfold_1 etc.

        sasa_dict = calculate_sasa_freesasa(full_path)

        for (chain, resn, resi), sasa in sasa_dict.items():
            if resn != "MET":
                continue
            # Every chain that is not labelled H is reported as L.
            chain_tag = 'H' if chain.upper() == 'H' else 'L'
            # The residue number may carry surrounding whitespace; strip it
            # before building the position label.
            # NOTE(review): presumably IMGT numbering, given the
            # "renumbered" inputs - confirm.
            imgt_number = f"{chain_tag}{resi.strip()}"

            data.setdefault((short_name, imgt_number), {})[model_name] = sasa

# --- Build DataFrame ---
# One value column per model. The names are taken from the last path component
# of each entry in `folders` (igfold_1 ... immunebuilder_3), so the table
# cannot drift from the list of folders that were scanned.
model_columns = [
    os.path.basename(os.path.normpath(folder_path)) for folder_path in folders
]

rows = []
for (short_name, imgt_number), models_sasa in data.items():
    row = {
        "name": short_name,
        "imgt_number": imgt_number,
    }
    for model in model_columns:
        # Models with no value for this residue are left blank.
        row[model] = models_sasa.get(model, "")
    rows.append(row)

# Passing the columns explicitly keeps the header in the output even when no
# methionine was collected (rows == []).
df = pd.DataFrame(rows, columns=["name", "imgt_number", *model_columns])

# Save to CSV
df.to_csv("methionine_sasa_summary_fixed.csv", index=False)

print("methionine_sasa_summary_fixed.csv generated!")


  0%|          | 0/4 [00:00<?, ?it/s]

100%|██████████| 4/4 [00:00<00:00, 12.85it/s]
100%|██████████| 3/3 [00:00<00:00, 11.16it/s]
100%|██████████| 3/3 [00:00<00:00, 13.57it/s]
100%|██████████| 3/3 [00:00<00:00,  7.35it/s]
100%|██████████| 3/3 [00:00<00:00,  7.03it/s]
100%|██████████| 3/3 [00:00<00:00,  8.84it/s]

methionine_sasa_summary_fixed.csv generated!



