In [None]:
import os, re, pandas as pd
from prody import parsePDB
from prody import parsePDB, buildDistMatrix
import glob
# 1. load half-life dict: peptide sequence -> value of the 'half_life' column
hl_df   = pd.read_csv('/Users/ozgetimur/Desktop/halfpepstab/hlapepstab/peptideseq_halflife.csv')
hl_dict = hl_df.set_index('peptide_sequences')['half_life'].to_dict()

# 2. collect the model files. glob returns matches in arbitrary order, so sort
#    them to make the row order of the output CSV reproducible across runs.
models = sorted(glob.glob('models_archive/A_02_01_*.pdb'))

# File names must look like A_02_01_<9 uppercase letters>.pdb; the captured
# group is used as the peptide sequence. Compiled once, outside the loop.
model_name_re = re.compile(r'A_02_01_([A-Z]{9})\.pdb$')

# 3. build one feature row per model
rows = []
for path in models:
    fn = os.path.basename(path)
    m = model_name_re.match(fn)
    if not m:
        print(f"skip {fn}")
        continue

    pep_seq   = m.group(1)
    half_life = hl_dict.get(pep_seq)
    if half_life is None:
        print(f"no half-life for {pep_seq}, skipping")
        continue

    struct = parsePDB(path)
    if struct is None:
        # Nothing usable was parsed from the file; skip it rather than crash
        # on the .select() calls below.
        print(f"could not parse {fn}, skipping")
        continue

    # Chain A is treated as the peptide and chain B as the HLA (per the
    # variable names; confirm against how the models were built).
    pep = struct.select('chain A and backbone')
    hla = struct.select('chain B and backbone')
    if pep is None or hla is None:
        # select() yields None when no atom matches, which would otherwise
        # raise AttributeError on .getCoords() and abort the whole run.
        print(f"missing chain A or chain B backbone atoms in {fn}, skipping")
        continue

    coords_pep  = pep.getCoords()
    coords_hla  = hla.getCoords()
    # All peptide-backbone x HLA-backbone pairwise distances, flattened
    # row-major into a single feature vector.
    dist_flat   = buildDistMatrix(coords_pep, coords_hla).flatten()

    # NOTE(review): dist_{i} columns only line up between rows if every model
    # has the same number of backbone atoms in each chain; otherwise pandas
    # pads the shorter rows with NaN. Verify on the model set.
    row = {'peptide_seq': pep_seq, 'half_life': half_life}
    row.update({f'dist_{i}': d for i, d in enumerate(dist_flat)})
    rows.append(row)

# 4. write the combined table
df = pd.DataFrame(rows)
df.to_csv('pepdist_with_halflife.csv', index=False)


@> 1577 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 1570 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1564 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1574 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1565 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1571 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1562 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1570 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1564 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1556 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1556 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1570 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1552 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1562 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1583 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1571 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1558 atoms and 1 coordinate set(s) we