In [None]:
import os, re, pandas as pd
from prody import parsePDB
from prody import parsePDB, buildDistMatrix
import glob
# Build a per-model feature table: for every `A_02_01_*` model PDB, pair the
# peptide's half-life with the flattened backbone distance matrix between
# chain A and chain B, then write one row per model to a CSV file.

HALF_LIFE_CSV = '/Users/ozgetimur/Desktop/halfpepstab/hlapepstab/peptideseq_halflife.csv'
MODEL_GLOB    = 'models_archive/A_02_01_*.pdb'
OUTPUT_CSV    = 'pepdist_with_halflife.csv'

# Accepts both the original `A_02_01_<SEQ>.pdb` names and names carrying a
# numeric `_<half-life>` suffix (`A_02_01_<SEQ>_<half-life>.pdb`), so this
# cell still finds its inputs after the model files have been renamed.
MODEL_NAME_RE = re.compile(r'A_02_01_([A-Z]{9})(?:_-?\d+(?:\.\d+)?)?\.pdb$')

# 1. load half-life dict (peptide sequence -> half-life)
hl_df   = pd.read_csv(HALF_LIFE_CSV)
hl_dict = hl_df.set_index('peptide_sequences')['half_life'].to_dict()

# glob returns paths in arbitrary order; sort so the row order is reproducible.
models = sorted(glob.glob(MODEL_GLOB))

rows = []
for path in models:
    fn = os.path.basename(path)
    m = MODEL_NAME_RE.match(fn)
    if not m:
        print(f"skip {fn}")
        continue

    pep_seq   = m.group(1)
    half_life = hl_dict.get(pep_seq)
    # A blank cell in the CSV comes back as NaN, not None — treat both as missing.
    if half_life is None or pd.isna(half_life):
        print(f"no half-life for {pep_seq}, skipping")
        continue

    struct = parsePDB(path)
    if struct is None:
        print(f"could not parse {fn}, skipping")
        continue

    # select() returns None (not an empty selection) when nothing matches.
    pep = struct.select('chain A and backbone')
    hla = struct.select('chain B and backbone')
    if pep is None or hla is None:
        print(f"missing chain A/B backbone in {fn}, skipping")
        continue

    coords_pep  = pep.getCoords()
    coords_hla  = hla.getCoords()
    # Pairwise chain-A x chain-B backbone distances, flattened row-major.
    # NOTE(review): dist_<i> columns only line up across models if every model
    # has the same backbone atom counts and ordering — confirm for this archive.
    dist_flat   = buildDistMatrix(coords_pep, coords_hla).flatten()

    row = {'peptide_seq': pep_seq, 'half_life': half_life}
    row.update({f'dist_{i}': d for i, d in enumerate(dist_flat)})
    rows.append(row)

if rows:
    df = pd.DataFrame(rows)
else:
    # Keep the header so the CSV is still loadable when nothing was processed.
    print("no models were processed; writing an empty table")
    df = pd.DataFrame(columns=['peptide_seq', 'half_life'])

df.to_csv(OUTPUT_CSV, index=False)


@> 1577 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 1570 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1564 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1574 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1565 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1571 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1562 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1570 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1564 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1556 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1556 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1570 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1552 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1562 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1583 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1571 atoms and 1 coordinate set(s) were parsed in 0.01s.
@> 1558 atoms and 1 coordinate set(s) we

In [2]:
import os
import pandas as pd

# Rename every model file `A_02_01_<SEQ>.pdb` in MODEL_DIR to
# `A_02_01_<SEQ>_<half-life>.pdb`, using the half-life recorded for that
# peptide sequence in pepdist_with_halflife.csv.
#
# NOTE: this changes file names on disk. The feature-extraction cell selects
# its inputs by file-name pattern, so make sure that pattern also accepts the
# suffixed names before re-running it.

# 1️⃣ Load the half-life lookup (peptide sequence -> half-life)
df = pd.read_csv("pepdist_with_halflife.csv")
hl_map = dict(zip(df.peptide_seq, df.half_life))

# 2️⃣ Point to the directory where the .pdb files live
MODEL_DIR = "/Users/ozgetimur/Desktop/halfpepstab/models_archive"

prefix = "A_02_01_"

# sorted() gives a reproducible processing order and log.
for fname in sorted(os.listdir(MODEL_DIR)):
    if not fname.endswith(".pdb"):
        continue

    # 3️⃣ Strip off the extension and the known prefix
    base = fname[:-4]                  # e.g. "A_02_01_ABCDEFGHI"
    if not base.startswith(prefix):
        print(f"⚠️ skipping unexpected file name: {fname}")
        continue

    seq = base[len(prefix):]          # e.g. "ABCDEFGHI"

    # 4️⃣ Lookup half-life (a blank CSV cell is NaN, not None)
    hl = hl_map.get(seq)
    if hl is None or pd.isna(hl):
        # On a re-run the name already carries a "_<half-life>" suffix; report
        # that as such instead of as a missing half-life.
        head, sep, _ = seq.partition("_")
        if sep and head in hl_map:
            print(f"already renamed, skipping: {fname}")
        else:
            print(f"⚠️ no half-life for sequence {seq}, leaving {fname} untouched")
        continue

    # Optional: format hl nicely (e.g. integer, or one decimal place)
    # NOTE(review): several half-lives in the recorded output are ~45,6xx-45,8xx,
    # which resembles spreadsheet date serial numbers — verify the source CSV.
    hl_str = f"{hl:.1f}"              # change to "{int(hl)}" if you want no decimals

    # 5️⃣ Construct the new filename
    new_base = f"{prefix}{seq}_{hl_str}"
    new_fname = new_base + ".pdb"

    # 6️⃣ Rename on disk
    src = os.path.join(MODEL_DIR, fname)
    dst = os.path.join(MODEL_DIR, new_fname)
    # os.rename silently replaces an existing file on POSIX; never clobber.
    if os.path.exists(dst):
        print(f"⚠️ target already exists, leaving {fname} untouched: {new_fname}")
        continue
    os.rename(src, dst)
    print(f"Renamed: {fname} → {new_fname}")


Renamed: A_02_01_FDAVLYYHM.pdb → A_02_01_FDAVLYYHM_45658.0.pdb
Renamed: A_02_01_YIVGYYSAL.pdb → A_02_01_YIVGYYSAL_102.0.pdb
Renamed: A_02_01_MMLVPLITV.pdb → A_02_01_MMLVPLITV_848.0.pdb
Renamed: A_02_01_VMNHKNKFM.pdb → A_02_01_VMNHKNKFM_45689.0.pdb
Renamed: A_02_01_RTLAAMPEE.pdb → A_02_01_RTLAAMPEE_45839.0.pdb
Renamed: A_02_01_TVIYRGVNF.pdb → A_02_01_TVIYRGVNF_1.0.pdb
Renamed: A_02_01_ALVCGLRQL.pdb → A_02_01_ALVCGLRQL_1.0.pdb
Renamed: A_02_01_TVYDINNEV.pdb → A_02_01_TVYDINNEV_170.0.pdb
Renamed: A_02_01_AVDRGCLRI.pdb → A_02_01_AVDRGCLRI_45718.0.pdb
Renamed: A_02_01_IISAVVGIV.pdb → A_02_01_IISAVVGIV_41.0.pdb
Renamed: A_02_01_LMTGGVTLV.pdb → A_02_01_LMTGGVTLV_102.0.pdb
Renamed: A_02_01_KMTLFKSIL.pdb → A_02_01_KMTLFKSIL_45713.0.pdb
Renamed: A_02_01_TIAGVAGLI.pdb → A_02_01_TIAGVAGLI_0.0.pdb
Renamed: A_02_01_ITAALAWSL.pdb → A_02_01_ITAALAWSL_30.0.pdb
Renamed: A_02_01_FIFFFLFNI.pdb → A_02_01_FIFFFLFNI_114.0.pdb
Renamed: A_02_01_SFYGYGFNV.pdb → A_02_01_SFYGYGFNV_24.0.pdb
Renamed: A_02_01_ATGFGT