<a href="https://colab.research.google.com/github/Zebreu/DeorphaNN/blob/main/minimum_distance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook checks whether the peptide is positioned in the GPCR binding pocket in a GPCR–peptide complex (.pdb) predicted by AlphaFold-Multimer.

**Note:** the GPCR must be chain A and the peptide must be chain B in the multimer.

In [None]:
#@title Install Dependencies
%%capture
import torch
import pandas as pd
import numpy as np
import os
import json
!pip install biopython

from Bio.PDB import PDBParser, PDBIO
from Bio.SeqUtils import seq1
from datetime import datetime

!pip3 install -qU pybiolib
import biolib
import re
deeptmhmm = biolib.load('DTU/DeepTMHMM')
from biolib._internal.http_client import HttpError
import time

from google.colab import files
import os, glob, tempfile
from Bio.PDB import PPBuilder
from Bio.Data.IUPACData import protein_letters_3to1

In [None]:
#@title Identify binding pocket using DeepTMHMM


def run_deeptmhmm_safe(tmp_fa, max_retries=2, delay=3):
    """Run DeepTMHMM on a FASTA file, retrying on "NoSuchKey" HTTP errors.

    Args:
        tmp_fa: Path of the FASTA file handed to DeepTMHMM via ``--fasta``.
        max_retries: Number of extra attempts allowed after the first one.
        delay: Seconds to sleep before each retry.

    Returns:
        The object returned by ``deeptmhmm.cli`` for the first attempt that
        does not raise.

    Raises:
        HttpError: When the error message does not contain "NoSuchKey", or
            when all retries have been used up.
    """
    for attempt in range(max_retries + 1):
        try:
            job = deeptmhmm.cli(args=f"--fasta {tmp_fa}")
        except HttpError as err:
            retryable = "NoSuchKey" in str(err)
            # Anything other than the "NoSuchKey" case, or a failure on the
            # final attempt, is propagated unchanged to the caller.
            if not retryable or attempt >= max_retries:
                raise
            print("DeepTMHMM storage not ready. Retrying...")
            time.sleep(delay)
        else:
            return job


# Ask the user for a PDB file. The parser below opens the file by name, so the
# upload is expected to be present in the current working directory.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; please upload one .pdb file.")
pdb_path = next(iter(uploaded))
if len(uploaded) > 1:
    print(f"Multiple files uploaded; only {pdb_path} will be processed.")

results = []

print("Processing:", pdb_path)

parser = PDBParser(QUIET=True)
structure = parser.get_structure("struct", pdb_path)
model = structure[0]  # only the first model is analysed

# The rest of the notebook addresses the receptor as chain "A" and the peptide
# as chain "B"; fail early with a readable message if either is absent.
missing_chains = [cid for cid in ("A", "B") if cid not in model]
if missing_chains:
    raise ValueError(
        f"{pdb_path} is missing chain(s) {', '.join(missing_chains)}; "
        "the GPCR must be chain A and the peptide chain B."
    )

# One row per atom; residues whose hetero flag (res.id[0]) is not blank are
# skipped.
atoms = [
    {
        "chain_id": chain.id,
        "residue_seq_id": res.id[1],
        "atom_name": atom.name,
        "pos_x": atom.coord[0],
        "pos_y": atom.coord[1],
        "pos_z": atom.coord[2],
    }
    for chain in model
    for res in chain
    if res.id[0] == " "
    for atom in res
]

pdb_df = pd.DataFrame(atoms)

# === extract chain A sequence ===
ppb = PPBuilder()
seq3 = []       # three-letter residue names, in polypeptide order
res_ids_A = []  # matching residue sequence numbers
for pp in ppb.build_peptides(model["A"]):
    for r in pp:
        seq3.append(r.get_resname())
        res_ids_A.append(r.id[1])

# Named chain_a_seq (not seq1) so the imported Bio.SeqUtils.seq1 function is
# not overwritten. Residue names absent from the 3->1 table become "X".
chain_a_seq = "".join(protein_letters_3to1.get(r.capitalize(), "X") for r in seq3)
if not chain_a_seq:
    raise ValueError(f"No polypeptide residues found in chain A of {pdb_path}")

# === run DeepTMHMM on chain A ===
tmp_fa = os.path.join(tempfile.gettempdir(), "chainA.fasta")
with open(tmp_fa, "w") as f:
    f.write(">A\n" + chain_a_seq + "\n")

# Go through the retrying wrapper: a direct deeptmhmm.cli call was observed to
# fail intermittently.
job = run_deeptmhmm_safe(tmp_fa)

outdir = os.path.join(tempfile.gettempdir(), "deeptmhmm_A")
os.makedirs(outdir, exist_ok=True)
job.save_files(outdir, overwrite=True)

three_line_files = glob.glob(os.path.join(outdir, "*.3line"))
if not three_line_files:
    raise FileNotFoundError(
        f"DeepTMHMM produced no .3line output in {outdir} for {pdb_path}"
    )
linefile = three_line_files[0]
with open(linefile) as f:
    lines = f.read().strip().splitlines()
if len(lines) < 3:
    raise ValueError(
        f"Unexpected DeepTMHMM .3line output for {pdb_path}: "
        f"expected at least 3 lines, got {len(lines)}"
    )
top = lines[2].strip()  # topology string (third line of the .3line file)

if len(top) != len(chain_a_seq):
    raise ValueError(f"DeepTMHMM length mismatch for {pdb_path}")

# map topology to residue_seq_ids
tmm_data = pd.Series(index=res_ids_A, data=list(top))

# === per-residue centroids: mean atom position for each residue ===
coord_cols = ["pos_x", "pos_y", "pos_z"]
rec = pdb_df.loc[pdb_df["chain_id"] == "A"]
pep = pdb_df.loc[pdb_df["chain_id"] == "B"]

rec_pos = rec.groupby("residue_seq_id")[coord_cols].mean()
pep_pos = pep.groupby("residue_seq_id")[coord_cols].mean()

# === flag residues where the topology label switches between O and M ===
# Residues without a topology label are filled with "X", so they can never
# take part in an O<->M pair.
tmm_aligned = tmm_data.reindex(rec_pos.index).fillna("X")
labels = tmm_aligned.tolist()
boundary_pairs = {("O", "M"), ("M", "O")}

# A residue is flagged when it and the next residue form an O/M pair.
is_transition = [pair in boundary_pairs for pair in zip(labels, labels[1:])]
if labels:
    is_transition.append(False)  # the last residue has no successor

# keep only the transition residues' centroid coordinates
rec_pos = rec_pos.loc[np.asarray(is_transition, dtype=bool), coord_cols]

# === distance matrix ===
# Distance below which the peptide is reported as inside the pocket
# (value carried over unchanged from the original notebook).
POCKET_DISTANCE_CUTOFF = 12.5

rec_t = torch.from_numpy(rec_pos.values).float()
pep_t = torch.from_numpy(pep_pos.values).float()

# Taking min() of an empty distance matrix raises an opaque RuntimeError, so
# report the actual cause instead.
if rec_t.shape[0] == 0:
    raise ValueError(
        f"No O<->M topology transitions found in chain A of {pdb_path}; "
        "cannot locate the binding pocket."
    )
if pep_t.shape[0] == 0:
    raise ValueError(f"No peptide residues (chain B) found in {pdb_path}.")

# Pairwise Euclidean distances between transition-residue centroids (rows)
# and peptide-residue centroids (columns).
dist = torch.cdist(rec_t, pep_t, p=2)
min_dist = dist.min().item()

results.append({"pdb": os.path.basename(pdb_path), "min_distance": min_dist})
print(f"Minimum distance (Å): {min_dist}")
if min_dist >= POCKET_DISTANCE_CUTOFF:
    print("Peptide outside binding pocket.")
else:
    print("Peptide inside binding pocket.")
