In [None]:
#@title Upload your relaxed GPCR-peptide pdb file (top ranked by peptide pLDDT)
from google.colab import files

# Opens the Colab upload widget; the result is keyed by uploaded file name
# (its keys are read below and again in the contact-generation cell).
uploaded_pdb = files.upload()

# Name of the first uploaded file, or None when nothing was uploaded.
# Indexing `list(...)[0]` would raise IndexError on an empty/cancelled upload,
# while the contact-generation cell is written to tolerate that case, so stay
# consistent with it and report the situation instead of crashing.
pdb = next(iter(uploaded_pdb), None)
if pdb is None:
    print("\nNo pdb uploaded.")


In [3]:
#@title Setup
%%capture
import sys, os
from sys import version_info
PYTHON_VERSION = f"{version_info.major}.{version_info.minor}"

print("installing conda...")
os.system("wget -qnc https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh")
os.system("bash Miniforge3-Linux-x86_64.sh -bfp /usr/local")
os.system("mamba config --set auto_update_conda false")
os.system("touch CONDA_READY")

#  os.system(f"mamba install -y -q -c conda-forge openmm=7.7.0 python='{PYTHON_VERSION}' pdbfixer 2>&1 1>/dev/null")

PATH = f"/usr/local/lib/python{PYTHON_VERSION}/site-packages/"
if PATH not in sys.path:
  sys.path.insert(0, PATH)


os.system(f"mamba install -y -q -c conda-forge python='{PYTHON_VERSION}' 2>&1 1>/dev/null")
os.system(f"mamba install -y -q -c conda-forge gemmi openbabel biopython 2>&1 1>/dev/null")

!pip install pdbe-arpeggio==1.4.4
!pip install atomium==1.0.11

from arpeggio.core import InteractionComplex
from arpeggio.core.utils import max_mem_usage
import atomium
import numpy as np
import torch
import pandas as pd



# Header of the atom_site loop. It serves as the anchor that gets swapped for
# `section_to_add` when the temporary CIF file is patched.
to_replace = '''loop_
_atom_site.group_PDB
_atom_site.id'''

# Replacement text: a chem_comp loop describing the 20 standard amino acids,
# followed by the untouched atom_site loop header, so that substituting the
# anchor with this text inserts the chem_comp table directly in front of the
# atom records.
section_to_add = '''loop_
_chem_comp.id
_chem_comp.type
_chem_comp.mon_nstd_flag
_chem_comp.name
_chem_comp.pdbx_synonyms
_chem_comp.formula
_chem_comp.formula_weight
ALA "L-peptide linking" y ALANINE         ? "C3 H7 N O2"     89.093
ARG "L-peptide linking" y ARGININE        ? "C6 H15 N4 O2 1" 175.209
ASN "L-peptide linking" y ASPARAGINE      ? "C4 H8 N2 O3"    132.118
ASP "L-peptide linking" y "ASPARTIC ACID" ? "C4 H7 N O4"     133.103
CYS "L-peptide linking" y CYSTEINE        ? "C3 H7 N O2 S"   121.158
GLN "L-peptide linking" y GLUTAMINE       ? "C5 H10 N2 O3"   146.144
GLU "L-peptide linking" y "GLUTAMIC ACID" ? "C5 H9 N O4"     147.129
GLY "peptide linking"   y GLYCINE         ? "C2 H5 N O2"     75.067
HIS "L-peptide linking" y HISTIDINE       ? "C6 H10 N3 O2 1" 156.162
ILE "L-peptide linking" y ISOLEUCINE      ? "C6 H13 N O2"    131.173
LEU "L-peptide linking" y LEUCINE         ? "C6 H13 N O2"    131.173
LYS "L-peptide linking" y LYSINE          ? "C6 H15 N2 O2 1" 147.195
MET "L-peptide linking" y METHIONINE      ? "C5 H11 N O2 S"  149.211
PHE "L-peptide linking" y PHENYLALANINE   ? "C9 H11 N O2"    165.189
PRO "L-peptide linking" y PROLINE         ? "C5 H9 N O2"     115.130
SER "L-peptide linking" y SERINE          ? "C3 H7 N O3"     105.093
THR "L-peptide linking" y THREONINE       ? "C4 H9 N O3"     119.119
TRP "L-peptide linking" y TRYPTOPHAN      ? "C11 H12 N2 O2"  204.225
TYR "L-peptide linking" y TYROSINE        ? "C9 H11 N O3"    181.189
VAL "L-peptide linking" y VALINE          ? "C5 H11 N O2"    117.146
#
''' + to_replace


In [None]:
#@title Generate Arpeggio contacts (.parquet)
import traceback

# Converts the uploaded structure to mmCIF, injects the chem_comp table,
# runs Arpeggio on the selection, keeps the inter-entity contacts that are
# not purely "proximal", and writes/downloads them as a parquet file.
if not uploaded_pdb:
    print("\nNo pdb uploaded.")
else:
    pdb_filename = list(uploaded_pdb.keys())[0]
    print(f"\nProcessing file: {pdb_filename}")

    # Arpeggio run parameters.
    selections = ["/B//"]  # presumably selects chain B (the peptide) - confirm
    interacting_distance = 5.0
    ph = 7.4
    vdw_clash_allowance = 0.1
    include_sequence_adjacent = False

    try:
        # Round-trip the input through atomium to obtain an mmCIF file.
        pdb_model = atomium.open(pdb_filename)
        pdb_model.model.save('temp.cif')

        with open('temp.cif', 'r') as f:
            cif_text = f.read()

        # str.replace is a silent no-op when the anchor is absent, which would
        # hand Arpeggio a CIF without the chem_comp table; fail loudly instead.
        if to_replace not in cif_text:
            raise ValueError(
                "atom_site loop header not found in 'temp.cif'; "
                "cannot insert the chem_comp section"
            )
        # Insert the table exactly once, in front of the first atom_site loop.
        modified_cif_text = cif_text.replace(to_replace, section_to_add, 1)

        with open('temp_modified.cif', 'w') as f:
            f.write(modified_cif_text)

        i_complex = InteractionComplex(
            'temp_modified.cif', vdw_clash_allowance, interacting_distance, ph
        )
        i_complex.structure_checks()
        i_complex.initialize()
        i_complex.run_arpeggio(
            selections, interacting_distance, vdw_clash_allowance, include_sequence_adjacent
        )

        contacts = pd.DataFrame(i_complex.get_contacts())
        # An empty result produces a frame without columns; report that clearly
        # rather than surfacing a bare KeyError from the filter below.
        if 'interacting_entities' not in contacts.columns:
            raise ValueError(
                f"Arpeggio returned no contacts for selection {selections}"
            )

        inter_chain_contacts = contacts[contacts['interacting_entities'] == 'INTER'].copy()
        # Tuples make the contact-type lists comparable with a plain `!=`.
        inter_chain_contacts['contact'] = inter_chain_contacts['contact'].map(tuple)
        final_contacts = inter_chain_contacts[inter_chain_contacts['contact'] != ('proximal',)].reset_index(drop=True)

        # Record which input file every contact row came from.
        final_contacts['path'] = pdb_filename

        output_parquet_filename = f"{os.path.splitext(pdb_filename)[0]}_contacts.parquet"
        final_contacts.to_parquet(output_parquet_filename)
        print(f"\nSuccessfully created '{output_parquet_filename}'.")

        files.download(output_parquet_filename)

    except Exception as e:
        # Keep the cell from aborting the notebook, but show the full traceback
        # so the failing step can actually be diagnosed.
        print(f"\nAn error occurred during processing: {e}")
        traceback.print_exc()