# Adding disulfides to the measurements

In [2]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes._axes import Axes
from matplotlib.figure import Figure
from pathlib import Path
from sklearn.metrics import roc_curve, roc_auc_score
import re
import tempfile
import shutil
import os
import subprocess
import sys
# Keep references to the current stdout/stderr stream objects.
# NOTE(review): presumably used to restore the streams if something redirects them later — confirm.
stdout, stderr = sys.stdout, sys.stderr
from typing import Literal

import pymol
from Bio.SeqUtils import seq1
from Bio.PDB import PDBParser
from Bio.PDB.Structure import Structure as BioPy_PDBStructure
from Bio.PDB.Model import Model as BioPy_PDBModel
from Bio.PDB.Chain import Chain
from Bio.PDB.PDBExceptions import PDBConstructionException
# Module-wide Biopython PDB parser; QUIET=True suppresses the warnings issued while constructing a structure.
parser = PDBParser(QUIET=True)

class bcolors:
    """ANSI escape sequences for coloured / styled terminal output.

    Prefix a string with one of the codes and append ``ENDC`` to switch
    the styling off again, e.g. ``f"{bcolors.WARNING}text{bcolors.ENDC}"``.
    """

    # Text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Foreground colours
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'
    OKCYAN = '\033[96m'

    # Reset all attributes back to the terminal default
    ENDC = '\033[0m'

In [3]:
# Make the project's local `src` folder importable so that `measure_PPI` can be loaded.
libpath = Path("../src").resolve()
print(libpath)
# Register the folder only once: re-running this cell must not pile up
# duplicate entries in sys.path.
if str(libpath) not in sys.path:
    sys.path.insert(0, str(libpath))
import measure_PPI

D:\Eigene Datein\Programmieren\Git\abrilka\bachelorthesis\src


In [5]:
# Settings

# AlphaFold version whose output is parsed in this notebook
af_mode: Literal["AF2", "AF3"] = "AF2"

# Resource folder containing the structures and the metadata tables
path_resources = Path(r"D:\Eigene Datein\dev\Uni\JGU Bio Bachelorthesis\Daten\resources")

# Luck Drive folder (needed by the ipSAE metric to locate the json file);
# AlphaFold3 data sits in its own "AlphaFold3" subfolder
path_AF_luck_drive = Path(r"L:\imb-luckgr2\projects\AlphaFold")
if af_mode == "AF3":
    path_AF_luck_drive /= "AlphaFold3"

# Local folders derived from the resource folder
path_AF = path_resources.joinpath(af_mode)
path_solved = path_resources.joinpath("solved")

In [17]:
# Load the `ranked_0.pdb` model of one known DDI complex. OpenStructure returns two
# objects, used below as a Biopython structure and a Biotite atom array.
structure_biopy, atomarray_biotite = measure_PPI.OpenStructure(
    path_AF.joinpath(
        "DDI",
        "known_DDI",
        "PF00009_PF01873_2D74_A_resi12_resi200.B_resi21_resi133",
        "ranked_0.pdb",
    ),
    "PF00009_PF01873_2D74_A_resi12_resi200.B_resi21_resi133",
)

In [None]:
# Count salt bridges between the two chains of the complex.
# A residue pair counts as ONE salt bridge when it consists of an acidic and a
# basic residue and at least one pair of their charged side-chain atoms lies
# within `cutoff` of each other.
cutoff = 4  # maximum atom-atom distance (same unit as the structure coordinates, Å for PDB files)
chains = list(structure_biopy.get_chains())
assert len(chains) == 2, f"expected exactly 2 chains, found {len(chains)}"

chain1 = structure_biopy[0][chains[0].id]
chain2 = structure_biopy[0][chains[1].id]

saltBridges_ac = {"ASP":"a", "GLU":"a", "ARG":"b", "LYS":"b", "HIS": "b"} # a: Acidic, b: Basic
# Charged side-chain atoms. Histidine's ring nitrogens are ND1 and NE2
# (NE1 belongs to tryptophan; AE1/AE2 are not histidine atoms), so the
# previous list silently missed every His salt bridge formed via NE2.
saltBridges_atoms = ['OD1', 'OD2', # Aspartate
                     'OE1', 'OE2', # Glutamate
                     'NH1', 'NH2', 'NE', # Arginine
                     'NZ', # Lysine
                     'ND1', 'NE2' # Histidine
                     ]

salt_bridges = 0

for res1 in chain1:
    if res1.resname not in saltBridges_ac:
        continue
    # Select the charged atoms of res1 once instead of once per res2
    charged_atoms1 = [a for a in res1 if a.id in saltBridges_atoms]
    for res2 in chain2:
        if res2.resname not in saltBridges_ac:
            continue
        # Two acidic or two basic residues cannot form a salt bridge
        if saltBridges_ac[res1.resname] == saltBridges_ac[res2.resname]:
            continue

        charged_atoms2 = [a for a in res2 if a.id in saltBridges_atoms]
        # `atom1 - atom2` is Biopython's atom-atom distance; any() stops at the
        # first pair within the cutoff, so each residue pair is counted once.
        if any(atom1 - atom2 <= cutoff
               for atom1 in charged_atoms1
               for atom2 in charged_atoms2):
            salt_bridges += 1

HIS GLU
HIS GLU
HIS GLU
HIS GLU
HIS ASP
HIS ASP
HIS ASP
HIS GLU
HIS GLU
HIS GLU
HIS ASP
HIS ASP
HIS GLU
ASP LYS
ASP HIS
ASP HIS
ASP LYS
ASP ARG
ASP LYS
ASP LYS
ASP ARG
ASP HIS
ASP LYS
ASP ARG
ASP ARG
ASP ARG
ASP ARG
ASP LYS
ASP LYS
ASP LYS
ASP LYS
ASP LYS
ASP LYS
ASP ARG
ASP ARG
ASP HIS
ASP LYS
HIS GLU
HIS GLU
HIS GLU
HIS GLU
HIS ASP
HIS ASP
HIS ASP
HIS GLU
HIS GLU
HIS GLU
HIS ASP
HIS ASP
HIS GLU
LYS GLU
LYS GLU
LYS GLU
LYS GLU
LYS ASP
LYS ASP
LYS ASP
LYS GLU
LYS GLU
LYS GLU
LYS ASP
LYS ASP
LYS GLU
LYS GLU
LYS GLU
LYS GLU
LYS GLU
LYS ASP
LYS ASP
LYS ASP
LYS GLU
LYS GLU
LYS GLU
LYS ASP
LYS ASP
LYS GLU
ASP LYS
ASP HIS
ASP HIS
ASP LYS
ASP ARG
ASP LYS
ASP LYS
ASP ARG
ASP HIS
ASP LYS
ASP ARG
ASP ARG
ASP ARG
ASP ARG
ASP LYS
ASP LYS
ASP LYS
ASP LYS
ASP LYS
ASP LYS
ASP ARG
ASP ARG
ASP HIS
ASP LYS
HIS GLU
HIS GLU
HIS GLU
HIS GLU
HIS ASP
HIS ASP
HIS ASP
HIS GLU
HIS GLU
HIS GLU
HIS ASP
HIS ASP
HIS GLU
GLU LYS
GLU HIS
GLU HIS
GLU LYS
GLU ARG
GLU LYS
GLU LYS
GLU ARG
GLU HIS
GLU LYS
GLU ARG
GLU ARG


In [69]:
# Debug helper: list the atom names of the first histidine found in chain1
# (prints nothing when the chain contains no histidine).
first_his = next((residue for residue in chain1 if residue.resname == "HIS"), None)
if first_his is not None:
    for his_atom in first_his:
        print(his_atom.id)

N
H
CA
HA
C
CB
HB2
HB3
O
CG
CD2
HD2
ND1
HD1
CE1
HE1
NE2


In [56]:
# Detect hydrogen bonds between the two chains of the complex.
# dict.fromkeys() removes duplicate chain IDs while keeping their order of
# appearance. The previous list(set(...)) ordered the IDs by string hash, so
# which chain became "chain1" could change from one kernel session to the next.
chains = list(dict.fromkeys(measure_PPI.struc.get_chains(atomarray_biotite)))
assert len(chains) == 2, f"expected exactly 2 chains, found {len(chains)}"

# Boolean per-atom masks selecting each chain
chain1_mask = atomarray_biotite.chain_id == chains[0]
chain2_mask = atomarray_biotite.chain_id == chains[1]

# Derive the covalent bonds from inter-atomic distances and attach them to the
# atom array before running the hydrogen-bond search.
bond_list = measure_PPI.struc.bonds.connect_via_distances(atomarray_biotite)
atomarray_biotite.bonds = bond_list

# Hydrogen bonds between an atom of one chain and an atom of the other
triplets = measure_PPI.struc.hbond(atomarray_biotite, selection1=chain1_mask, selection2=chain2_mask)

bond_list.get_all_bonds()

(array([[   1,    2,    3,    4],
        [   0,   -1,   -1,   -1],
        [   0,   -1,   -1,   -1],
        ...,
        [4790,   -1,   -1,   -1],
        [4789,   -1,   -1,   -1],
        [4789,   -1,   -1,   -1]]),
 array([[ 0,  0,  0,  0],
        [ 0, -1, -1, -1],
        [ 0, -1, -1, -1],
        ...,
        [ 0, -1, -1, -1],
        [ 0, -1, -1, -1],
        [ 0, -1, -1, -1]], dtype=int8))

In [60]:
# Preview only the first atoms: echoing the whole array prints one line per atom
# and floods the notebook output.
atomarray_biotite[:10]

array([
	Atom(np.array([  6.367, -15.089,   6.702], dtype=float32), chain_id="A", res_id=1, ins_code="", res_name="VAL", hetero=False, atom_name="N", element="N"),
	Atom(np.array([  5.861, -14.311,   7.102], dtype=float32), chain_id="A", res_id=1, ins_code="", res_name="VAL", hetero=False, atom_name="H", element="H"),
	Atom(np.array([  6.082, -15.954,   7.138], dtype=float32), chain_id="A", res_id=1, ins_code="", res_name="VAL", hetero=False, atom_name="H2", element="H"),
	Atom(np.array([  7.36 , -14.957,   6.833], dtype=float32), chain_id="A", res_id=1, ins_code="", res_name="VAL", hetero=False, atom_name="H3", element="H"),
	Atom(np.array([  6.083, -15.165,   5.261], dtype=float32), chain_id="A", res_id=1, ins_code="", res_name="VAL", hetero=False, atom_name="CA", element="C"),
	Atom(np.array([  6.67 , -15.978,   4.834], dtype=float32), chain_id="A", res_id=1, ins_code="", res_name="VAL", hetero=False, atom_name="HA", element="H"),
	Atom(np.array([  6.506, -13.864,   4.612], dtype=fl