## Speed improvements of Code II
Created 2025-01-05 by Andreas

In [1]:
import measure_PPI
import pathlib
import time
import pandas as pd
import numpy as np
import biotite.structure as struc

# Lower the library logger to DEBUG so the per-step "Runtime ..." messages
# emitted by measure_PPI show up in the cell outputs.
measure_PPI.logger.setLevel(measure_PPI.logging.DEBUG)

[2025-01-06 17:28:36,100 | measure_PPI | INFO] Loaded measure_PPI libary


Opening sample structure

In [2]:
# Absolute locations of the structure collections; the relative paths are
# resolved against the current working directory.
structure_basePath = pathlib.Path("../ressources/ISS AF_DMI_structures").resolve()
solved_basePath = pathlib.Path("../ressources/ISS DMI_solved_structures").resolve()
solvedHydrogen_basePath = pathlib.Path("../ressources/ISS DMI_solved_structures hydrogens").resolve()

# The AF_DMI structures are spread over three numbered sub-folders.
structure_folders = [
    structure_basePath.joinpath(folder_name)
    for folder_name in ("AF_DMI_structures1", "AF_DMI_structures2", "AF_DMI_structures3")
]

In [3]:
# Load one sample structure (the top-ranked model file) from the second
# structure folder; it is reused by the cells below.
sampleStructure_name = "LIG_NRP_CendR_1_2ORZ"
sampleStructure_path = structure_folders[1].joinpath(sampleStructure_name, "ranked_0.pdb")
sampleStructure = measure_PPI.OpenStructure(sampleStructure_path)

[2025-01-06 17:28:38,734 | measure_PPI | DEBUG] Runtime reading structure  (file ranked_0.pdb): 53.9ms


In [4]:
# Run the full evaluation on a list of (structure path, structure name) pairs —
# here only the sample structure — using a single worker.
# The call is the last expression of the cell so its result is displayed.
pathObj = [(sampleStructure_path, sampleStructure_name)]
measure_PPI.Run(pathObj, num_threads=1)

[2025-01-06 17:28:40,675 | measure_PPI | INFO] Started Taskpool of 1 processes.
{'structure_name': 'LIG_NRP_CendR_1_2ORZ', 'file': 'ranked_0.pdb', 'hbonds': array([ 983,  985, 2208]), 'salt_bridges': 2, 'buried_area': np.float32(697.554), 'min_distance': np.float32(6.297), 'hydrophobic_interactions': 0}


Unnamed: 0,structure_name,file,hbonds,salt_bridges,buried_area,min_distance,hydrophobic_interactions
0,LIG_NRP_CendR_1_2ORZ,ranked_0.pdb,"[983, 985, 2208]",2,697.554016,6.297,0


In [None]:
# Test evaluation: call EvaluateStructure directly with the same
# (path, name) pair that was passed to Run above.
measure_PPI.EvaluateStructure(sampleStructure_path, sampleStructure_name)

In [None]:
# NOTE(review): dataset_measure2 is not imported in any cell shown in this
# notebook; presumably it is the former name of measure_PPI — confirm before
# running this cell.
dataset_measure2.calculate_buried_area(sampleStructure[1])

In [None]:
# Reload the sample structure and inspect its two chains.
# Alternative sample name that was tried here: "DOC_MAPK_RevD_3_3TEI"
sampleStructure_name = "LIG_NRP_CendR_1_2ORZ"
sampleStructure_path = structure_folders[1] / sampleStructure_name / "ranked_0.pdb"
# Use the module imported at the top of the notebook; dataset_measure2 is
# never imported here and would raise a NameError.
sampleStructure = measure_PPI.OpenStructure(sampleStructure_path)
print(type(sampleStructure[0]), type(sampleStructure[1]))

# NOTE(review): element [1] is indexed with [0] here but passed on unindexed in
# later cells — confirm whether OpenStructure returns an array or a stack.
atom_array = sampleStructure[1][0]
chains = struc.get_chains(atom_array)
assert len(chains) == 2, f"expected exactly 2 chains, found {len(chains)}"
chain1 = atom_array[atom_array.chain_id == chains[0]]
chain2 = atom_array[atom_array.chain_id == chains[1]]
print("Chain", chains[0],":",len(chain1))
print("Chain", chains[1],":",len(chain2))

print(sampleStructure[1])

In [None]:
import math
def _finite_sasa_total(atoms, probe_radius):
    """Return the summed per-atom SASA of *atoms*, skipping non-finite values."""
    per_atom = struc.sasa(atoms, probe_radius=probe_radius)
    # Mask the array in one vectorised step instead of testing every element
    # in a Python-level loop; the selected values and their order are the same.
    return per_atom[np.isfinite(per_atom)].sum()


def calculate_buried_area(atomarray_biotite:struc.AtomArray, probe_radius:float=1.4):
    """
        Calculates the buried surface area using biotite, defined as the summed solvent
        accessible surface area (SASA) of the two isolated chains minus the SASA of the complex.

        Args:
            atomarray_biotite: Structure containing exactly two chains.
            probe_radius: Probe radius passed on to biotite's sasa().

        Returns:
            The buried area (sasa_chain1 + sasa_chain2 - sasa_complex), rounded to 3 decimals.

        Raises:
            AssertionError: If the structure does not contain exactly two chains.
    """
    # Use the logger of the module imported at the top of the notebook.
    logger = measure_PPI.logger
    ti = time.perf_counter()
    chains = struc.get_chains(atomarray_biotite)
    assert len(chains) == 2, f"expected exactly 2 chains, found {len(chains)}"

    chain1 = atomarray_biotite[atomarray_biotite.chain_id == chains[0]]
    chain2 = atomarray_biotite[atomarray_biotite.chain_id == chains[1]]
    t1 = time.perf_counter()

    sasa12 = _finite_sasa_total(atomarray_biotite, probe_radius)
    sasa1 = _finite_sasa_total(chain1, probe_radius)
    sasa2 = _finite_sasa_total(chain2, probe_radius)
    logger.debug(f"SASA complex / chain 1 / chain 2: {sasa12} {sasa1} {sasa2}")
    buried_area = (sasa1 + sasa2 - sasa12)
    tf = time.perf_counter()
    # The sasa share is measured from t1 (after chain slicing), not from the start.
    logger.debug(f"Runtime calculate_buried_area: {round((tf-ti)*1000, 1)}ms ({round((t1-ti)*1000, 1)}ms generating chains, {round((tf-t1)*1000, 1)}ms sasa)")
    return round(buried_area, 3)

calculate_buried_area(sampleStructure[1])

In [None]:
# Baseline timing: SASA of the complex and of each chain, each summed with a
# Python-level loop that skips non-finite per-atom values.
# Note that the timed region also covers the import and the chain slicing.
# atomarray_biotite and probe_radius are reused by the next cell.
t0 = time.perf_counter()
import math
atomarray_biotite = sampleStructure[1]
chains = struc.get_chains(atomarray_biotite)
chain1 = atomarray_biotite[atomarray_biotite.chain_id == chains[0]]
chain2 = atomarray_biotite[atomarray_biotite.chain_id == chains[1]]
probe_radius = 1.4
sasa12 = sum([s for s in struc.sasa(atomarray_biotite, probe_radius=probe_radius) if math.isfinite(s)])
sasa1 = sum([s for s in struc.sasa(chain1, probe_radius=probe_radius) if math.isfinite(s)])
sasa2 = sum([s for s in struc.sasa(chain2, probe_radius=probe_radius) if math.isfinite(s)])
t1 = time.perf_counter()
# Elapsed wall-clock time in seconds.
print(t1-t0)

In [None]:
# Time one SASA call on the whole complex with vdw_radii="Single" instead of
# the default radii set; relies on atomarray_biotite and probe_radius defined
# in the previous cell. The result is discarded, only the runtime is printed.
# NOTE(review): per the biotite docs the "Single" set expects hydrogen atoms
# in the model — confirm this holds for the sample structure.
t0 = time.perf_counter()
struc.sasa(atomarray_biotite, probe_radius=probe_radius, vdw_radii="Single")
t1 = time.perf_counter()
# Elapsed wall-clock time in seconds.
print(t1-t0)

In [None]:
# Comparison against the rust_sasa_python bindings, which are called with the
# path of the PDB file (as a string) rather than with a loaded structure.
# NOTE(review): the wildcard import supplies the calculate_sasa_at_* functions
# used below; other names it brings in may shadow notebook variables.
from rust_sasa_python import *
print(sampleStructure_path)
# Only the protein-level call is timed.
t0 = time.perf_counter()
sasas = calculate_sasa_at_protein_level(str(sampleStructure_path))
df = time.perf_counter() - t0  # elapsed seconds (not a DataFrame)
print(df)
print(sasas)

# Total obtained by summing the atom-level values (presumably a cross-check
# against the protein-level result printed above).
sasaR = calculate_sasa_at_atom_level(str(sampleStructure_path))
sasa = sum(sasaR)
print(sasa)

# Same total from the residue-level output; the value at index 1 of each entry
# is summed.
sasaR = calculate_sasa_at_residue_level(str(sampleStructure_path))
sasa = sum([x[1] for x in sasaR])
print(sasa)

In [None]:
# Reference timing with Bio.PDB's ShrakeRupley on sampleStructure[0]
# (presumably the Biopython structure object returned by OpenStructure).
# Note: `sasa` is rebound here to the ShrakeRupley calculator.
from Bio.PDB import ShrakeRupley
sasa = ShrakeRupley()
chains = [c for c in sampleStructure[0].get_chains()]
ti = time.perf_counter()
# Atom-level SASA of the whole complex. The values are summed right away
# because the per-chain compute() calls below run on chains taken from the
# same structure object and write to the same atom.sasa attributes.
sasa.compute(sampleStructure[0], level="A")
total_area = sum(atom.sasa for atom in sampleStructure[0].get_atoms())
t1 = time.perf_counter()

# Compute the SASA of each chain in isolation (first two chains, looked up
# under index 0 of the structure)
chain1 = sampleStructure[0][0][chains[0].id]
chain2 = sampleStructure[0][0][chains[1].id]

sasa.compute(chain1, level="A")
area_ch1 = sum(atom.sasa for atom in chain1.get_atoms())

sasa.compute(chain2, level="A")
area_ch2 = sum(atom.sasa for atom in chain2.get_atoms())

print(area_ch1, area_ch2, total_area)
tf = time.perf_counter()
# Total elapsed seconds (includes the print above) and the complex-only part.
print(tf - ti, t1-ti)