# Further improvements of the measuring script
Created 2025-01-14

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import sys
import numpy as np
import time

In [2]:
# Make the local helper library importable: its folder is resolved to an
# absolute path and put on sys.path before `measure_PPI` is imported.
libpath = Path("../andreas lib").resolve()
print(libpath)
# Position 0 so that this folder is searched before all other sys.path entries
sys.path.insert(0, str(libpath))
# Presumably located in the folder added above — must stay after the sys.path insert
import measure_PPI

D:\Eigene Datein\Programmieren\Git\abrilka\bachelorthesis\andreas lib


In [3]:
# Let the measure_PPI logger emit DEBUG records (the runtime measurements shown in the outputs below are logged at DEBUG level)
measure_PPI.logger.setLevel(measure_PPI.logging.DEBUG)

In [4]:
# Absolute base folders of the AF_DMI structures and of the solved structures
# (the latter once without and once with hydrogens, going by the folder names).
structure_basePath = Path("../ressources", "ISS AF_DMI_structures").resolve()
solved_basePath = Path("../ressources", "ISS DMI_solved_structures").resolve()
solvedHydrogen_basePath = Path("../ressources", "ISS DMI_solved_structures hydrogens").resolve()

# The AF_DMI structures are spread over three numbered sub-folders
structure_folders = [structure_basePath / f"AF_DMI_structures{number}" for number in (1, 2, 3)]

In [14]:
# Load one sample structure that is used by the cells below
sampleStructure_name = "LIG_NRP_CendR_1_2ORZ"
sampleStructure_path = structure_folders[1] / sampleStructure_name / "ranked_0.pdb"
# Alternative samples:
#sampleStructure_name = "DEG_MDM2_SWIB_1_1YCR"
#sampleStructure_path = structure_folders[0] / sampleStructure_name / "ranked_1.pdb"
#sampleStructure_name = "DEG_Kelch_Keap1_1_2FLU"
#sampleStructure_path = structure_folders[0] / sampleStructure_name / "ranked_4.pdb"

# The last "_"-separated token of the structure name is used as PDB id
pdb_id = sampleStructure_name.rsplit("_", 1)[-1]
# File of the solved structure (with hydrogens) that belongs to this PDB id
solvedStructure_path = solvedHydrogen_basePath / f"{pdb_id}_min_DMI.pdb"

structure_biopy, atomarray_biotite = measure_PPI.OpenStructure(sampleStructure_path, sampleStructure_name)
solved_biopy, solved_biotite = measure_PPI.OpenStructure(solvedStructure_path, sampleStructure_name)

# Reference values of the existing implementation for the predicted and the solved structure
print(measure_PPI.calculate_min_distance(atomarray_biotite, max_cutoff=200))
print("AF", measure_PPI.EvaluateStructure(sampleStructure_path, sampleStructure_name))
print("SOLVED", measure_PPI.EvaluateStructure(solvedStructure_path, sampleStructure_name))

[2025-01-15 14:55:59,903 | measure_PPI | DEBUG] Runtime reading structure LIG_NRP_CendR_1_2ORZ (file ranked_0.pdb): 22.6ms
[2025-01-15 14:55:59,927 | measure_PPI | DEBUG] Runtime reading structure LIG_NRP_CendR_1_2ORZ (file 2ORZ_min_DMI.pdb): 23.5ms
HELLO
[2025-01-15 14:56:00,023 | measure_PPI | DEBUG] Runtime calculate_min_distance: 94.7ms (0.4ms generating chains, 94.3ms calculating distance)
6.297
[2025-01-15 14:56:00,045 | measure_PPI | DEBUG] Runtime reading structure LIG_NRP_CendR_1_2ORZ (file ranked_0.pdb): 20.8ms
[2025-01-15 14:56:00,110 | measure_PPI | DEBUG] Sasa values: Chain 1 = 7380.857, Chain 2 = 734.776, Total = 7417.026
[2025-01-15 14:56:00,111 | measure_PPI | DEBUG] Runtime calculate_buried_area: 65.3ms (1.9ms model buiilding, 12.5ms loading, 50.9ms sasa calc)
[2025-01-15 14:56:00,126 | measure_PPI | DEBUG] Runtime calculate_hbonds: 14.6ms (0.3ms generating chains, 11.5ms bond list, 2.8ms hbonds)
HELLO
[2025-01-15 14:56:00,155 | measure_PPI | DEBUG] Runtime calculate_m

### New functions

In [8]:
import biotite.structure as struc
import biotite.structure.io.pdb as bt_pdb
from Bio.PDB import PDBParser
from Bio.PDB.Structure import Structure as BioPy_PDBStructure
from Bio.PDB.Model import Model as BioPy_PDBModel
from Bio.PDB.PDBExceptions import PDBConstructionException

In [7]:
import warnings
# Quick check of warnings.catch_warnings(): the context manager restores the
# previous warning filters on exit, so the filter installed below is only
# active for code that runs inside this `with` block.
with warnings.catch_warnings():
    print("catch")
    # Ignore warnings whose message starts with "All-NaN slice encountered" / "All-NaN axis encountered"
    warnings.filterwarnings('ignore', r'All-NaN (slice|axis) encountered')

catch


In [23]:
from sklearn.metrics import pairwise_distances
from Bio.PDB.Atom import Atom as BioPy_Atom

class ProteinStructureWarning(Exception):
    """Signals that a structure does not fulfil a requirement of the analysis (e.g. wrong chain count)."""

    def __init__(self, message):
        # The message becomes the only entry of `args` and therefore the str() of the exception
        Exception.__init__(self, message)

def get_distance_matrix(structure_biopy:BioPy_PDBStructure) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Calculate all atom-to-atom distances between the two chains of a structure.

    Only the model with id 0 is evaluated. The chains are counted within that
    model as well, so a structure with several models is not rejected just
    because its chains show up once per model.

    Args:
        structure_biopy: Biopython structure whose model 0 contains exactly two chains.

    Returns:
        A tuple ``(distance_matrix, chain1_atoms, chain2_atoms)``.
        ``distance_matrix[i, k]`` is the distance between ``chain1_atoms[i]`` and
        ``chain2_atoms[k]``. Both atom collections are 1-D object arrays, so
        they can be indexed with index arrays taken from the matrix.

    Raises:
        ProteinStructureWarning: If model 0 does not have exactly two chains or
            if one of the chains does not contain any atom.
    """
    model = structure_biopy[0]
    chains = list(model.get_chains())
    if len(chains) != 2:
        raise ProteinStructureWarning(f"The protein needs to have 2 chains but it has {len(chains)}")
    chain1, chain2 = chains

    chain1_atoms = np.array(list(chain1.get_atoms()))
    chain2_atoms = np.array(list(chain2.get_atoms()))
    # Without atoms there are no coordinates and the distance calculation would
    # fail with an unrelated error, so report it as a structure problem instead.
    if len(chain1_atoms) == 0 or len(chain2_atoms) == 0:
        raise ProteinStructureWarning("Both chains of the protein need to contain at least one atom")

    chain1_coords = np.array([a.coord for a in chain1_atoms])
    chain2_coords = np.array([a.coord for a in chain2_atoms])

    distance_matrix = pairwise_distances(chain1_coords, chain2_coords)

    return (distance_matrix, chain1_atoms, chain2_atoms)

def get_interface(distance_matrix: np.ndarray, chain1_atoms: "np.ndarray | list[BioPy_Atom]", chain2_atoms: "np.ndarray | list[BioPy_Atom]", cutoff=5.0) -> "tuple[list, list, float]":
    """Find the interface residues of two chains and their minimal CA-CA distance.

    An atom belongs to the interface if at least one atom of the other chain
    is at most ``cutoff`` away. A residue is reported as interface residue if
    its CA atom belongs to the interface.

    Args:
        distance_matrix: Matrix of shape ``(len(chain1_atoms), len(chain2_atoms))``
            with the pairwise atom distances.
        chain1_atoms: Atoms of chain 1 in the order of the matrix rows. Every
            atom needs a ``name`` and a ``parent`` (its residue) attribute.
        chain2_atoms: Atoms of chain 2 in the order of the matrix columns.
        cutoff: Largest distance at which two atoms are considered in contact.

    Returns:
        A tuple ``(intf1_residues, intf2_residues, min_distance)``. The residue
        lists are in chain order. ``min_distance`` is the smallest distance
        between an interface CA atom of chain 1 and one of chain 2, rounded to
        3 decimals. If one of the chains has no interface CA atom there is no
        such distance and ``float("inf")`` is returned instead of failing.
    """
    distance_matrix = np.asarray(distance_matrix)
    within_cutoff = distance_matrix <= cutoff

    # Row / column indices of the interface CA atoms. Working with indices (in
    # chain order) avoids hashing and comparing atom objects and keeps the
    # residue lists aligned with the rows and columns of the local matrix.
    ca1_indices = [int(i) for i in np.flatnonzero(within_cutoff.any(axis=1)) if chain1_atoms[i].name == "CA"]
    ca2_indices = [int(i) for i in np.flatnonzero(within_cutoff.any(axis=0)) if chain2_atoms[i].name == "CA"]

    intf1_residues = [chain1_atoms[i].parent for i in ca1_indices]
    intf2_residues = [chain2_atoms[i].parent for i in ca2_indices]

    # np.min raises on an empty selection, so handle the "no interface" case explicitly
    if not ca1_indices or not ca2_indices:
        return (intf1_residues, intf2_residues, float("inf"))

    # The distance matrix sliced to only the interface CA atoms, shape (len(ca1_indices), len(ca2_indices))
    local_dist_matrix = distance_matrix[np.ix_(ca1_indices, ca2_indices)]
    min_distance = round(float(np.min(local_dist_matrix)), 3)

    return (intf1_residues, intf2_residues, min_distance)


structure_name = sampleStructure_name

# Run both new functions on the sample structure and measure their runtime
ti = time.perf_counter()
try:
    distance_matrix, chain1_atoms, chain2_atoms = get_distance_matrix(structure_biopy)
    print(type(chain1_atoms[0]))
    print(chain1_atoms)
    t1 = time.perf_counter()
    intf1_residues, intf2_residues, min_distance = get_interface(distance_matrix, chain1_atoms, chain2_atoms)
    print(intf1_residues, intf2_residues, min_distance)

except ProteinStructureWarning as ex:
    measure_PPI.logger.warning(f"The {structure_name} throw the following Structure Warning: {str(ex)}")

else:
    # Only report the runtimes if the calculation succeeded: `t1` is assigned
    # inside the try block and would be undefined (or stale from an earlier
    # cell) if get_distance_matrix raised.
    tf = time.perf_counter()
    print("Distance matrix", round(1000*(t1-ti), 3), "ms")
    print("Interace", round(1000*(tf-t1), 3), "ms")
    print("Total", round(1000*(tf-ti), 3), "ms")

<class 'Bio.PDB.Atom.Atom'>
[<Atom N> <Atom H> <Atom H2> ... <Atom C> <Atom O> <Atom OXT>]
[<Residue GLU het=  resseq=31 icode= >, <Residue SER het=  resseq=58 icode= >, <Residue THR het=  resseq=28 icode= >, <Residue ASP het=  resseq=32 icode= >, <Residue TYR het=  resseq=9 icode= >, <Residue PRO het=  resseq=29 icode= >, <Residue GLY het=  resseq=126 icode= >, <Residue ILE het=  resseq=127 icode= >, <Residue GLY het=  resseq=30 icode= >] [<Residue LYS het=  resseq=2 icode= >, <Residue ARG het=  resseq=4 icode= >, <Residue PRO het=  resseq=3 icode= >] 6.297
Distance matrix 8.291 ms
Interace 7.214 ms
Total 15.505 ms


In [None]:


#intf1_res = set([a.get_parent() for a in intf1_atoms])
#intf2_res = set([a.get_parent() for a in intf2_atoms])
# NOTE(review): leftover timing snippet — `min_distance` and `t0` are not assigned
# in this cell and have to come from a previously executed cell.
t1 = time.perf_counter()
print(min_distance)
print(1000*(t1-t0), "ms")  # elapsed time since t0 in milliseconds

In [9]:
#structure_biopy, atomarray_biotite = measure_PPI.OpenStructure(sampleStructure_path, sampleStructure_name)
# Measure how long it takes to look up the first two chains of model 0
t0 = time.perf_counter()
chains = [c for c in structure_biopy.get_chains()]
chain1 = structure_biopy[0][chains[0].id]
chain2 = structure_biopy[0][chains[1].id]
print(time.perf_counter() - t0)  # elapsed time in seconds

9.850000060396269e-05


In [10]:
from sklearn.metrics import pairwise_distances

# Measure separately how long collecting the atoms/coordinates and how long
# the pairwise distance calculation takes. Relies on `chains` from the cell above.
ti = time.perf_counter()

# Object arrays of the atoms, so they can later be indexed with index arrays
chain1_atoms = np.array([a for a in structure_biopy[0][chains[0].id].get_atoms()])
chain2_atoms = np.array([a for a in structure_biopy[0][chains[1].id].get_atoms()])
chain1_coords = [a.coord for a in chain1_atoms]
chain2_coords = [a.coord for a in chain2_atoms]

t1 = time.perf_counter()

# Entry (i, k) is the distance between atom i of chain 1 and atom k of chain 2
distance_matrix = pairwise_distances(chain1_coords,chain2_coords)

tf = time.perf_counter()

# Runtimes in seconds
print("Array", t1-ti)
print("Matrix", tf-t1)
print("Total", tf-ti)
# Smallest distance between any atom of chain 1 and any atom of chain 2
print(np.min(distance_matrix))

Array 0.0022367999990819953
Matrix 0.006452300000091782
Total 0.008689099999173777
1.7236778


In [91]:
dist

array([[25.854052 , 26.387917 , 26.257805 , ..., 17.6606   , 19.13263  ,
        18.084257 ],
       [26.691383 , 27.2195   , 27.1128   , ..., 18.656075 , 20.117607 ,
        19.086708 ],
       [26.154837 , 26.698648 , 26.525972 , ..., 17.513313 , 19.036243 ,
        17.806412 ],
       ...,
       [27.09391  , 27.591217 , 27.298271 , ..., 15.971724 , 17.622305 ,
        16.19397  ],
       [26.66787  , 27.220314 , 26.844728 , ..., 15.6247635, 17.332933 ,
        15.413696 ],
       [31.203632 , 31.786158 , 31.367935 , ..., 20.097376 , 21.837587 ,
        19.434824 ]], dtype=float32)

In [44]:
type(distance_matrix)

numpy.ndarray

In [11]:
# Cross-check with biotite: pick one atom per chain by index and measure their distance
chains = struc.get_chains(atomarray_biotite)
chain1_bt = atomarray_biotite[atomarray_biotite.chain_id == chains[0]]
chain2_bt = atomarray_biotite[atomarray_biotite.chain_id == chains[1]]

a1 = chain1_bt[883]
print(a1)
a2 = chain2_bt[54]
print(a2)
print(struc.distance(a1, a2))

    A      58  SER CA     C        -7.922   14.767    5.514
    B       4  ARG CA     C        -9.628   11.590   10.676
6.2968254


In [12]:
# Same atom indices as in the biotite cell above, this time taken from the Biopython atom arrays
c1 = chain1_atoms[883]
c2 = chain2_atoms[54]
print(c1.coord)
print(c2.coord)

[-7.922 14.767  5.514]
[-9.628 11.59  10.676]


In [60]:
# Inline prototype of the interface search (cutoff 5) including a runtime measurement
t0 = time.perf_counter()
# (row, column) index pairs of all atom pairs with a distance of at most 5
pair_dist = np.argwhere(distance_matrix <= 5)
# Atoms of each chain that have at least one atom of the other chain within the cutoff
intf1_atoms = set(chain1_atoms[pair_dist[:, 0]])
intf2_atoms = set(chain2_atoms[pair_dist[:, 1]])

# CA atoms among the interface atoms (the `a in intf1_atoms` test is always true here)
intf1_backbone = [a for a in intf1_atoms if a.name == "CA" and a in intf1_atoms]
intf2_backbone = [a for a in intf2_atoms if a.name == "CA" and a in intf2_atoms]

# Positions of the interface CA atoms within the atom arrays, i.e. rows / columns of distance_matrix
backbone_indices_tuple = ([i for i,a in enumerate(chain1_atoms) if a in intf1_backbone], [i for i,a in enumerate(chain2_atoms) if a in intf2_backbone])
# Distance matrix reduced to the interface CA atoms: first select the rows, then the columns
local_dist_matrix = distance_matrix[backbone_indices_tuple[0], :][:, backbone_indices_tuple[1]]
print(backbone_indices_tuple)
print(local_dist_matrix)
min_distance = np.min(local_dist_matrix)

#intf1_res = set([a.get_parent() for a in intf1_atoms])
#intf2_res = set([a.get_parent() for a in intf2_atoms])
t1 = time.perf_counter()
print(min_distance)
print(1000*(t1-t0), "ms")  # runtime in milliseconds

([115, 424, 437, 452, 459, 474, 883, 2007, 2014], [18, 39, 54])
[[ 8.723246  10.721379   8.79001  ]
 [10.39036   12.158244   9.11787  ]
 [10.487941  13.229823  11.049983 ]
 [ 9.200148  12.363564  11.144696 ]
 [ 8.541397  12.23767   11.581208 ]
 [ 8.958161  12.394463  11.6013365]
 [11.42874    9.517461   6.2968254]
 [ 8.255908  10.14322    8.704535 ]
 [10.452786  12.562158  10.266275 ]]
6.2968254
8.070399999269284 ms


In [130]:
t0 = time.perf_counter()
# NOTE(review): in the interface prototype cell `intf1_atoms` is a set of atom
# objects; a list of atom objects is presumably not a valid index for the atom
# array, so this cell probably ran against an older definition — confirm before reuse.
intf1_res = chain1_atoms[list(intf1_atoms)]
t1 = time.perf_counter()

0.0001063999998223153


In [163]:
# Parent residue of every chain 1 atom: one entry per atom, so each residue repeats once per atom
chain1_atoms_res = [atom.get_parent() for atom in chain1_atoms]
chain1_atoms_res

[<Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue PRO het=  resseq=1 icode= >,
 <Residue LYS het=  resseq=2 icode= >,
 <Residue LYS het=  resseq=2 icode= >,
 <Residue LYS het=  resseq=2 icode= >,
 <Residue LYS het=  resseq=2 icode= >,
 <Residue LYS het=  resseq=2 icode= >,
 <Residue LYS het=  resseq=2 icode= >,
 <Residue LYS het=  resseq=2 icode= >,
 <Residue LYS het=  resseq=2 icode= >,
 <Residue LYS het=  resseq=2 icode= >,
 <Residue LYS het=  resse

In [143]:
from sklearn.metrics import pairwise_distances

ti = time.perf_counter()

# NOTE(review): the nested loop below is an unfinished draft of a residue-wise
# distance calculation; none of its results are stored.
chain1_res = [r for r in structure_biopy[0][chains[0].id]]
chain2_res = [r for r in structure_biopy[0][chains[1].id]]
for r1 in chain1_res:
    for r2 in chain2_res:
        
        r1_coords = [a.coord for a in r1]
        r2_coords = [a.coord for a in r2]
        ca1_index = r1_coords

        # NOTE(review): indexes the residue with itself — presumably r1["CA"] was intended; confirm
        ca1 = r1[r1]
        # NOTE(review): uses chain1_coords/chain2_coords from an earlier cell instead of
        # r1_coords/r2_coords, and the returned matrix is discarded
        pairwise_distances(chain1_coords,chain2_coords)
print(chain1_atoms)


# Same measurement as in the distance matrix timing cell, but the result is stored in `dist`
chain1_atoms = np.array([a for a in structure_biopy[0][chains[0].id].get_atoms()])
chain2_atoms = np.array([a for a in structure_biopy[0][chains[1].id].get_atoms()])
chain1_coords = [a.coord for a in chain1_atoms]
chain2_coords = [a.coord for a in chain2_atoms]

t1 = time.perf_counter()

dist = pairwise_distances(chain1_coords,chain2_coords)

tf = time.perf_counter()

# Runtimes in seconds; "Array" also contains the draft loop above because `ti` is taken before it
print("Array", t1-ti)
print("Matrix", tf-t1)
print("Total", tf-ti)

[<Residue PRO het=  resseq=1 icode= >, <Residue LYS het=  resseq=2 icode= >, <Residue PRO het=  resseq=3 icode= >, <Residue LEU het=  resseq=4 icode= >, <Residue LEU het=  resseq=5 icode= >, <Residue LEU het=  resseq=6 icode= >, <Residue LYS het=  resseq=7 icode= >, <Residue LEU het=  resseq=8 icode= >, <Residue LEU het=  resseq=9 icode= >, <Residue LYS het=  resseq=10 icode= >, <Residue SER het=  resseq=11 icode= >, <Residue VAL het=  resseq=12 icode= >, <Residue GLY het=  resseq=13 icode= >, <Residue ALA het=  resseq=14 icode= >, <Residue GLN het=  resseq=15 icode= >, <Residue LYS het=  resseq=16 icode= >, <Residue ASP het=  resseq=17 icode= >, <Residue THR het=  resseq=18 icode= >, <Residue TYR het=  resseq=19 icode= >, <Residue THR het=  resseq=20 icode= >, <Residue MET het=  resseq=21 icode= >, <Residue LYS het=  resseq=22 icode= >, <Residue GLU het=  resseq=23 icode= >, <Residue VAL het=  resseq=24 icode= >, <Residue LEU het=  resseq=25 icode= >, <Residue PHE het=  resseq=26 icod

In [162]:
# Inspect the first residue of chain 1: its atoms as array and the lookup of an atom by name
r1 = chain1_res[0]
a1 = np.array(list(r1))
r1["CA"]

<Atom CA>

In [207]:
chain1_res = list(structure_biopy[0][chains[0].id])
chain2_res = list(structure_biopy[0][chains[1].id])

residue_dist_matrix = np.full(shape=(len(chain1_res), len(chain2_res)), fill_value=np.nan)

# Rows (chain 1) and columns (chain 2) of `dist` that belong to each residue.
# They only depend on the residue itself, so they are collected once up front
# instead of being rebuilt for every residue pair inside the nested loop.
chain1_atom_indices = [np.array([i for i, a in enumerate(chain1_atoms) if a.parent == r], dtype=int) for r in chain1_res]
chain2_atom_indices = [np.array([i for i, a in enumerate(chain2_atoms) if a.parent == r], dtype=int) for r in chain2_res]

intf1_res = set()
intf2_res = set()

for r1, rows in zip(chain1_res, chain1_atom_indices):
    for r2, cols in zip(chain2_res, chain2_atom_indices):
        # A residue pair belongs to the interface if any of its atom pairs has a distance of at most 5
        if np.min(dist[np.ix_(rows, cols)]) > 5:
            continue
        intf1_res.add(r1)
        intf2_res.add(r2)

print(intf1_res)

{<Residue TYR het=  resseq=38 icode= >, <Residue PHE het=  resseq=57 icode= >, <Residue PHE het=  resseq=26 icode= >, <Residue GLY het=  resseq=29 icode= >, <Residue HIS het=  resseq=44 icode= >, <Residue GLU het=  resseq=66 icode= >, <Residue LYS het=  resseq=65 icode= >, <Residue TYR het=  resseq=71 icode= >, <Residue LEU het=  resseq=28 icode= >, <Residue GLN het=  resseq=43 icode= >, <Residue LEU het=  resseq=25 icode= >, <Residue PHE het=  resseq=62 icode= >, <Residue ILE het=  resseq=74 icode= >, <Residue VAL het=  resseq=46 icode= >, <Residue VAL het=  resseq=64 icode= >, <Residue GLN het=  resseq=30 icode= >, <Residue HIS het=  resseq=67 icode= >, <Residue VAL het=  resseq=24 icode= >, <Residue ILE het=  resseq=70 icode= >, <Residue ILE het=  resseq=45 icode= >, <Residue MET het=  resseq=33 icode= >, <Residue ILE het=  resseq=32 icode= >}


In [12]:
# Reference implementation with biotite: smallest CA-CA distance over all residue
# pairs that are in contact. NOTE: ti, t1 and tf are recorded but never printed.
ti = time.perf_counter()
chains = struc.get_chains(atomarray_biotite)
assert len(chains) == 2

chain1 = atomarray_biotite[atomarray_biotite.chain_id == chains[0]]
chain2 = atomarray_biotite[atomarray_biotite.chain_id == chains[1]]

# One CA atom per residue is used as representative of the residue
chain1_backbone = chain1[chain1.atom_name == "CA"]
chain2_backbone = chain2[chain2.atom_name == "CA"]

# Stays inf if no residue pair in contact is found
min_distance = float("inf")
t1 = time.perf_counter()
cutoff = 5.0
max_cutoff = 15

# max_cutoff is implemented to mimic the same behaviour as the ISS code which used pymol.

for ca1 in chain1_backbone:
    for ca2 in chain2_backbone:
        # CA atoms closer than cutoff: the residue pair is in contact without further checks
        if (dist := struc.distance(ca1, ca2)) < cutoff:
            min_distance = min(min_distance, dist)
            continue
        # Only pairs that could still lower min_distance are worth the atom-wise check
        elif dist <= max_cutoff and dist < min_distance: # If max_cutoff is set, check the individual atoms
            # Search all atom pairs of the two residues for one with a distance of at most cutoff.
            # The for/else construct is used to leave both loops as soon as one is found.
            for a1 in chain1[chain1.res_id == ca1.res_id]:
                for a2 in chain2[chain2.res_id == ca2.res_id]:
                    if struc.distance(a1, a2) <= cutoff:
                        break
                else: # Runs after loop finished normally
                    continue
                break # This only runs if there is a break in the inner loop because of previous continue statement
            else:
                # Only calculate min_distance if there is the atom wise distance of the residues is below cutoff
                continue
            # Reached only via the break above, i.e. an atom pair within cutoff exists
            min_distance = min(min_distance, dist)

tf = time.perf_counter()
print(min_distance)
print( round(float(min_distance), 3))


6.2968254
6.297


In [13]:
measure_PPI.calculate_min_distance(atomarray_biotite)

HELLO
[2025-01-15 14:55:08,211 | measure_PPI | DEBUG] Runtime calculate_min_distance: 27.3ms (0.5ms generating chains, 26.7ms calculating distance)


6.297

### Old code

In [None]:
def get_chains(atomarray_biotite:struc.AtomArray) -> tuple[struc.AtomArray, struc.AtomArray]:
    """Split an atom array with exactly two chains into one atom array per chain.

    Raises:
        ProteinStructureWarning: If the atom array does not contain exactly two chains.
    """
    chain_ids = struc.get_chains(atomarray_biotite)
    if len(chain_ids) == 2:
        # Select the atoms of each chain with a boolean mask on the chain id annotation
        first_chain = atomarray_biotite[atomarray_biotite.chain_id == chain_ids[0]]
        second_chain = atomarray_biotite[atomarray_biotite.chain_id == chain_ids[1]]
        return (first_chain, second_chain)

    raise ProteinStructureWarning("The protein does not have 2 chains")

def get_interface(distance_matrix: np.ndarray, chain1_backbone: "struc.AtomArray", chain2_backbone: "struc.AtomArray", cutoff=np.inf):
    """Select the backbone (CA) atoms of the residues that take part in the interface.

    Args:
        distance_matrix: Residue distance matrix in which entry (i, k) is the
            distance between residue i of chain 1 and residue k of chain 2, or
            NaN if the residues are not in contact.
        chain1_backbone: CA atoms of chain 1, one per matrix row.
        chain2_backbone: CA atoms of chain 2, one per matrix column.
        cutoff: Largest distance that still counts as contact. With the default
            (infinity) every non-NaN entry counts.

    Returns:
        A tuple ``(chain1_intfres, chain2_intfres)`` with the backbone atoms
        that have at least one matrix entry that is not NaN and at most ``cutoff``.
    """
    distance_matrix = np.asarray(distance_matrix, dtype=float)

    # NaN entries never count as contact, whatever the cutoff is. (Replacing
    # NaN by cutoff + 1 does not work for the default cutoff, because
    # inf + 1 == inf still passes the `<= cutoff` test.)
    with np.errstate(invalid="ignore"):
        is_contact = ~np.isnan(distance_matrix) & (distance_matrix <= cutoff)

    # Boolean masks keep the selection one-dimensional; np.argwhere would
    # return index arrays of shape (k, 1).
    chain1_intfres = chain1_backbone[is_contact.any(axis=1)]
    chain2_intfres = chain2_backbone[is_contact.any(axis=0)]

    return (chain1_intfres, chain2_intfres)
        


def get_distance_matrix(chain1: struc.AtomArray, chain2: struc.AtomArray, cutoff=5.0, boundary_cutoff=0.0) -> tuple[np.ndarray, struc.AtomArray, struc.AtomArray]:
    """
        Finds interface residues and reports their distance, defined as the distance between the CA backbone atoms
        (rounded to 3 decimals), if at least some atoms of the residue pair have a distance below cutoff.
        Otherwise NaN is reported.

        For faster calculations, the atom-wise check is only done for residue pairs whose CA atoms are at most
        boundary_cutoff apart. Setting a higher boundary_cutoff may or may not find more interface residues
        but leads to higher computational cost. With the default of 0.0 the atom-wise check is effectively
        disabled and only residue pairs with a CA distance below cutoff are reported.

        Parameters:
            chain1, chain2: The atoms of the two chains. Each residue needs exactly one CA atom.
            cutoff: Distance below which two CA atoms (or at most which two atoms) count as being in contact.
            boundary_cutoff: Largest CA distance for which the atom-wise check is done.

        Returns a tuple (matrix, chain1_backbone, chain2_backbone). The axes of the matrix correspond to the
        chains and the (i,k) entry is the CA distance between the ith residue of chain 1 and the kth residue
        of chain 2 (or NaN if the residues are not in contact). chain1_backbone and chain2_backbone are the
        CA atoms of the chains.

        Raises ProteinStructureWarning if the number of CA atoms of a chain differs from its residue count.
    """
    chain1_backbone = chain1[chain1.atom_name == "CA"]
    chain2_backbone = chain2[chain2.atom_name == "CA"]

    # The matrix is indexed by CA atom, so this only equals the residue index if every residue has exactly one CA atom
    if len(chain1_backbone) != struc.get_residue_count(chain1): 
        raise ProteinStructureWarning(f"The count of residues ({struc.get_residue_count(chain1)}) is not equal to the number of CA atoms in chain 1 ({len(chain1_backbone)})")
    if len(chain2_backbone) != struc.get_residue_count(chain2): 
        raise ProteinStructureWarning(f"The count of residues ({struc.get_residue_count(chain2)}) is not equal to the number of CA atoms in chain 2 ({len(chain2_backbone)})")

    # Code uses for else structure to break out of two loops
    # That works because the else clause is only run WITHOUT a break
    # Therefore the outer break will only be hit when NOT running the inner else clause

    # NaN marks residue pairs that are not in contact
    matrix = np.full(shape=(len(chain1_backbone), len(chain2_backbone)), fill_value=np.nan, dtype=np.float32)

    for i1, ca1 in enumerate(chain1_backbone):
        for i2, ca2 in enumerate(chain2_backbone):
            # CA atoms closer than cutoff: the residue pair is in contact without further checks
            if (dist := round(float(struc.distance(ca1, ca2)), 3)) < cutoff:
                matrix[i1, i2] = dist
            elif dist <= boundary_cutoff: # If boundary_cutoff is set, check the individual atoms
                # Search all atom pairs of the two residues (selected by res_id) for one within cutoff
                for a1 in chain1[chain1.res_id == ca1.res_id]:
                    for a2 in chain2[chain2.res_id == ca2.res_id]:
                        if struc.distance(a1, a2) <= cutoff:
                            break
                    else: # Runs after loop finished normally
                        continue
                    break # This only runs if there is a break in the inner loop because of previous continue statement
                else:
                    # No atom pair within cutoff was found: leave the entry at NaN
                    continue
                # Reached only via the break above: store the CA distance of the residue pair
                matrix[i1, i2] = dist
           
    
    return (matrix, chain1_backbone, chain2_backbone)

