In [2]:
## Read in a structure file - intended to be used for AlphaFold-Multimer output - and calculate structural features of the interface between two chains
# Created by: Joelle Strom
# Last updated: 16.08.2024

import os
from pymol import cmd
from get_raw_distances import get_raw_distances

In [3]:
def find_h_bonds():

    """ Use PyMol find polar contacts functionality to return integer number of H-bonds between chains within the interface """
    
    # Create two selections: one containing interface residues on chain 1 and the other containing residues on chain 2
    cmd.select(selection=f"br. {name} and chain {chains[0]} within 5A of {name} and chain {chains[1]}", name='sel1')
    cmd.select(selection=f"br. {name} and chain {chains[1]} within 5A of {name} and chain {chains[0]}", name="sel2")
    # Call PyMol distance function in 'find polar contacts' mode, restricting calculation to contacts between the two selections
    cmd.distance("h_bonds", "sel1", "sel2", mode="2")
    dists = get_raw_distances("h_bonds")

    return len(dists) # This is the number of H-bonds found by the distance function


In [4]:
def find_salt_bridges():

    """ Use PyMol distance function and selection of charged functional groups to return integer number of salt bridges within the interface"""

    # Create two selections: one for negatively charged residues, the other for positively charged residues
    cmd.select(selection="(resn ASP+Glu and name OD*+OE*)", name="negative")
    cmd.select(selection="(resn Lys and name NZ) or (resn arg and name NE+NH*)", name="positive")
    # Call PyMol distance function (in normal mode this time) twice
    # First call finds salt bridges between positive residues on chain 1 and negative residues on chain 2
    # Second call finds salt bridges between negative residues on chain 2 and positive residues on chain 1
    cmd.distance("saltbridge1", "positive and chain %s" % chains[0], "negative and chain %s" % chains[1], cutoff="4", mode="0")
    cmd.distance("saltbridge2", "negative and chain %s" % chains[0], "positive and chain %s" % chains[1], cutoff="4", mode="0")
    dists = get_raw_distances("saltbridge1 saltbridge2")

    return len(dists) # This is the total number of salt bridges found by the two distance functions

In [5]:
def calculate_buried_area():
    
    """ Use PyMol Get_area function to find the buried surface area of the interface"""

    # Create two selections, one for the entirety of chain 1, the other for the entirety of chain 2
    cmd.select(selection=f"{name} and chain {chains[0]}", name="chain1")
    cmd.select(selection=f"{name} and chain {chains[1]}", name="chain2")
    cmd.set('dot_solvent', 1)
    cmd.set('dot_density', 3)
    # Find SASA of chain 1, chain 2, and the entire multimer
    area_ch1 = cmd.get_area(selection="chain1")
    area_ch2 = cmd.get_area(selection="chain2")
    area_all = cmd.get_area()

    return round((area_ch1 + area_ch2 - area_all), 3) # This is the calculated buried surface area based on the three values found above

In [6]:
### EXAMPLE TO ITERATE OVER MULTIPLE STRUCTURE FILES, ASSUMING A CERTAIN FOLDER STRUCTURE ###
### Assumption is that there is one parent folder, which contains subfolders for each unique value of 'prediction_name' in the data table, each of which subfolder contains the 5 AlphaFold predictions for that prediction name ###

### CHANGE FILEPATH ###
datapath = "<filepath to parent folder for all structures>"

# Iterate over subfolders
for folder in os.listdir(datapath):
    if os.path.isdir(os.path.join(filepath,folder)):
        namepath = os.path.join(filepath,folder)
        # Iterate over structure files in subfolders
        for file in os.listdir(namepath):
            filepath = os.path.join(namepath,file)
            name = file.split('.')[0]

            # Load structure file using PyMol API and get chain names for use in above functions
            cmd.load(filepath, name)
            chains = cmd.get_chains(name)
            # Call above functions
            hbonds = find_h_bonds()
            saltBridges = find_salt_bridges()
            buriedSASA = calculate_buried_area()
            # Reinitialize the PyMol session - important to add this step, otherwise structures will accumulate in a single session as the loop iterates
            cmd.reinitialize()

            # Example output of all 3 functions
            print('Number of H-bonds in interface: ', hbonds, '\nNumber of salt bridges in interface: ', saltBridges, '\nAmount of buried surface area: ', buriedSASA)
            ### THIS LAST LINE SHOULD BE CHANGED TO SOMETHING THAT WILL SAVE VALUES TO APPEND TO DATA TABLE ###

Number of H-bonds in interface:  10 
Number of salt bridges in interface:  0 
Amount of buried surface area:  0.206
