In [1]:
import pymol
from pymol import cmd, stored
import __main__
import pandas as pd
import sys

In [2]:
def sasa_PDB(PDB_path, target_chain, ratio_cutoff=0.4, sasa_cutoff=50):
    """Find residues of one chain that lose solvent-accessible area in a complex.

    The structure is loaded into an emptied PyMOL session, the per-residue
    area of ``target_chain`` is measured while the other chains are present,
    the chain is then extracted into its own object and measured again.

    Parameters
    ----------
    PDB_path : str
        Structure file passed to ``cmd.load``.
    target_chain : str
        Chain identifier used in the ``c. <chain>`` selections.
    ratio_cutoff : float, optional
        A residue is reported when its buried fraction exceeds this value.
    sasa_cutoff : float, optional
        A residue is also reported when its buried area exceeds this value
        (in the units returned by ``cmd.get_area``).

    Returns
    -------
    hot_spot : list of [int, float, float]
        ``[residue number, buried fraction, buried area]`` for every residue
        that passes either cutoff. Residues whose isolated-chain area is 0
        are skipped because the fraction is undefined for them.
    area_single_chain : list of float
        Area of every considered residue in the isolated chain, in order of
        first appearance in the chain.

    Notes
    -----
    * Every object in the current PyMOL session is deleted, both before and
      after the calculation.
    * Only residues whose identifier is purely numeric are considered, so
      residues with insertion codes (e.g. ``52A``) or negative numbers are
      left out.
    * Any exception raised by PyMOL propagates to the caller; the session is
      cleaned up and ``dot_solvent`` is restored first.
    """
    # Start from an empty session so that only this structure is measured.
    cmd.delete('all')
    oldDS = cmd.get("dot_solvent")
    # solvent accessible area mode
    cmd.set("dot_solvent", 1)
    try:
        cmd.load(PDB_path, 'test')
        # Drop waters, inorganic and organic hetero atoms before measuring.
        for unwanted in ('solvent', 'ino.', 'org.'):
            cmd.remove(unwanted)

        chain_sel = 'c. ' + target_chain
        # Unique residue numbers in order of first appearance.
        resi_id_final = list(dict.fromkeys(
            int(at.resi)
            for at in cmd.get_model(chain_sel).atom
            if at.resi.isnumeric()
        ))

        # get sasa of target chain in complex
        area_complex = [
            cmd.get_area('test & %s & resi %s' % (chain_sel, resi), load_b=1)
            for resi in resi_id_final
        ]

        # Moving the chain into its own object exposes the former interface.
        cmd.extract('chA', 'test & ' + chain_sel)
        area_single_chain = [
            cmd.get_area('chA & %s & resi %s' % (chain_sel, resi), load_b=1)
            for resi in resi_id_final
        ]

        hot_spot = []
        for resi, free, bound in zip(resi_id_final, area_single_chain, area_complex):
            if free == 0:
                continue
            sasa = free - bound
            ratio = sasa / free
            if ratio > ratio_cutoff or sasa > sasa_cutoff:
                hot_spot.append([resi, ratio, sasa])
        return hot_spot, area_single_chain
    finally:
        # Runs on errors too, so a failed load cannot leave the session in
        # SASA mode or with half-processed objects.
        cmd.delete('all')
        cmd.set("dot_solvent", oldDS)

In [3]:
def sasa_selected_residues(PDB_path, target_chain, start_res, end_res):
    """Measure how much area a residue range of one chain buries in a complex.

    The structure is loaded into an emptied PyMOL session. Per-residue areas
    of ``target_chain`` are measured with the other chains present, then
    again after the chain has been extracted into its own object.

    Parameters
    ----------
    PDB_path : str
        Structure file passed to ``cmd.load``.
    target_chain : str
        Chain identifier used in the ``c. <chain>`` selections.
    start_res, end_res : int or str
        First and last residue number of the range (inclusive); both are
        converted with ``int()``.

    Returns
    -------
    hot_spot : list of [int, float, float]
        ``[residue number, buried fraction, buried area]`` for every residue
        in the range whose isolated-chain area is non-zero.
    statistics : [float, float]
        ``[buried area of the range, that area / buried area of the chain]``.
        The second entry is NaN when the chain buries no area at all, since
        the fraction is undefined in that case.

    Notes
    -----
    * Every object in the current PyMOL session is deleted, both before and
      after the calculation.
    * The whole-chain totals only include residues whose identifier is purely
      numeric, so residues with insertion codes or negative numbers do not
      contribute to them.
    * Any exception raised by PyMOL propagates to the caller; the session is
      cleaned up and ``dot_solvent`` is restored first.
    """
    start_res = int(start_res)
    end_res = int(end_res)
    selected = range(start_res, end_res + 1)

    # Start from an empty session so that only this structure is measured.
    cmd.delete('all')
    oldDS = cmd.get("dot_solvent")
    # solvent accessible area mode
    cmd.set("dot_solvent", 1)
    try:
        cmd.load(PDB_path, 'test')
        # Drop waters, inorganic and organic hetero atoms before measuring.
        for unwanted in ('solvent', 'ino.', 'org.'):
            cmd.remove(unwanted)

        chain_sel = 'c. ' + target_chain
        # Unique residue numbers in order of first appearance.
        resi_id_final = list(dict.fromkeys(
            int(at.resi)
            for at in cmd.get_model(chain_sel).atom
            if at.resi.isnumeric()
        ))

        def _residue_area(obj, resi):
            # Area of one residue of the target chain inside object `obj`.
            return cmd.get_area('%s & %s & resi %s' % (obj, chain_sel, resi), load_b=1)

        # get sasa of target chain in complex
        area_complex_all = sum(_residue_area('test', resi) for resi in resi_id_final)
        area_complex = [_residue_area('test', resi) for resi in selected]

        # Moving the chain into its own object exposes the former interface.
        cmd.extract('chA', 'test & ' + chain_sel)
        area_single_chain_all = sum(_residue_area('chA', resi) for resi in resi_id_final)
        area_single_chain = [_residue_area('chA', resi) for resi in selected]

        hot_spot = []
        for offset, (free, bound) in enumerate(zip(area_single_chain, area_complex)):
            if free != 0:
                sasa = free - bound
                hot_spot.append([start_res + offset, sasa / free, sasa])

        difference_all = area_single_chain_all - area_complex_all
        # The per-residue lists are measured once and summed here rather than
        # being measured a second time just to obtain the totals.
        difference_selected_residues = sum(area_single_chain) - sum(area_complex)
        if difference_all != 0:
            fraction = difference_selected_residues / difference_all
        else:
            # The chain buries nothing (e.g. no partner chain), so the share
            # of the range is undefined; avoid ZeroDivisionError.
            fraction = float('nan')
        statistics = [difference_selected_residues, fraction]
        return hot_spot, statistics
    finally:
        # Runs on errors too, so a failed load cannot leave the session in
        # SASA mode or with half-processed objects.
        cmd.delete('all')
        cmd.set("dot_solvent", oldDS)

In [None]:
# Usage template -- copy the three statements below into a new cell and
# replace every <placeholder> with a real value.
# They are kept as comments because running them with the placeholder text
# fails: sasa_selected_residues() converts the residue bounds with int(), and
# int('start residue') raises ValueError, which stops "Restart & Run All".
#
# hotspots, _ = sasa_selected_residues('<structure>.pdb', '<chain ID>', <first residue>, <last residue>)
# hotspots = pd.DataFrame(hotspots)      # rows: [residue number, buried fraction, buried area]
# hotspots.to_csv('<output name>.csv')   # written with pandas' default index/column labels

In [4]:
# Example: chain A, residues 42-58, of the structure in CDK2-cyclinE.pdb.
# The second return value (the range statistics) is not used here.
hotspots, _ = sasa_selected_residues(
    PDB_path='CDK2-cyclinE.pdb',
    target_chain='A',
    start_res=42,
    end_res=58,
)
# Wrap the per-residue rows in a DataFrame (columns keep pandas' default labels).
hotspots = pd.DataFrame(data=hotspots)
# Write the table to a CSV file in the working directory.
hotspots.to_csv(path_or_buf='CDK2-cycE.csv')

 PyMOL not running, entering library mode (experimental)


In [5]:
# Example: chain C, residues 25-93, of the structure in 8bya.pdb.
# The second return value (the range statistics) is not used here.
hotspots, _ = sasa_selected_residues(
    PDB_path='8bya.pdb',
    target_chain='C',
    start_res=25,
    end_res=93,
)
# Wrap the per-residue rows in a DataFrame (columns keep pandas' default labels).
hotspots = pd.DataFrame(data=hotspots)
# Write the table to a CSV file in the working directory.
hotspots.to_csv(path_or_buf='p27-.csv')