# IFs by PLIP

In [None]:
# import libraries
import pandas as pd
# Opt in to the future pandas behaviour in which fillna/replace do not
# silently downcast object columns (both are used on the fingerprint table).
pd.set_option('future.no_silent_downcasting', True)
from rdkit import Chem
from rdkit import DataStructs
import sys
# Put the current directory first on the module search path so that
# `descriptors` (imported below) is looked up there first.
sys.path.insert(0, './')
from descriptors import calc_plip

In [None]:
# read protein file to string
protein_file = "../materials/2etr.pdb"
protein_mol = Chem.MolFromPDBFile(protein_file, removeHs=False)
if protein_mol is None:
    # RDKit reports a structure it cannot parse by returning None; fail here
    # with a clear message instead of an opaque error inside Kekulize.
    raise ValueError(f"Could not parse protein structure from {protein_file}")

# Kekulize the molecule in place before serialising it back to a PDB block.
Chem.Kekulize(protein_mol)
protein = Chem.MolToPDBBlock(protein_mol)
print(f"Num of atoms in protein: {protein_mol.GetNumAtoms()}")

In [None]:
# define complex-ligand dictionary
# Maps a complex code (key) to the ligand code (value) found in it.
complex_ligand = {
    '1S1C': 'GNP',
    '2ESM': 'M77',
    '2ETK': 'HFS',
    '2ETR': 'Y27',
    '2V55': 'ANP',
    '3D9V': 'H52',
    '3NCZ': '3NC',
    '3TV7': '07Q',
    '3TWJ': '07R',
    '3V8S': '0HD',
    '4W7P': '3J7',
    '4YVC': '4KH',
    '4YVE': '4KK',
    '5BML': '4TW',
    '5HVU': '65R',
    '5KKS': '6U1',
    '5KKT': '6U2',
    '5UZJ': '8UV',
    '5WNE': 'B4J',
    '5WNF': 'B4V',
    '5WNG': 'B4Y',
    '5WNH': 'B5G',
    '6E9W': 'J0P',
    '7JOU': 'VFS',
    '7S25': '86G',
    '7S26': '86K',
}

# Re-order the dictionary by ligand code (the value), not by complex code.
complex_ligand = dict(sorted(complex_ligand.items(), key=lambda item: item[1]))

# Ligand codes in sorted order. Taking the values directly avoids a loop
# variable named `complex`, which would shadow the builtin at notebook scope.
molecules_order = list(complex_ligand.values())

print(molecules_order)


In [None]:
# read aligned (crystallized) dataset
aligned_dataset_path = "../materials/aligned_molecules.sdf"
excluded_code = "GNP"  # ligand left out of the fingerprint calculation

with Chem.SDMolSupplier(aligned_dataset_path, removeHs=False) as w:
    mols = [mol for mol in w]

# Take the full code list from the (sorted) dictionary rather than from
# `molecules_order`: this cell rebinds `molecules_order` to the filtered list,
# so reading it here would break when the cell is run a second time.
all_codes = list(complex_ligand.values())

# Codes are matched to SDF records purely by position, so a count mismatch
# would silently mislabel molecules; stop early instead.
if len(mols) != len(all_codes):
    raise ValueError(
        f"{aligned_dataset_path} holds {len(mols)} molecules, "
        f"but {len(all_codes)} ligand codes are defined"
    )

aligned_mols = []
for code, mol in zip(all_codes, mols):
    if code == excluded_code:
        continue
    if mol is None:
        # SDMolSupplier yields None for a record it cannot parse.
        raise ValueError(f"Could not parse molecule for ligand {code}")
    Chem.Kekulize(mol)  # modifies the molecule in place
    aligned_mols.append((code, mol))

# Later cells use `molecules_order` as the row index of the fingerprint table,
# so it must list exactly the ligands kept in `aligned_mols`, in order.
molecules_order = [code for code, _ in aligned_mols]


In [None]:
# calculate IFs for aligned dataset
# Run PLIP on every (code, molecule) pair against the protein PDB block and
# discard the pose identifier column, which is not needed further on.
interaction_table = calc_plip(aligned_mols, protein).drop(columns=["Pose_ID"])

# Label each row with its ligand code.
interaction_table.index = molecules_order

# Missing entries count as "interaction absent"; afterwards the boolean flags
# are turned into 0/1.
fps_aligned = interaction_table.fillna(value=False)
fps_aligned.replace({False: 0, True: 1}, inplace=True)

In [None]:
# calculate percentage of interactions and save to file

# Drop a "Percentage" row left over from an earlier run of this cell, so that
# re-running it does not count the summary row as one more ligand.
fps_aligned = fps_aligned.drop(index="Percentage", errors="ignore")

rows = len(fps_aligned.index)
# For every interaction column: share (in %) of ligands that have the flag set.
percent_row = [
    int((fps_aligned[col] == 1).sum()) / rows * 100
    for col in fps_aligned.columns
]

fps_aligned.loc["Percentage"] = percent_row

# Order the columns from the most to the least frequent interaction.
columns_by_frequency = sorted(
    fps_aligned.columns,
    key=lambda col: fps_aligned.loc["Percentage", col],
    reverse=True,
)
fps_aligned = fps_aligned[columns_by_frequency]
display(fps_aligned)

# The same table is written to both output locations.
for res_path in (
    "../materials/graphs/ifs_aligned_plip.csv",
    "../materials/rescore/ifs_aligned_plip.csv",
):
    fps_aligned.to_csv(res_path, index=True, index_label='code')