# Interaction fingerprints (IFs) by ProLIF

In [None]:
# import libraries
import pandas as pd
pd.set_option('future.no_silent_downcasting', True)
import prolif as plf
from rdkit import Chem
from rdkit import DataStructs
import sys
from IPython.display import display

In [None]:
# Load the protein structure from its PDB file and wrap it for ProLIF.
# removeHs=False keeps whatever hydrogens the file contains.
protein_file = "../materials/2etr.pdb"
rdkit_prot = Chem.MolFromPDBFile(protein_file, removeHs=False)
# RDKit signals a missing/unparsable file by returning None instead of
# raising, so fail here with a message that names the file.
if rdkit_prot is None:
    raise ValueError(f"Could not read a protein structure from {protein_file!r}")
protein = plf.Molecule(rdkit_prot)

In [None]:
# define complex-ligand dictionary: maps each complex code to its ligand code
complex_ligand = {
    '1S1C': 'GNP',
    '2ESM': 'M77',
    '2ETK': 'HFS',
    '2ETR': 'Y27',
    '2V55': 'ANP',
    '3D9V': 'H52',
    '3NCZ': '3NC',
    '3TV7': '07Q',
    '3TWJ': '07R',
    '3V8S': '0HD',
    '4W7P': '3J7',
    '4YVC': '4KH',
    '4YVE': '4KK',
    '5BML': '4TW',
    '5HVU': '65R',
    '5KKS': '6U1',
    '5KKT': '6U2',
    '5UZJ': '8UV',
    '5WNE': 'B4J',
    '5WNF': 'B4V',
    '5WNG': 'B4Y',
    '5WNH': 'B5G',
    '6E9W': 'J0P',
    '7JOU': 'VFS',
    '7S25': '86G',
    '7S26': '86K',
}

# Re-order the mapping by ligand code. Dicts keep insertion order, so any
# later iteration over complex_ligand follows this ligand-sorted order.
complex_ligand = dict(sorted(complex_ligand.items(), key=lambda item: item[1]))

# Ligand codes in that same order. Taking the values directly avoids a loop
# variable named `complex`, which would shadow the builtin in every later cell.
molecules_order = list(complex_ligand.values())

print(molecules_order)


In [None]:
# read aligned (crystallized) dataset
aligned_dataset_path = "../materials/aligned_molecules.sdf"
with Chem.SDMolSupplier(aligned_dataset_path, removeHs=False) as w:
    mols = [mol for mol in w]

# Molecules are matched to ligand codes by position only, so a count mismatch
# would mislabel ligands. This also catches re-running the cell after the
# excluded code has already been dropped from molecules_order.
if len(mols) != len(molecules_order):
    raise ValueError(
        f"{aligned_dataset_path} holds {len(mols)} molecules but "
        f"{len(molecules_order)} ligand codes are defined; re-run the cell "
        "that builds molecules_order before this one"
    )

# Pair each code with its molecule and leave out GNP.
# NOTE(review): the reason GNP is excluded is not stated in this notebook.
kept_pairs = [
    (code, mol) for code, mol in zip(molecules_order, mols) if code != 'GNP'
]

# The supplier yields None for records RDKit could not parse; report them by
# code instead of failing inside the ProLIF conversion.
unparsed = [code for code, mol in kept_pairs if mol is None]
if unparsed:
    raise ValueError(f"RDKit could not parse the SDF records for: {unparsed}")

aligned_mols = [plf.Molecule.from_rdkit(mol) for _, mol in kept_pairs]

# Keep the code list in lockstep with aligned_mols for the following cells.
molecules_order = [code for code, _ in kept_pairs]

In [None]:
# calculate IFs for aligned dataset

# Codes and molecules are paired by position; refuse to continue if the two
# lists have drifted apart (e.g. a previous cell was skipped or re-run).
if len(aligned_mols) != len(molecules_order):
    raise ValueError(
        f"{len(aligned_mols)} aligned molecules but "
        f"{len(molecules_order)} ligand codes; the lists must match"
    )

aligned_fps_list = []
for code, al in zip(molecules_order, aligned_mols):
    # One fingerprint run per ligand against the shared protein.
    fp_ref = plf.Fingerprint()
    fp_ref.run_from_iterable([al], protein, progress=False)
    df_aligned_new = fp_ref.to_dataframe(index_col="Pose")

    # A single ligand gives a single row; label it with the ligand code.
    df_aligned_new.index = [code]

    # Flatten the column tuples to "<residue>_<interaction>" using positions
    # 1 and 2. Position 0 is dropped (presumably the ligand identifier, which
    # differs per ligand and would stop columns lining up in the concat).
    df_aligned_new.columns = [
        f"{col[1]}_{col[2]}" for col in df_aligned_new.columns
    ]

    aligned_fps_list.append(df_aligned_new)


# save IFS for aligned dataset
# Stacking the rows leaves NaN wherever a ligand lacks a column that another
# ligand has; those cells are filled with False.
ifs_aligned_df = pd.concat(aligned_fps_list, axis=0)
ifs_aligned_df = ifs_aligned_df.fillna(False)
display(ifs_aligned_df)

In [None]:
# calculate percentage of interactions and save to file

# Drop a "Percentage" row left by an earlier run of this cell; otherwise a
# re-run would count it as one more ligand and skew every percentage.
ifs_aligned_df = ifs_aligned_df.drop(index="Percentage", errors="ignore")

# Encode the boolean fingerprint as 0/1.
ifs_aligned_df = ifs_aligned_df.replace({False: 0, True: 1})

# Share of ligands (rows) showing each interaction (column), in percent.
rows = len(ifs_aligned_df.index)
percent_row = ((ifs_aligned_df == 1).sum(axis=0) / rows * 100).tolist()

ifs_aligned_df.loc["Percentage"] = percent_row

# Order columns from most to least frequent interaction. sorted() is stable,
# so columns with equal percentages keep their current relative order.
percent_by_column = dict(zip(ifs_aligned_df.columns, percent_row))
ifs_aligned_df = ifs_aligned_df[
    sorted(ifs_aligned_df.columns, key=percent_by_column.get, reverse=True)
]
display(ifs_aligned_df)


# The same table is written to both output locations.
for res_path in (
    "../materials/graphs/ifs_aligned_prolif.csv",
    "../materials/rescore/ifs_aligned_prolif.csv",
):
    ifs_aligned_df.to_csv(res_path, index=True, index_label='code')