# Get fingerprints of Actives and Decoys

In [None]:
# import libraries
import pandas as pd
from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem import AllChem
from spyrmsd import rmsd
import spyrmsd.molecule
import numpy as np
import prolif as plf
from IPython.display import display
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Load the receptor structure that every ligand pose is fingerprinted against.
# Hydrogens are kept (removeHs=False) and the RDKit molecule is wrapped in a
# ProLIF Molecule, which is what get_fingerprints() expects as `protein`.
protein_file = "../materials/2etr.pdb"
rdkit_prot = Chem.MolFromPDBFile(protein_file, removeHs=False)
if rdkit_prot is None:
    # RDKit signals a parsing/sanitization failure by returning None; stop
    # here with a clear message instead of failing later inside ProLIF.
    raise ValueError(f"RDKit could not parse the protein file: {protein_file}")
protein = plf.Molecule(rdkit_prot)

In [None]:
def get_fingerprints(mol_temp, protein):
    """Compute the ProLIF interaction fingerprint of a single ligand pose.

    Args:
        mol_temp: RDKit molecule of the ligand pose; it is wrapped with
            ``plf.Molecule.from_rdkit`` before fingerprinting.
        protein: ``plf.Molecule`` of the receptor the pose is evaluated against.

    Returns:
        A one-row ``pandas.DataFrame`` whose index is ``["temp_name"]`` and
        whose columns are flat strings of the form
        ``"<protein residue>_<interaction>"``, holding the values produced by
        ``Fingerprint.to_dataframe``.
    """
    ligand = plf.Molecule.from_rdkit(mol_temp)
    fingerprint = plf.Fingerprint()
    fingerprint.run_from_iterable([ligand], protein, progress=False)
    ifs = fingerprint.to_dataframe(index_col="Pose")

    # Each column label is a tuple; element 0 (the ligand residue, per ProLIF's
    # to_dataframe layout) is dropped and elements 1 and 2 (residue and
    # interaction) are joined into one flat name. Dropping the ligand level
    # here makes any prior renaming of it unnecessary.
    ifs.columns = [f"{col[1]}_{col[2]}" for col in ifs.columns]

    # Exactly one pose was fingerprinted, so the frame has a single row.
    ifs.index = ["temp_name"]
    return ifs


In [None]:
# Fingerprint the docked actives and write the results as numbered CSV parts.
#
# This run resumes an earlier one: records are skipped while `count` is below
# `resume_at`, and output numbering continues at `parts`. A part is written
# every time `count` reaches a multiple of `chunk_size`, and whatever is still
# pending after the loop is written as a final part so that no row is lost.
from rdkit import Chem
suppl = Chem.SDMolSupplier("../materials/actives_decoys/glide_actives_docked.sdf")
count = 1
parts = 20
resume_at = 20000   # records seen while count < resume_at are skipped
chunk_size = 1000   # a part is flushed whenever count % chunk_size == 0
pending = []        # per-molecule one-row frames waiting to be written


def _save_actives_part(frames, part):
    """Concatenate `frames`, put the "idx" column first and write actives_<part>.csv."""
    chunk = pd.concat(frames, ignore_index=True)
    # Interactions absent for a molecule are NaN after the concat.
    chunk = chunk.fillna("False")
    ordered = ["idx"] + [col for col in chunk.columns if col not in ["idx"]]
    chunk = chunk[ordered]
    res_path = f"../materials/actives_decoys/ifs_glide/actives/actives_{part}.csv"
    chunk.to_csv(res_path)
    print(f"Part {part} saved.")


print(len(suppl))
for m in suppl:
    if count < resume_at:
        count = count + 1
        continue

    if m is None:
        # SDMolSupplier yields None for records it cannot parse; there are no
        # properties to report for them, so just note it and move on.
        print("Skipping an SDF record that RDKit could not parse.")
        continue

    try:
        idx = m.GetProp("i_i_glide_lignum")
        dock_score = m.GetProp("r_i_docking_score")
        ifs = get_fingerprints(m, protein)
        ifs["idx"] = idx
        ifs["score"] = dock_score
    except Exception as e:
        # Best effort: report the failing molecule and keep going.
        print(e, m.GetPropsAsDict())
        continue

    pending.append(ifs)
    count = count + 1

    if (count % chunk_size) == 0:
        _save_actives_part(pending, parts)
        parts = parts + 1
        pending = []

# Write the trailing, partially filled part (if any).
if pending:
    _save_actives_part(pending, parts)
    parts = parts + 1
    pending = []

all_ifs = pd.DataFrame()

print(f"count = {count}")



In [None]:
# Fingerprint the docked decoys and write the results as numbered CSV parts.
#
# A part is written every time `count2` reaches a multiple of `chunk_size`;
# rows still pending after the loop are written as a final part. Nothing is
# written when no rows are pending.
suppl = Chem.SDMolSupplier("../materials/actives_decoys/glide_decoys_docked.sdf")
count2 = 1
parts = 0
skip_through = 0    # records seen while count2 <= skip_through are skipped (0 = none)
chunk_size = 5000   # a part is flushed whenever count2 % chunk_size == 0
pending = []        # per-molecule one-row frames waiting to be written


def _save_decoys_part(frames, part):
    """Concatenate `frames`, put the "idx" column first and write decoys_<part>.csv."""
    chunk = pd.concat(frames, ignore_index=True)
    # Interactions absent for a molecule are NaN after the concat.
    chunk = chunk.fillna("False")
    ordered = ["idx"] + [col for col in chunk.columns if col not in ["idx"]]
    chunk = chunk[ordered]
    res_path = f"../materials/actives_decoys/ifs_glide/decoys/decoys_{part}.csv"
    chunk.to_csv(res_path)
    print(f"Part {part} saved.")


for m in suppl:
    if count2 <= skip_through:
        count2 = count2 + 1
        continue

    if m is None:
        # SDMolSupplier yields None for records it cannot parse; there are no
        # properties to report for them, so just note it and move on.
        print("Skipping an SDF record that RDKit could not parse.")
        continue

    try:
        idx = m.GetProp("i_i_glide_lignum")
        dock_score = m.GetProp("r_i_docking_score")
        ifs = get_fingerprints(m, protein)
        ifs["idx"] = idx
        ifs["score"] = dock_score
    except Exception as e:
        # Best effort: report the failing molecule and keep going.
        print(e, m.GetPropsAsDict())
        continue

    pending.append(ifs)
    count2 = count2 + 1

    if (count2 % chunk_size) == 0:
        _save_decoys_part(pending, parts)
        parts = parts + 1
        pending = []

# Write the trailing, partially filled part. Guarded because an empty frame
# has no "idx" column to reorder on.
if pending:
    _save_decoys_part(pending, parts)
    parts = parts + 1
    pending = []

all_ifs = pd.DataFrame()


print(f"count2 = {count2}")
