# Analyze speed of generating E3FPs compared to 2D fingerprints

In [None]:
from rdkit import Chem
import pandas as pd
import time
from python_utilities.parallel import Parallelizer
from e3fp.pipeline import fprints_from_smiles

In [None]:
# Load the prediction table and draw one independent random sample per trial.
data_dir = '../out/predictions_from_models/pk_37k_model/800k/'
df = pd.read_csv(data_dir + 'clean_preds_05_02_2022.csv')

num_trials = 3
sample_size = 50  # number of rows (molecules) drawn for each timing trial

# Always sample from the full table. Rebinding `df` to its own sample would
# make every later trial a mere reshuffle of the first 50 rows, so all trials
# would end up timing the same molecules.
dfs = [
    df.sample(n=sample_size, random_state=seed)
    for seed in range(num_trials)
]

In [None]:
from rdkit.Chem import AllChem

# Elapsed seconds for each sampled DataFrame: SMILES parsing plus Morgan
# fingerprint generation.
times_rdkit = []

for df in dfs:
    # perf_counter is the clock intended for measuring short durations;
    # time.time() follows the wall clock and can jump if it is adjusted.
    start_time = time.perf_counter()

    # Generate RDKit fingerprints
    # NOTE(review): recent RDKit releases appear to flag GetMorganFingerprint
    # as deprecated in favour of rdFingerprintGenerator -- confirm before
    # upgrading RDKit.
    mols = [Chem.MolFromSmiles(smi) for smi in df['smiles']]
    fps = [AllChem.GetMorganFingerprint(mol, radius=4) for mol in mols]

    # Stop the clock before printing so console I/O is not part of the timing.
    elapsed_time = time.perf_counter() - start_time
    times_rdkit.append(elapsed_time)

    print('Number of generated fingerprints: ', len(fps))
    print(f'Time taken for RDKit processing DataFrame: {elapsed_time:.2f} seconds')


In [None]:
# Elapsed seconds for each sampled DataFrame: E3FP fingerprint generation
# run through the Parallelizer.
times_3d = []

# Define parameters for conformer generation and fingerprinting. They are the
# same for every trial, so they are built once, outside the timed loop.
confgen_params = {}  # Define your conformer generation parameters here
fprint_params = {}   # Define your fingerprint parameters here
kwargs = {"confgen_params": confgen_params, "fprint_params": fprint_params}

for df in dfs:
    # perf_counter is the clock intended for measuring short durations;
    # time.time() follows the wall clock and can jump if it is adjusted.
    start_time = time.perf_counter()

    # Map each name to its SMILES string; rows sharing a Name collapse to a
    # single entry because the names are used as dict keys.
    smiles_dict = dict(zip(df['Name'], df['smiles']))

    # Convert the smiles_dict to an iterable of (smiles, name) pairs
    smiles_iter = ((smi, name) for name, smi in smiles_dict.items())

    # Initialize the parallelizer (kept inside the timed region, as before)
    parallelizer = Parallelizer(parallel_mode="processes")

    # Generate fingerprints in parallel
    fprints_list = parallelizer.run(fprints_from_smiles, smiles_iter, kwargs=kwargs)

    # Stop the timer before printing so console I/O is not part of the timing.
    elapsed_time = time.perf_counter() - start_time
    times_3d.append(elapsed_time)

    print('Number of generated fingerprints: ', len(fprints_list))
    print(f'Time taken for processing DataFrame: {elapsed_time:.2f} seconds')

In [None]:
# Show the per-trial timings: RDKit (2D) on the first line, E3FP (3D) on the second.
print(times_rdkit, times_3d, sep='\n')