# Analyze speed of generating E3FPs compared to 2D fingerprints

In [1]:
import time

import numpy as np
import pandas as pd
from e3fp.pipeline import fprints_from_smiles
from python_utilities.parallel import Parallelizer
from rdkit import Chem
from rdkit.Chem import AllChem

In [2]:
data_dir = "../out/predictions_from_models/pk_37k_model/800k/"
df = pd.read_csv(data_dir + "clean_preds_05_02_2022.csv")

num_trials = 5
dfs = []
for i in range(num_trials):
    df = df.sample(n=50, random_state=i)
    dfs.append(df)

In [3]:
times_rdkit = []

for df in dfs:
    start_time = time.time()

    smiles_dict = dict(zip(list(df["Name"]), list(df["smiles"])))
    smiles_iter = ((smi, name) for name, smi in smiles_dict.items())

    # Generate RDKit fingerprints
    mols = [Chem.MolFromSmiles(smi) for smi in df["smiles"]]
    fps = [AllChem.GetMorganFingerprint(mol, radius=4) for mol in mols]

    print("Number of generated fingerprints: ", len(fps))

    elapsed_time = time.time() - start_time
    times_rdkit.append(elapsed_time)
    print(f"Time taken for RDKit processing DataFrame: {elapsed_time:.2f} seconds")

Number of generated fingerprints:  50
Time taken for RDKit processing DataFrame: 0.02 seconds
Number of generated fingerprints:  50
Time taken for RDKit processing DataFrame: 0.02 seconds
Number of generated fingerprints:  50
Time taken for RDKit processing DataFrame: 0.02 seconds
Number of generated fingerprints:  50
Time taken for RDKit processing DataFrame: 0.02 seconds
Number of generated fingerprints:  50
Time taken for RDKit processing DataFrame: 0.02 seconds


In [4]:
times_3d = []

for df in dfs:
    start_time = time.time()

    smiles_dict = dict(zip(list(df["Name"]), list(df["smiles"])))

    # Convert the smiles_dict to an iterable for processing
    smiles_iter = ((smi, name) for name, smi in smiles_dict.items())

    # Define parameters for conformer generation and fingerprinting
    confgen_params = {}  # Define your conformer generation parameters here
    fprint_params = {}  # Define your fingerprint parameters here
    kwargs = {"confgen_params": confgen_params, "fprint_params": fprint_params}

    # Initialize the parallelizer
    parallelizer = Parallelizer(parallel_mode="processes")

    # Generate fingerprints in parallel
    fprints_list = parallelizer.run(fprints_from_smiles, smiles_iter, kwargs=kwargs)
    print("Number of generated fingerprints: ", len(fprints_list))

    # Stop the timer and record the elapsed time
    elapsed_time = time.time() - start_time
    times_3d.append(elapsed_time)

    print(f"Time taken for processing DataFrame: {elapsed_time:.2f} seconds")

2024-10-28 13:52:22,173|INFO|num_proc is not specified. 'processes' mode will use all 10 processes
2024-10-28 13:52:22,174|INFO|Parallelizer initialized with mode 'processes' and 10 processors.
2024-10-28 13:52:23,535|INFO|Generating conformers for BRD-K12480746.
2024-10-28 13:52:23,537|INFO|Generating conformers for BRD-K62249989.
2024-10-28 13:52:23,560|INFO|Generating conformers for BRD-A44036906.
2024-10-28 13:52:23,561|INFO|Generating conformers for BRD-K05421434.
2024-10-28 13:52:23,567|INFO|Generating conformers for BRD-K18309024.
2024-10-28 13:52:23,579|INFO|Generating conformers for BRD-A54784366.
2024-10-28 13:52:23,584|INFO|Generating conformers for BRD-K24233785.
2024-10-28 13:52:23,585|INFO|Generating conformers for BRD-K86655638.
2024-10-28 13:52:23,704|INFO|Generating conformers for BRD-K79323793.
2024-10-28 13:52:23,758|INFO|Generating conformers for BRD-K03880106.
2024-10-28 13:52:25,134|INFO|Generated 19 conformers for BRD-K03880106.
2024-10-28 13:52:25,134|INFO|Gener

Number of generated fingerprints:  50
Time taken for processing DataFrame: 192.62 seconds


2024-10-28 13:55:36,110|INFO|Generating conformers for BRD-A40136307.
2024-10-28 13:55:36,112|INFO|Generating conformers for BRD-A87591620.
2024-10-28 13:55:36,113|INFO|Generating conformers for BRD-K05421434.
2024-10-28 13:55:36,114|INFO|Generating conformers for BRD-K89393716.
2024-10-28 13:55:36,115|INFO|Generating conformers for BRD-K70549141.
2024-10-28 13:55:36,117|INFO|Generating conformers for BRD-K71670746.
2024-10-28 13:55:36,118|INFO|Generating conformers for BRD-A29556904.
2024-10-28 13:55:36,127|INFO|Generating conformers for BRD-K89537576.
2024-10-28 13:55:36,132|INFO|Generating conformers for BRD-K81538434.
2024-10-28 13:55:36,187|INFO|Generating conformers for BRD-K12480746.
2024-10-28 13:55:36,674|INFO|Generated 6 conformers for BRD-K89393716.
2024-10-28 13:55:36,674|INFO|Generating fingerprints for BRD-K89393716.
2024-10-28 13:55:36,819|INFO|Generated 3 fingerprints for BRD-K89393716.
2024-10-28 13:55:36,820|INFO|Generating conformers for BRD-K56010861.
2024-10-28 13:

Number of generated fingerprints:  50
Time taken for processing DataFrame: 157.71 seconds


2024-10-28 13:58:13,791|INFO|Generating conformers for BRD-K24233785.
2024-10-28 13:58:13,792|INFO|Generating conformers for BRD-K71670746.
2024-10-28 13:58:13,794|INFO|Generating conformers for BRD-A46224326.
2024-10-28 13:58:13,801|INFO|Generating conformers for BRD-K80805331.
2024-10-28 13:58:13,803|INFO|Generating conformers for BRD-K00755937.
2024-10-28 13:58:13,823|INFO|Generating conformers for BRD-K70549141.
2024-10-28 13:58:13,829|INFO|Generating conformers for BRD-K79323793.
2024-10-28 13:58:13,846|INFO|Generating conformers for BRD-A56560246.
2024-10-28 13:58:13,892|INFO|Generating conformers for BRD-K18309024.
2024-10-28 13:58:13,900|INFO|Generating conformers for BRD-K34906972.
2024-10-28 13:58:15,718|INFO|Generated 8 conformers for BRD-K34906972.
2024-10-28 13:58:15,718|INFO|Generating fingerprints for BRD-K34906972.
2024-10-28 13:58:16,004|INFO|Generated 3 fingerprints for BRD-K34906972.
2024-10-28 13:58:16,005|INFO|Generating conformers for BRD-K11959316.
2024-10-28 13:

Number of generated fingerprints:  50
Time taken for processing DataFrame: 183.58 seconds


2024-10-28 14:01:17,408|INFO|Generating conformers for BRD-K43603689.
2024-10-28 14:01:17,408|INFO|Generating conformers for BRD-K34906972.
2024-10-28 14:01:17,409|INFO|Generating conformers for BRD-K61158478.
2024-10-28 14:01:17,410|INFO|Generating conformers for BRD-K03880106.
2024-10-28 14:01:17,411|INFO|Generating conformers for BRD-K81538434.
2024-10-28 14:01:17,412|INFO|Generating conformers for BRD-A24054015.
2024-10-28 14:01:17,413|INFO|Generating conformers for BRD-K00019132.
2024-10-28 14:01:17,419|INFO|Generating conformers for BRD-K86655638.
2024-10-28 14:01:17,424|INFO|Generating conformers for BRD-K71241206.
2024-10-28 14:01:17,478|INFO|Generating conformers for BRD-A56560246.
2024-10-28 14:01:18,674|INFO|Generated 19 conformers for BRD-K03880106.
2024-10-28 14:01:18,675|INFO|Generating fingerprints for BRD-K03880106.
2024-10-28 14:01:18,865|INFO|Generated 3 fingerprints for BRD-K03880106.
2024-10-28 14:01:18,867|INFO|Generating conformers for BRD-K58473285.
2024-10-28 14

Number of generated fingerprints:  50
Time taken for processing DataFrame: 157.36 seconds


2024-10-28 14:03:54,867|INFO|Generating conformers for BRD-K09753986.
2024-10-28 14:03:54,867|INFO|Generating conformers for BRD-K79135384.
2024-10-28 14:03:54,867|INFO|Generating conformers for BRD-K12480746.
2024-10-28 14:03:54,867|INFO|Generating conformers for BRD-A24054015.
2024-10-28 14:03:54,871|INFO|Generating conformers for BRD-K70549141.
2024-10-28 14:03:54,871|INFO|Generating conformers for BRD-K81538434.
2024-10-28 14:03:54,875|INFO|Generating conformers for BRD-A33804553.
2024-10-28 14:03:54,877|INFO|Generating conformers for BRD-K71241206.
2024-10-28 14:03:54,877|INFO|Generating conformers for BRD-A87591620.
2024-10-28 14:03:54,880|INFO|Generating conformers for BRD-A35805718.
2024-10-28 14:03:55,900|INFO|Generated 8 conformers for BRD-K09753986.
2024-10-28 14:03:55,902|INFO|Generating fingerprints for BRD-K09753986.
2024-10-28 14:03:56,097|INFO|Generated 3 fingerprints for BRD-K09753986.
2024-10-28 14:03:56,098|INFO|Generating conformers for BRD-K88227180.
2024-10-28 14:

Number of generated fingerprints:  50
Time taken for processing DataFrame: 156.98 seconds


2024-10-28 14:06:30,300|INFO|Generated 3 fingerprints for BRD-K81538434.


In [5]:
print(times_rdkit)
print(np.mean(times_rdkit))
print(np.std(times_rdkit))
print(times_3d)
print(np.mean(times_3d))
print(np.std(times_3d))

[0.018946170806884766, 0.0170440673828125, 0.017313241958618164, 0.01691126823425293, 0.016865015029907227]
0.017415952682495118
0.0007808526578640182
[192.61878895759583, 157.70921993255615, 183.5814447402954, 157.35520887374878, 156.97994112968445]
169.6489207267761
15.335742056651833
