In [1]:
from rdkit import Chem
from rdkit.Chem import rdFingerprintGenerator
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the filtered Enamine primary-amine SDF and keep every usable mol object.
# Entries that the supplier yields as None are skipped.

supplier = Chem.SDMolSupplier('filtered_enamine_primary_amines.sdf')
mols = []
for record in supplier:
    if record is None:
        continue
    mols.append(record)

In [3]:
# Gather the per-molecule inputs for the 1024-bit table: catalog number, SMILES
# and a Morgan fingerprint (radius=2, fpSize=1024). The dataframe itself is
# assembled from these in the following cell.

mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=1024)

catalog_ids = [entry.GetProp('Catalog_ID') for entry in mols]
smiles = list(map(Chem.MolToSmiles, mols))

# One fingerprint object per molecule, then stacked into a single array with
# one row per molecule.
fingerprints = list(map(mfpgen.GetFingerprint, mols))
fingerprint_array = np.array(list(map(np.array, fingerprints)))

In [4]:
# Assemble the 1024-bit table: the identifier columns ('Catalog_ID', 'SMILES')
# followed by one column per fingerprint bit, named '0', '1', ... in bit order.
#
# The bit columns are created as one frame straight from the 2-D array and
# joined to the identifier columns with a single pd.concat. Inserting them one
# at a time (df[name] = ...) fragments the frame and makes pandas emit a
# PerformanceWarning once enough columns have been added.

df_1024bit = pd.concat(
    [
        pd.DataFrame({
            'Catalog_ID': catalog_ids,
            'SMILES': smiles,
        }),
        pd.DataFrame(
            fingerprint_array,
            columns=[f'{i}' for i in range(fingerprint_array.shape[1])],
        ),
    ],
    axis=1,  # both frames share the default 0..n-1 index, so rows line up
)

In [5]:
# Write the 1024-bit fingerprint table to CSV; index=False leaves the row
# index out of the file.
df_1024bit.to_csv(
    'enamine_primary_amines_1024bit_fingerprints.csv',
    index=False,
)

In [6]:
# Repeat the fingerprint step with fpSize=2048 (radius stays 2).
# catalog_ids and smiles are recomputed from mols with the same expressions
# used for the 1024-bit table.

mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)

catalog_ids = [entry.GetProp('Catalog_ID') for entry in mols]
smiles = list(map(Chem.MolToSmiles, mols))

# One fingerprint object per molecule, then stacked into a single array with
# one row per molecule.
fingerprints = list(map(mfpgen.GetFingerprint, mols))
fingerprint_array = np.array(list(map(np.array, fingerprints)))

In [7]:
# Assemble the 2048-bit table: the identifier columns ('Catalog_ID', 'SMILES')
# followed by one column per fingerprint bit, named '0', '1', ... in bit order.
#
# The bit columns are created as one frame straight from the 2-D array and
# joined to the identifier columns with a single pd.concat. Inserting them one
# at a time (df[name] = ...) fragments the frame and makes pandas emit a
# PerformanceWarning once enough columns have been added.

df_2048bit = pd.concat(
    [
        pd.DataFrame({
            'Catalog_ID': catalog_ids,
            'SMILES': smiles,
        }),
        pd.DataFrame(
            fingerprint_array,
            columns=[f'{i}' for i in range(fingerprint_array.shape[1])],
        ),
    ],
    axis=1,  # both frames share the default 0..n-1 index, so rows line up
)

In [8]:
# Write the 2048-bit fingerprint table to CSV; index=False leaves the row
# index out of the file.
df_2048bit.to_csv(
    'enamine_primary_amines_2048bit_fingerprints.csv',
    index=False,
)