In [4]:
import matplotlib
from rdkit import Chem
from rdkit.Chem import rdFingerprintGenerator
from rdkit.Chem.Draw import IPythonConsole
from rdkit import DataStructs
import rdkit
print(rdkit.__version__)
%matplotlib inline

2024.03.5


In [12]:
mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2,fpSize=2048)
rdkgen = rdFingerprintGenerator.GetRDKitFPGenerator(fpSize=2048)
apgen = rdFingerprintGenerator.GetAtomPairGenerator(fpSize=2048)
ttgen = rdFingerprintGenerator.GetTopologicalTorsionGenerator(fpSize=2048)

# Feature Morgan fingerprints are created using a Morgan generator which uses a different method of assigning atom invariants (atom types)
fmgen = rdFingerprintGenerator.GetMorganGenerator(radius=2,fpSize=2, atomInvariantsGenerator=rdFingerprintGenerator.GetMorganFeatureAtomInvGen())

In [20]:
ms = ['c1ccc(CCc2ccccc2)cc1', 'CCc1cccc2ccccc12', 'Cc1cc(C)c2ccccc2c1', 'c1ccc2cc3ccccc3cc2c1']

m = Chem.MolFromSmiles(ms[0])

# bit vectors:
fp = mfpgen.GetFingerprint(m) # returns a bit vector of size fpSize
sfp = mfpgen.GetSparseFingerprint(m) # returns a sparse bit vector

# count vectors:
cfp = mfpgen.GetCountFingerprint(m) # returns a count vector of size fpSize
scfp = mfpgen.GetSparseCountFingerprint(m) # returns a sparse count vector

# Other generators:
rdkfp = rdkgen.GetFingerprint(m)
apfp = apgen.GetFingerprint(m)
ttfp = ttgen.GetFingerprint(m)
fmfp = fmgen.GetFingerprint(m)

In [22]:
print(fp)
print(sfp)
print(cfp)
print(scfp)
print(rdkfp)
print(apfp)
print(ttfp)

<rdkit.DataStructs.cDataStructs.ExplicitBitVect object at 0x1206df220>
<rdkit.DataStructs.cDataStructs.SparseBitVect object at 0x1206dfd80>
<rdkit.DataStructs.cDataStructs.UIntSparseIntVect object at 0x1206df7d0>
<rdkit.DataStructs.cDataStructs.ULongSparseIntVect object at 0x1206dfd10>
<rdkit.DataStructs.cDataStructs.ExplicitBitVect object at 0x1206dfdf0>
<rdkit.DataStructs.cDataStructs.ExplicitBitVect object at 0x1206dedc0>
<rdkit.DataStructs.cDataStructs.ExplicitBitVect object at 0x1206ded50>


In [24]:
help(rdFingerprintGenerator.GetRDKitFPGenerator)

Help on built-in function GetRDKitFPGenerator in module rdkit.Chem.rdFingerprintGenerator:

GetRDKitFPGenerator(...)
    GetRDKitFPGenerator([  (int)minPath=1 [, (int)maxPath=7 [, (bool)useHs=True [, (bool)branchedPaths=True [, (bool)useBondOrder=True [, (bool)countSimulation=False [, (object)countBounds=None [, (int)fpSize=2048 [, (int)numBitsPerFeature=2 [, (object)atomInvariantsGenerator=None]]]]]]]]]]) -> FingeprintGenerator64 :
        Get an RDKit fingerprint generator
        
          ARGUMENTS:
            - minPath: the minimum path length (in bonds) to be included
            - maxPath: the maximum path length (in bonds) to be included
            - useHs: toggles inclusion of Hs in paths (if the molecule has explicit Hs)
            - branchedPaths: toggles generation of branched subgraphs, not just linear paths
            - useBondOrder: toggles inclusion of bond orders in the path hashes
            - countSimulation:  if set, use count simulation while  generating the 

In [28]:
import json
from rdkit import Chem
from rdkit.Chem import rdFingerprintGenerator
from rdkit import DataStructs

def fp_to_list(fp):
    """Convert an RDKit fingerprint object into a list"""
    bit_indices = list(fp.GetOnBits())
    return bit_indices

def list_to_fp(fp_list):
    """Convert a fingerprint list to an RDKit fingerprint object"""
    bit_indices = data["fingerprint"]
    fp = DataStructs.ExplicitBitVect(2048)
    for idx in bit_indices:
        fp[idx] = 1
    return fp

# Example usage
mol = Chem.MolFromSmiles('c1ccc(CCc2ccccc2)cc1')
filepath = "fingerprint.json"
fingerprint_to_json(mol, filepath)
loaded_fp = fingerprint_from_json(filepath)

#Verify the result.
rdkgen = rdFingerprintGenerator.GetRDKitFPGenerator(fpSize=2048)
original_fp = rdkgen.GetFingerprint(mol)

print(DataStructs.TanimotoSimilarity(original_fp, loaded_fp))

[4, 27, 35, 103, 120, 143, 161, 183, 194, 235, 294, 330, 349, 368, 418, 441, 446, 455, 464, 508, 536, 540, 608, 617, 661, 696, 704, 744, 767, 792, 805, 812, 824, 826, 830, 842, 854, 855, 879, 898, 899, 915, 930, 977, 988, 1013, 1026, 1046, 1060, 1064, 1082, 1094, 1219, 1230, 1251, 1261, 1287, 1308, 1329, 1333, 1338, 1342, 1343, 1344, 1384, 1456, 1469, 1470, 1483, 1519, 1523, 1545, 1553, 1577, 1585, 1677, 1699, 1717, 1755, 1772, 1784, 1792, 1806, 1814, 1816, 1872, 1888, 1889, 1907, 1909, 1941, 1959, 1975, 2021, 2038]
