Skip to content

Commit

Permalink
Merge 6ef4c4b into 0fcc6b4
Browse files Browse the repository at this point in the history
  • Loading branch information
nissy-dev committed Oct 22, 2020
2 parents 0fcc6b4 + 6ef4c4b commit 73bbf9e
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 0 deletions.
2 changes: 2 additions & 0 deletions deepchem/feat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@
from deepchem.feat.molecule_featurizers import CircularFingerprint
from deepchem.feat.molecule_featurizers import CoulombMatrix
from deepchem.feat.molecule_featurizers import CoulombMatrixEig
from deepchem.feat.molecule_featurizers import MACCSKeysFingerprint
from deepchem.feat.molecule_featurizers import MordredDescriptors
from deepchem.feat.molecule_featurizers import Mol2VecFingerprint
from deepchem.feat.molecule_featurizers import MolGraphConvFeaturizer
from deepchem.feat.molecule_featurizers import OneHotFeaturizer
from deepchem.feat.molecule_featurizers import PubChemFingerprint
from deepchem.feat.molecule_featurizers import RawFeaturizer
from deepchem.feat.molecule_featurizers import RDKitDescriptors
from deepchem.feat.molecule_featurizers import SmilesToImage
Expand Down
2 changes: 2 additions & 0 deletions deepchem/feat/molecule_featurizers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
from deepchem.feat.molecule_featurizers.circular_fingerprint import CircularFingerprint
from deepchem.feat.molecule_featurizers.coulomb_matrices import CoulombMatrix
from deepchem.feat.molecule_featurizers.coulomb_matrices import CoulombMatrixEig
from deepchem.feat.molecule_featurizers.maccs_keys_fingerprint import MACCSKeysFingerprint
from deepchem.feat.molecule_featurizers.mordred_descriptors import MordredDescriptors
from deepchem.feat.molecule_featurizers.mol2vec_fingerprint import Mol2VecFingerprint
from deepchem.feat.molecule_featurizers.one_hot_featurizer import OneHotFeaturizer
from deepchem.feat.molecule_featurizers.pubchem_fingerprint import PubChemFingerprint
from deepchem.feat.molecule_featurizers.raw_featurizer import RawFeaturizer
from deepchem.feat.molecule_featurizers.rdkit_descriptors import RDKitDescriptors
from deepchem.feat.molecule_featurizers.smiles_to_image import SmilesToImage
Expand Down
47 changes: 47 additions & 0 deletions deepchem/feat/molecule_featurizers/maccs_keys_fingerprint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import numpy as np

from deepchem.utils.typing import RDKitMol
from deepchem.feat.base_classes import MolecularFeaturizer


class MACCSKeysFingerprint(MolecularFeaturizer):
    """MACCS Keys Fingerprint.

    The MACCS (Molecular ACCess System) keys are one of the most commonly
    used structural keys. See [1]_ and [2]_ for details.

    References
    ----------
    .. [1] Durant, Joseph L., et al. "Reoptimization of MDL keys for use in
       drug discovery." Journal of chemical information and computer sciences
       42.6 (2002): 1273-1280.
    .. [2] https://github.com/rdkit/rdkit/blob/master/rdkit/Chem/MACCSkeys.py

    Notes
    -----
    This class requires RDKit to be installed.
    """

    def __init__(self):
        """Initialize this featurizer.

        Raises
        ------
        ValueError
            If RDKit cannot be imported.
        """
        # RDKit is a soft dependency, so it is imported lazily here rather
        # than at module import time.
        try:
            from rdkit.Chem.AllChem import GetMACCSKeysFingerprint  # noqa
        except ModuleNotFoundError as e:
            # Keep ValueError for backward compatibility, but chain the
            # original error so the missing module is visible in tracebacks.
            raise ValueError("This class requires RDKit to be installed.") from e

        self.calculator = GetMACCSKeysFingerprint

    def _featurize(self, mol: RDKitMol) -> np.ndarray:
        """Calculate MACCS keys fingerprint.

        Parameters
        ----------
        mol: rdkit.Chem.rdchem.Mol
            RDKit Mol object

        Returns
        -------
        np.ndarray
            1D array holding the MACCS keys fingerprint of `mol`.
            The length is 167.
        """
        # The calculator returns an RDKit bit vector; convert it explicitly so
        # the return value matches the declared np.ndarray type.
        return np.asarray(self.calculator(mol))
52 changes: 52 additions & 0 deletions deepchem/feat/molecule_featurizers/pubchem_fingerprint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import numpy as np

from deepchem.utils.typing import RDKitMol
from deepchem.feat.base_classes import MolecularFeaturizer


class PubChemFingerprint(MolecularFeaturizer):
    """PubChem Fingerprint.

    The PubChem fingerprint is an 881-bit structural key,
    which is used by PubChem for similarity searching.
    See [1]_ for details.

    References
    ----------
    .. [1] ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.pdf

    Notes
    -----
    This class requires RDKit and PubChemPy to be installed.
    PubChemPy uses a REST API to get the fingerprint, so internet access
    is required.
    """

    def __init__(self):
        """Initialize this featurizer.

        Raises
        ------
        ValueError
            If RDKit or PubChemPy cannot be imported.
        """
        # Both packages are soft dependencies; importing them here verifies
        # their availability up front instead of failing during featurization.
        try:
            from rdkit import Chem  # noqa
            import pubchempy as pcp  # noqa
        except ModuleNotFoundError as e:
            # Either import may have failed, so the message names both.
            # ValueError is kept for backward compatibility; the original
            # error is chained so the missing module shows in the traceback.
            raise ValueError(
                "This class requires RDKit and PubChemPy to be installed."
            ) from e

        self.get_pubchem_compounds = pcp.get_compounds

    def _featurize(self, mol: RDKitMol) -> np.ndarray:
        """Calculate PubChem fingerprint.

        Parameters
        ----------
        mol: rdkit.Chem.rdchem.Mol
            RDKit Mol object

        Returns
        -------
        np.ndarray
            1D array holding the PubChem fingerprint bits of `mol`.
            The length is 881.

        Raises
        ------
        IndexError
            If the PubChem lookup returns no compound for `mol`.
        """
        from rdkit import Chem

        smiles = Chem.MolToSmiles(mol)
        compounds = self.get_pubchem_compounds(smiles, 'smiles')
        if not compounds:
            # Same exception type a bare `[0]` would raise, but with a
            # message that tells the caller which molecule was not found.
            raise IndexError(
                "PubChem returned no compound for SMILES '%s'." % smiles)

        # `cactvs_fingerprint` is iterated character by character; each bit
        # character is converted to an int.
        feature = [int(bit) for bit in compounds[0].cactvs_fingerprint]
        return np.asarray(feature)
25 changes: 25 additions & 0 deletions deepchem/feat/tests/test_maccs_keys_finerprint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import unittest

from deepchem.feat import MACCSKeysFingerprint


class TestMACCSKeysFingerprint(unittest.TestCase):
    """Unit tests for the MACCSKeysFingerprint featurizer."""

    def setUp(self):
        """Create the RDKit molecule shared by the tests."""
        from rdkit.Chem import MolFromSmiles

        self.mol = MolFromSmiles('CC(=O)OC1=CC=CC=C1C(=O)O')

    def test_maccs_key_fingerprint(self):
        """Featurizing one molecule yields a (1, 167) feature array."""
        expected_shape = (1, 167)
        features = MACCSKeysFingerprint()([self.mol])
        assert features.shape == expected_shape
25 changes: 25 additions & 0 deletions deepchem/feat/tests/test_puchem_fingerprint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import unittest

from deepchem.feat import PubChemFingerprint


class TestPubChemFingerprint(unittest.TestCase):
    """Unit tests for the PubChemFingerprint featurizer."""

    def setUp(self):
        """Create the RDKit molecule shared by the tests."""
        from rdkit.Chem import MolFromSmiles

        self.mol = MolFromSmiles('CC(=O)OC1=CC=CC=C1C(=O)O')

    def test_pubchem_fingerprint(self):
        """Featurizing one molecule yields a (1, 881) feature array."""
        expected_shape = (1, 881)
        features = PubChemFingerprint()([self.mol])
        assert features.shape == expected_shape
12 changes: 12 additions & 0 deletions docs/featurizers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,24 @@ WeaveFeaturizer
.. autoclass:: deepchem.feat.WeaveFeaturizer
:members:

MACCSKeysFingerprint
^^^^^^^^^^^^^^^^^^^^

.. autoclass:: deepchem.feat.MACCSKeysFingerprint
:members:

CircularFingerprint
^^^^^^^^^^^^^^^^^^^

.. autoclass:: deepchem.feat.CircularFingerprint
:members:

PubChemFingerprint
^^^^^^^^^^^^^^^^^^^

.. autoclass:: deepchem.feat.PubChemFingerprint
:members:

Mol2VecFingerprint
^^^^^^^^^^^^^^^^^^^

Expand Down
5 changes: 5 additions & 0 deletions docs/requirements.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ DeepChem has a number of "soft" requirements.
| | | :code:`dc.trans.transformers` |
| | | |
+--------------------------------+---------------+---------------------------------------------------+
| `PubChemPy`_ | latest | :code:`dc.feat.molecule_featurizers` |
| | | |
| | | |
+--------------------------------+---------------+---------------------------------------------------+
| `pyGPGO`_ | latest | :code:`dc.hyper.gaussian_process` |
| | | |
| | | |
Expand Down Expand Up @@ -134,6 +138,7 @@ DeepChem has a number of "soft" requirements.
.. _`OpenMM`: http://openmm.org/
.. _`PDBFixer`: https://github.com/pandegroup/pdbfixer
.. _`Pillow`: https://pypi.org/project/Pillow/
.. _`PubChemPy`: https://pubchempy.readthedocs.io/en/latest/
.. _`pyGPGO`: https://pygpgo.readthedocs.io/en/latest/
.. _`Pymatgen`: https://pymatgen.org/
.. _`PyTorch`: https://pytorch.org/
Expand Down
1 change: 1 addition & 0 deletions requirements.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dependencies:
- mordred
- networkx
- pillow
- pubchempy
- pyGPGO
- pymatgen
- simdna
Expand Down

0 comments on commit 73bbf9e

Please sign in to comment.