-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
171 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
47 changes: 47 additions & 0 deletions
47
deepchem/feat/molecule_featurizers/maccs_keys_fingerprint.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import numpy as np | ||
|
||
from deepchem.utils.typing import RDKitMol | ||
from deepchem.feat.base_classes import MolecularFeaturizer | ||
|
||
|
||
class MACCSKeysFingerprint(MolecularFeaturizer):
    """MACCS Keys Fingerprint.

    The MACCS (Molecular ACCess System) keys are one of the most commonly
    used structural keys. Please confirm the details in [1]_, [2]_.

    References
    ----------
    .. [1] Durant, Joseph L., et al. "Reoptimization of MDL keys for use in
       drug discovery." Journal of chemical information and computer
       sciences 42.6 (2002): 1273-1280.
    .. [2] https://github.com/rdkit/rdkit/blob/master/rdkit/Chem/MACCSkeys.py

    Notes
    -----
    This class requires RDKit to be installed.
    """

    def __init__(self):
        """Initialize this featurizer.

        Raises
        ------
        ValueError
            If RDKit cannot be imported.
        """
        try:
            from rdkit.Chem.AllChem import GetMACCSKeysFingerprint  # noqa
        except ModuleNotFoundError as err:
            # Chain the original error so the missing module stays visible.
            raise ValueError(
                "This class requires RDKit to be installed.") from err

        self.calculator = GetMACCSKeysFingerprint

    def _featurize(self, mol: RDKitMol) -> np.ndarray:
        """Calculate MACCS keys fingerprint.

        Parameters
        ----------
        mol: rdkit.Chem.rdchem.Mol
            RDKit Mol object

        Returns
        -------
        np.ndarray
            1D array of MACCS key bits for `mol`. The length is 167.
        """
        # The calculator returns an RDKit bit vector; convert it so the
        # declared np.ndarray return type actually holds.
        return np.asarray(self.calculator(mol))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import numpy as np | ||
|
||
from deepchem.utils.typing import RDKitMol | ||
from deepchem.feat.base_classes import MolecularFeaturizer | ||
|
||
|
||
class PubChemFingerprint(MolecularFeaturizer):
    """PubChem Fingerprint.

    The PubChem fingerprint is a 881 bit structural key,
    which is used by PubChem for similarity searching.
    Please confirm the details in [1]_.

    References
    ----------
    .. [1] ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.pdf

    Notes
    -----
    This class requires RDKit and PubChemPy to be installed.
    PubChemPy uses a REST API to get the fingerprint, so internet access
    is required.
    """

    def __init__(self):
        """Initialize this featurizer.

        Raises
        ------
        ValueError
            If RDKit or PubChemPy cannot be imported.
        """
        try:
            from rdkit import Chem  # noqa
            import pubchempy as pcp  # noqa
        except ModuleNotFoundError as err:
            # Either import may be the one that failed, so name both and
            # chain the original error to show which module is missing.
            raise ValueError(
                "This class requires RDKit and PubChemPy to be installed."
            ) from err

        self.get_pubchem_compounds = pcp.get_compounds

    def _featurize(self, mol: RDKitMol) -> np.ndarray:
        """Calculate PubChem fingerprint.

        Parameters
        ----------
        mol: rdkit.Chem.rdchem.Mol
            RDKit Mol object

        Returns
        -------
        np.ndarray
            1D array of PubChem fingerprint bits for `mol`.
            The length is 881.

        Raises
        ------
        IndexError
            If the PubChem lookup returns no compounds for `mol`.
        """
        from rdkit import Chem

        smiles = Chem.MolToSmiles(mol)
        compounds = self.get_pubchem_compounds(smiles, 'smiles')
        if not compounds:
            # Same exception type as indexing an empty result, but with a
            # message that identifies the molecule that failed.
            raise IndexError(
                "PubChem returned no compounds for SMILES: %s" % smiles)
        pubchem_compound = compounds[0]
        # `cactvs_fingerprint` is iterated character by character; each
        # character is converted to an int bit.
        feature = [int(bit) for bit in pubchem_compound.cactvs_fingerprint]
        return np.asarray(feature)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import unittest | ||
|
||
from deepchem.feat import MACCSKeysFingerprint | ||
|
||
|
||
class TestMACCSKeysFingerprint(unittest.TestCase):
    """Tests for the MACCSKeysFingerprint featurizer."""

    def setUp(self):
        """Build the RDKit molecule shared by the tests."""
        from rdkit import Chem
        self.mol = Chem.MolFromSmiles('CC(=O)OC1=CC=CC=C1C(=O)O')

    def test_maccs_key_fingerprint(self):
        """Featurizing one molecule yields a single 167-long row."""
        expected_shape = (1, 167)
        molecules = [self.mol]
        features = MACCSKeysFingerprint()(molecules)
        assert features.shape == expected_shape
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import unittest | ||
|
||
from deepchem.feat import PubChemFingerprint | ||
|
||
|
||
class TestPubChemFingerprint(unittest.TestCase):
    """Tests for the PubChemFingerprint featurizer."""

    def setUp(self):
        """Build the RDKit molecule shared by the tests."""
        from rdkit import Chem
        self.mol = Chem.MolFromSmiles('CC(=O)OC1=CC=CC=C1C(=O)O')

    def test_pubchem_fingerprint(self):
        """Featurizing one molecule yields a single 881-long row."""
        expected_shape = (1, 881)
        molecules = [self.mol]
        features = PubChemFingerprint()(molecules)
        assert features.shape == expected_shape
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ dependencies: | |
- mordred | ||
- networkx | ||
- pillow | ||
- pubchempy | ||
- pyGPGO | ||
- pymatgen | ||
- simdna | ||
|