Skip to content

Commit

Permalink
Add morgan fingerprints
Browse files Browse the repository at this point in the history
  • Loading branch information
HellenNamulinda committed May 7, 2024
1 parent f3f877f commit d2ff580
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 8 deletions.
Binary file modified assets/evaluation_data.joblib
Binary file not shown.
10 changes: 5 additions & 5 deletions assets/evaluation_metrics.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"Mean Squared Error": 0.462,
"Root Mean Squared Error": 0.6797,
"Mean Absolute Error": 0.4891,
"R-squared Score": 0.6514,
"Explained Variance Score": 0.6521
"Mean Squared Error": 0.3952,
"Root Mean Squared Error": 0.6287,
"Mean Absolute Error": 0.4569,
"R-squared Score": 0.7018,
"Explained Variance Score": 0.7018
}
Binary file modified assets/evaluation_scatter_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified assets/interpretability_bar_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified assets/interpretability_beeswarm_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified assets/interpretability_sample1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 3 additions & 2 deletions scripts/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from rdkitclassical_desc import RDkitClassicalDescriptor
from mordred_desc import MordredDescriptor
from regressor import Regressor
from morgan_desc import MorganFingerprint

output_folder = os.path.join(root, "..", "assets")

Expand All @@ -31,7 +32,7 @@
y_valid.reset_index(drop=True, inplace=True)

# Instantiate the descriptor class
descriptor = RDkitClassicalDescriptor()
descriptor = MorganFingerprint()

descriptor.fit(smiles_train)

Expand All @@ -40,7 +41,7 @@
smiles_valid_transformed = descriptor.transform(smiles_valid)

# Instantiate the regressor
regressor = Regressor(output_folder, algorithm='xgboost', k=52)
regressor = Regressor(output_folder, algorithm='catboost')

# Train the model
regressor.fit(smiles_train_transformed, y_train)
Expand Down
52 changes: 52 additions & 0 deletions xai4chem/morgan_desc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import numpy as np
import pandas as pd
from rdkit.Chem import rdFingerprintGenerator
from sklearn.feature_selection import VarianceThreshold

from rdkit.Chem import rdMolDescriptors as rd
from rdkit import Chem

# Morgan fingerprint radius handed to the RDKit fingerprint call.
RADIUS = 3
# Length of the hashed fingerprint vector, i.e. the number of feature columns.
NBITS = 2048
# NOTE(review): not referenced anywhere else in this module; presumably the
# intended dtype for the feature matrix (counts are clipped to 255) - confirm.
DTYPE = np.uint8

def clip_sparse(vect, nbits):
    """Expand a sparse count vector into a dense list of length ``nbits``.

    ``vect`` must expose ``GetNonzeroElements()`` returning a mapping of
    position -> count. Positions missing from that mapping stay 0 and every
    count is capped at 255.
    """
    dense = [0] * nbits
    nonzero = vect.GetNonzeroElements()
    for position in nonzero:
        # Cap each count at 255, the largest value the dense vector may hold.
        dense[position] = min(nonzero[position], 255)
    return dense


class _Fingerprinter:
    """Computes hashed Morgan count vectors using the module-level defaults."""

    def __init__(self):
        # Radius and vector length are taken from the module constants.
        self.radius = RADIUS
        self.nbits = NBITS

    def calc(self, mol):
        """Return the dense count list for ``mol``, as produced by ``clip_sparse``."""
        # NOTE(review): the module also imports rdFingerprintGenerator without
        # using it; it may be the intended replacement for this call - confirm
        # against the RDKit version in use before switching.
        sparse_counts = rd.GetHashedMorganFingerprint(
            mol, radius=self.radius, nBits=self.nbits
        )
        return clip_sparse(sparse_counts, self.nbits)


def morgan_featurizer(smiles):
    """Build the Morgan count matrix for a sized iterable of SMILES strings.

    Returns a NumPy array with one row per entry of ``smiles`` (in iteration
    order) and ``NBITS`` columns; each row holds the output of
    ``_Fingerprinter.calc`` for the molecule parsed from that entry.
    """
    fingerprinter = _Fingerprinter()
    n_molecules = len(smiles)
    matrix = np.zeros((n_molecules, NBITS))
    for row, smi in enumerate(smiles):
        # NOTE(review): the parse result is passed on unchecked; presumably an
        # unparsable SMILES makes the fingerprint call fail - confirm and
        # decide whether inputs should be validated upstream.
        matrix[row, :] = fingerprinter.calc(Chem.MolFromSmiles(smi))
    return matrix


class MorganFingerprint(object):
    """Descriptor that turns SMILES into a DataFrame of Morgan count features.

    The featurization itself learns nothing from the data: the column labels
    depend only on the width of the matrix returned by ``morgan_featurizer``.
    ``transform`` therefore works on a fresh instance as well as after
    ``fit`` (previously it raised ``AttributeError`` because ``features`` was
    only assigned inside ``fit``).
    """

    def __init__(self):
        # Column labels of the most recently produced frame; None until
        # fit() or transform() has been called.
        self.features = None

    def _to_frame(self, X):
        """Label the columns of ``X`` as fp-1..fp-N, remember them, return a DataFrame."""
        self.features = ["fp-{0}".format(i + 1) for i in range(X.shape[1])]
        return pd.DataFrame(X, columns=self.features)

    def fit(self, smiles):
        """Featurize ``smiles``, record the feature names and return the DataFrame."""
        return self._to_frame(morgan_featurizer(smiles))

    def transform(self, smiles):
        """Featurize ``smiles`` and return a DataFrame with the fp-* columns."""
        return self._to_frame(morgan_featurizer(smiles))
2 changes: 1 addition & 1 deletion xai4chem/regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def _train_and_evaluate_optuna_model(self, model, X, y):

def fit(self, X_train, y_train, default_params=True):
self._select_features(X_train, y_train)
print(f'Selected features: {self.selected_features}')
# print(f'Selected features: {self.selected_features}')
X_train = X_train[self.selected_features]

if self.algorithm == 'xgboost':
Expand Down

0 comments on commit d2ff580

Please sign in to comment.