From 55e3df948d67754392c28a230747078a40636a1c Mon Sep 17 00:00:00 2001 From: Bharath Ramsundar Date: Thu, 15 Oct 2020 23:50:11 -0700 Subject: [PATCH 1/3] First steps to reload test --- deepchem/models/tests/test_reload.py | 48 ++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/deepchem/models/tests/test_reload.py b/deepchem/models/tests/test_reload.py index 9b5de9511b..8221c65e64 100644 --- a/deepchem/models/tests/test_reload.py +++ b/deepchem/models/tests/test_reload.py @@ -11,6 +11,7 @@ from flaky import flaky from sklearn.ensemble import RandomForestClassifier from deepchem.molnet.load_function.chembl25_datasets import chembl25_tasks +from deepchem.feat import create_char_to_idx def test_sklearn_classifier_reload(): @@ -967,3 +968,50 @@ def test_chemception_reload(): origpred = model.predict(predset) reloadpred = reloaded_model.predict(predset) assert np.all(origpred == reloadpred) + + +def test_smiles2vec_reload(): + """Test that smiles2vec models can be saved and reloaded.""" + max_len = 250 + pad_len = 10 + char_to_idx = create_char_to_idx( + dataset_file, max_len=max_len, smiles_field="smiles") + feat = dc.feat.SmilesToSeq( + char_to_idx=char_to_idx, max_len=max_len, pad_len=pad_len) + + n_tasks = 1 + data_points = 10 + mols = ["CCCCCCCC"] * data_points + X = featurizer(mols) + + y = np.random.randint(0, 2, size=(data_points, n_tasks)) + w = np.ones(shape=(data_points, n_tasks)) + dataset = dc.data.NumpyDataset(X, y, w, mols) + classsification_metric = dc.metrics.Metric( + dc.metrics.roc_auc_score, np.mean, mode="classification") + + model_dir = tempfile.mkdtemp() + model = Smiles2Vec( + char_to_idx=char_to_idx, + max_seq_len=max_seq_len, + use_conv=True, + n_tasks=n_tasks, + model_dir=model_dir, + mode="classification") + model.fit(dataset, nb_epoch=3) + + ## Reload Trained Model + #reloaded_model = dc.models.ChemCeption( + # n_tasks=n_tasks, + # img_spec="engd", + # model_dir=model_dir, + # mode="classification") + #reloaded_model.restore() + + ## Check predictions match on random sample + #predmols = ["CCCC", "CCCCCO", "CCCCC"] + #Xpred = featurizer(predmols) + #predset = dc.data.NumpyDataset(Xpred) + #origpred = model.predict(predset) + #reloadpred = reloaded_model.predict(predset) + #assert np.all(origpred == reloadpred) From 8a015062717a7eee6d6325e06ed9fc4d369fedc6 Mon Sep 17 00:00:00 2001 From: Bharath Ramsundar Date: Tue, 20 Oct 2020 19:27:30 -0700 Subject: [PATCH 2/3] Getting some more tests in --- deepchem/models/layers.py | 60 +++- deepchem/models/tests/test_reload.py | 400 +++++++++++++++------------ deepchem/models/text_cnn.py | 42 ++- 3 files changed, 301 insertions(+), 201 deletions(-) diff --git a/deepchem/models/layers.py b/deepchem/models/layers.py index 2114bc8afd..4b8dc4346c 100644 --- a/deepchem/models/layers.py +++ b/deepchem/models/layers.py @@ -2686,7 +2686,15 @@ def get_config(self): return config def build(self, input_shape): - init = initializers.get(self.init) + + def init(input_shape): + return self.add_weight( + name='kernel', + shape=(input_shape[0], input_shape[1]), + initializer=self.init, + trainable=True) + + #init = initializers.get(self.init) self.embedding_list = init([self.periodic_table_length, self.n_embedding]) self.built = True @@ -2739,7 +2747,15 @@ def get_config(self): return config def build(self, input_shape): - init = initializers.get(self.init) + + def init(input_shape): + return self.add_weight( + name='kernel', + shape=(input_shape[0], input_shape[1]), + initializer=self.init, + trainable=True) + + #init = initializers.get(self.init) self.W_cf = init([self.n_embedding, self.n_hidden]) self.W_df = init([self.n_distance, self.n_hidden]) self.W_fc = init([self.n_hidden, self.n_embedding]) @@ -2824,7 +2840,15 @@ def get_config(self): def build(self, input_shape): self.W_list = [] self.b_list = [] - init = initializers.get(self.init) + + def init(input_shape): + return self.add_weight( + name='kernel', + shape=(input_shape[0], input_shape[1]), + initializer=self.init, + trainable=True) + + #init = initializers.get(self.init) prev_layer_size = self.n_embedding for i, layer_size in enumerate(self.layer_sizes): self.W_list.append(init([prev_layer_size, layer_size])) @@ -3230,9 +3254,17 @@ def get_config(self): return config def build(self, input_shape): + + def init(input_shape): + return self.add_weight( + name='kernel', + shape=(input_shape[0], input_shape[1]), + initializer=self.init, + trainable=True) + n_pair_features = self.n_pair_features n_hidden = self.n_hidden - init = initializers.get(self.init) + #init = initializers.get(self.init) self.W = init([n_pair_features, n_hidden * n_hidden]) self.b = backend.zeros(shape=(n_hidden * n_hidden,)) self.built = True @@ -3262,7 +3294,15 @@ def get_config(self): def build(self, input_shape): n_hidden = self.n_hidden - init = initializers.get(self.init) + + def init(input_shape): + return self.add_weight( + name='kernel', + shape=(input_shape[0], input_shape[1]), + initializer=self.init, + trainable=True) + + #init = initializers.get(self.init) self.Wz = init([n_hidden, n_hidden]) self.Wr = init([n_hidden, n_hidden]) self.Wh = init([n_hidden, n_hidden]) @@ -3317,7 +3357,15 @@ def get_config(self): return config def build(self, input_shape): - init = initializers.get(self.init) + + def init(input_shape): + return self.add_weight( + name='kernel', + shape=(input_shape[0], input_shape[1]), + initializer=self.init, + trainable=True) + + #init = initializers.get(self.init) self.U = init((2 * self.n_hidden, 4 * self.n_hidden)) self.b = tf.Variable( np.concatenate((np.zeros(self.n_hidden), np.ones(self.n_hidden), diff --git a/deepchem/models/tests/test_reload.py b/deepchem/models/tests/test_reload.py index 8221c65e64..58009b9e70 100644 --- a/deepchem/models/tests/test_reload.py +++ b/deepchem/models/tests/test_reload.py @@ -8,6 +8,7 @@ import numpy as np import deepchem as dc import tensorflow as tf +import scipy from flaky import flaky from sklearn.ensemble import RandomForestClassifier from deepchem.molnet.load_function.chembl25_datasets import chembl25_tasks @@ -528,7 +529,6 @@ def test_DAG_regression_reload(): np.random.seed(123) tf.random.set_seed(123) n_tasks = 1 - #current_dir = os.path.dirname(os.path.abspath(__file__)) # Load mini log-solubility dataset. featurizer = dc.feat.ConvMolFeaturizer() @@ -656,158 +656,147 @@ def test_weave_classification_reload(): assert scores[classification_metric.name] > .6 -# TODO: THIS IS FAILING! -#def test_MPNN_regression_reload(): -# """Test MPNN can reload datasets.""" -# np.random.seed(123) -# tf.random.set_seed(123) -# n_tasks = 1 -# -# # Load mini log-solubility dataset. -# featurizer = dc.feat.WeaveFeaturizer() -# tasks = ["outcome"] -# mols = ["C", "CO", "CC"] -# n_samples = len(mols) -# X = featurizer(mols) -# y = np.random.rand(n_samples, n_tasks) -# dataset = dc.data.NumpyDataset(X, y) -# -# regression_metric = dc.metrics.Metric( -# dc.metrics.pearson_r2_score, task_averager=np.mean) -# -# n_atom_feat = 75 -# n_pair_feat = 14 -# batch_size = 10 -# model_dir = tempfile.mkdtemp() -# model = dc.models.MPNNModel( -# n_tasks, -# n_atom_feat=n_atom_feat, -# n_pair_feat=n_pair_feat, -# T=2, -# M=3, -# batch_size=batch_size, -# learning_rate=0.001, -# use_queue=False, -# mode="regression", -# model_dir=model_dir) -# -# # Fit trained model -# model.fit(dataset, nb_epoch=50) -# -# # Eval model on train -# scores = model.evaluate(dataset, [regression_metric]) -# assert scores[regression_metric.name] > .8 -# -# # Custom save -# save_dir = tempfile.mkdtemp() -# model.model.save(save_dir) -# -# from tensorflow import keras -# reloaded = keras.models.load_model(save_dir) -# -# # Reload trained model -# reloaded_model = dc.models.MPNNModel( -# n_tasks, -# n_atom_feat=n_atom_feat, -# n_pair_feat=n_pair_feat, -# T=2, -# M=3, -# batch_size=batch_size, -# learning_rate=0.001, -# use_queue=False, -# mode="regression", -# model_dir=model_dir) -# #reloaded_model.restore() -# reloaded_model.model = reloaded -# -# # Eval model on train -# scores = reloaded_model.evaluate(dataset, [regression_metric]) -# assert scores[regression_metric.name] > .8 -# -# # Check predictions match on random sample -# predmols = ["CCCC", "CCCCCO", "CCCCC"] -# Xpred = featurizer(predmols) -# predset = dc.data.NumpyDataset(Xpred) -# origpred = model.predict(predset) -# reloadpred = reloaded_model.predict(predset) -# print("np.amax(origpred - reloadpred)") -# print(np.amax(origpred - reloadpred)) -# assert np.all(origpred == reloadpred) +def test_MPNN_regression_reload(): + """Test MPNN can reload datasets.""" + np.random.seed(123) + tf.random.set_seed(123) + n_tasks = 1 -## TODO: THIS IS FAILING! -#def test_textCNN_classification_reload(): -# """Test textCNN model reloadinng.""" -# np.random.seed(123) -# tf.random.set_seed(123) -# n_tasks = 1 -# -# featurizer = dc.feat.RawFeaturizer() -# tasks = ["outcome"] -# mols = ["C", "CO", "CC"] -# n_samples = len(mols) -# X = featurizer(mols) -# y = np.random.randint(2, size=(n_samples, n_tasks)) -# dataset = dc.data.NumpyDataset(X, y, ids=mols) -# -# classification_metric = dc.metrics.Metric(dc.metrics.roc_auc_score) -# -# char_dict, length = dc.models.TextCNNModel.build_char_dict(dataset) -# batch_size = 3 -# -# model_dir = tempfile.mkdtemp() -# model = dc.models.TextCNNModel( -# n_tasks, -# char_dict, -# seq_length=length, -# batch_size=batch_size, -# learning_rate=0.001, -# use_queue=False, -# mode="classification", -# model_dir=model_dir) -# -# # Fit trained model -# model.fit(dataset, nb_epoch=200) -# -# # Eval model on train -# scores = model.evaluate(dataset, [classification_metric]) -# assert scores[classification_metric.name] > .8 -# -# # Reload trained model -# reloaded_model = dc.models.TextCNNModel( -# n_tasks, -# char_dict, -# seq_length=length, -# batch_size=batch_size, -# learning_rate=0.001, -# use_queue=False, -# mode="classification", -# model_dir=model_dir) -# reloaded_model.restore() -# -# assert len(reloaded_model.model.get_weights()) == len( -# model.model.get_weights()) -# for (reloaded, orig) in zip(reloaded_model.model.get_weights(), -# model.model.get_weights()): -# assert np.all(reloaded == orig) -# -# # Check predictions match on random sample -# predmols = ["CCCC", "CCCCCO", "CCCCC"] -# Xpred = featurizer(predmols) -# predset = dc.data.NumpyDataset(Xpred, ids=predmols) -# origpred = model.predict(predset) -# reloadpred = reloaded_model.predict(predset) -# -# Xproc = reloaded_model.smiles_to_seq_batch(np.array(predmols)) -# reloadout = reloaded_model.model(Xproc) -# origout = model.model(Xproc) -# -# assert len(model.model.layers) == len(reloaded_model.model.layers) -# -# assert np.all(origpred == reloadpred) -# -# # Eval model on train -# scores = reloaded_model.evaluate(dataset, [classification_metric]) -# assert scores[classification_metric.name] > .8 + # Load mini log-solubility dataset. + featurizer = dc.feat.WeaveFeaturizer() + tasks = ["outcome"] + mols = ["C", "CO", "CC"] + n_samples = len(mols) + X = featurizer(mols) + y = np.random.rand(n_samples, n_tasks) + dataset = dc.data.NumpyDataset(X, y) + + regression_metric = dc.metrics.Metric( + dc.metrics.pearson_r2_score, task_averager=np.mean) + + n_atom_feat = 75 + n_pair_feat = 14 + batch_size = 10 + model_dir = tempfile.mkdtemp() + model = dc.models.MPNNModel( + n_tasks, + n_atom_feat=n_atom_feat, + n_pair_feat=n_pair_feat, + T=2, + M=3, + batch_size=batch_size, + learning_rate=0.001, + use_queue=False, + mode="regression", + model_dir=model_dir) + + # Fit trained model + model.fit(dataset, nb_epoch=50) + + # Eval model on train + scores = model.evaluate(dataset, [regression_metric]) + assert scores[regression_metric.name] > .8 + + # Reload trained model + reloaded_model = dc.models.MPNNModel( + n_tasks, + n_atom_feat=n_atom_feat, + n_pair_feat=n_pair_feat, + T=2, + M=3, + batch_size=batch_size, + learning_rate=0.001, + use_queue=False, + mode="regression", + model_dir=model_dir) + reloaded_model.restore() + + # Eval model on train + scores = reloaded_model.evaluate(dataset, [regression_metric]) + assert scores[regression_metric.name] > .8 + + # Check predictions match on random sample + predmols = ["CCCC", "CCCCCO", "CCCCC"] + Xpred = featurizer(predmols) + predset = dc.data.NumpyDataset(Xpred) + origpred = model.predict(predset) + reloadpred = reloaded_model.predict(predset) + assert np.all(origpred == reloadpred) + + +def test_textCNN_classification_reload(): + """Test textCNN model reloadinng.""" + np.random.seed(123) + tf.random.set_seed(123) + n_tasks = 1 + + featurizer = dc.feat.RawFeaturizer() + tasks = ["outcome"] + mols = ["C", "CO", "CC"] + n_samples = len(mols) + X = featurizer(mols) + y = np.random.randint(2, size=(n_samples, n_tasks)) + dataset = dc.data.NumpyDataset(X, y, ids=mols) + + classification_metric = dc.metrics.Metric(dc.metrics.roc_auc_score) + + char_dict, length = dc.models.TextCNNModel.build_char_dict(dataset) + batch_size = 3 + + model_dir = tempfile.mkdtemp() + model = dc.models.TextCNNModel( + n_tasks, + char_dict, + seq_length=length, + batch_size=batch_size, + learning_rate=0.001, + use_queue=False, + mode="classification", + model_dir=model_dir) + + # Fit trained model + model.fit(dataset, nb_epoch=200) + + # Eval model on train + scores = model.evaluate(dataset, [classification_metric]) + assert scores[classification_metric.name] > .8 + + # Reload trained model + reloaded_model = dc.models.TextCNNModel( + n_tasks, + char_dict, + seq_length=length, + batch_size=batch_size, + learning_rate=0.001, + use_queue=False, + mode="classification", + model_dir=model_dir) + reloaded_model.restore() + + assert len(reloaded_model.model.get_weights()) == len( + model.model.get_weights()) + for (reloaded, orig) in zip(reloaded_model.model.get_weights(), + model.model.get_weights()): + assert np.all(reloaded == orig) + + # Check predictions match on random sample + predmols = ["CCCC", "CCCCCO", "CCCCC"] + Xpred = featurizer(predmols) + predset = dc.data.NumpyDataset(Xpred, ids=predmols) + origpred = model.predict(predset) + reloadpred = reloaded_model.predict(predset) + + Xproc = reloaded_model.smiles_to_seq_batch(np.array(predmols)) + reloadout = reloaded_model.model(Xproc) + origout = model.model(Xproc) + + assert len(model.model.layers) == len(reloaded_model.model.layers) + + assert np.all(origpred == reloadpred) + + # Eval model on train + scores = reloaded_model.evaluate(dataset, [classification_metric]) + assert scores[classification_metric.name] > .8 def test_1d_cnn_regression_reload(): @@ -865,7 +854,7 @@ def test_1d_cnn_regression_reload(): assert scores[regression_metric.name] < 0.1 -## TODO: THIS IS FAILING! +### TODO: THIS IS FAILING! #def test_graphconvmodel_reload(): # featurizer = dc.feat.ConvMolFeaturizer() # tasks = ["outcome"] @@ -892,12 +881,6 @@ def test_1d_cnn_regression_reload(): # scores = model.evaluate(dataset, [classification_metric]) # assert scores[classification_metric.name] >= 0.9 # -# # Custom save -# save_dir = tempfile.mkdtemp() -# model.model.save(save_dir) -# -# from tensorflow import keras -# reloaded = keras.models.load_model(save_dir) # # # Reload trained Model # reloaded_model = dc.models.GraphConvModel( @@ -914,7 +897,7 @@ def test_1d_cnn_regression_reload(): # predset = dc.data.NumpyDataset(Xpred) # origpred = model.predict(predset) # reloadpred = reloaded_model.predict(predset) -# #assert np.all(origpred == reloadpred) +# assert np.all(origpred == reloadpred) # # # Try re-restore # reloaded_model.restore() @@ -970,28 +953,35 @@ def test_chemception_reload(): assert np.all(origpred == reloadpred) +# TODO: This test is a little awkward. The Smiles2Vec model awkwardly depends on a dataset_file being available on disk. This needs to be cleaned up to match the standard model handling API. def test_smiles2vec_reload(): """Test that smiles2vec models can be saved and reloaded.""" + dataset_file = os.path.join(os.path.dirname(__file__), "chembl_25_small.csv") max_len = 250 pad_len = 10 + max_seq_len = 20 char_to_idx = create_char_to_idx( dataset_file, max_len=max_len, smiles_field="smiles") feat = dc.feat.SmilesToSeq( char_to_idx=char_to_idx, max_len=max_len, pad_len=pad_len) - n_tasks = 1 + n_tasks = 5 data_points = 10 - mols = ["CCCCCCCC"] * data_points - X = featurizer(mols) + loader = dc.data.CSVLoader( + tasks=chembl25_tasks, smiles_field='smiles', featurizer=feat) + dataset = loader.create_dataset( + inputs=[dataset_file], shard_size=10000, data_dir=tempfile.mkdtemp()) y = np.random.randint(0, 2, size=(data_points, n_tasks)) w = np.ones(shape=(data_points, n_tasks)) - dataset = dc.data.NumpyDataset(X, y, w, mols) + dataset = dc.data.NumpyDataset(dataset.X[:data_points, :max_seq_len], y, w, + dataset.ids[:data_points]) + classsification_metric = dc.metrics.Metric( dc.metrics.roc_auc_score, np.mean, mode="classification") model_dir = tempfile.mkdtemp() - model = Smiles2Vec( + model = dc.models.Smiles2Vec( char_to_idx=char_to_idx, max_seq_len=max_seq_len, use_conv=True, @@ -1000,18 +990,68 @@ def test_smiles2vec_reload(): mode="classification") model.fit(dataset, nb_epoch=3) - ## Reload Trained Model - #reloaded_model = dc.models.ChemCeption( - # n_tasks=n_tasks, - # img_spec="engd", - # model_dir=model_dir, - # mode="classification") - #reloaded_model.restore() - - ## Check predictions match on random sample - #predmols = ["CCCC", "CCCCCO", "CCCCC"] - #Xpred = featurizer(predmols) - #predset = dc.data.NumpyDataset(Xpred) - #origpred = model.predict(predset) - #reloadpred = reloaded_model.predict(predset) - #assert np.all(origpred == reloadpred) + # Reload Trained Model + reloaded_model = dc.models.Smiles2Vec( + char_to_idx=char_to_idx, + max_seq_len=max_seq_len, + use_conv=True, + n_tasks=n_tasks, + model_dir=model_dir, + mode="classification") + reloaded_model.restore() + + # Check predictions match on original dataset + origpred = model.predict(dataset) + reloadpred = reloaded_model.predict(dataset) + assert np.all(origpred == reloadpred) + + +# TODO: We need a cleaner usage example for this +def test_DTNN_regression_reload(): + """Test DTNN can reload datasets.""" + np.random.seed(123) + tf.random.set_seed(123) + n_tasks = 1 + + current_dir = os.path.dirname(os.path.abspath(__file__)) + input_file = os.path.join(current_dir, "example_DTNN.mat") + dataset = scipy.io.loadmat(input_file) + X = dataset['X'] + y = dataset['T'] + w = np.ones_like(y) + dataset = dc.data.NumpyDataset(X, y, w, ids=None) + n_tasks = y.shape[1] + + regression_metric = dc.metrics.Metric( + dc.metrics.pearson_r2_score, task_averager=np.mean) + + model_dir = tempfile.mkdtemp() + model = dc.models.DTNNModel( + n_tasks, + n_embedding=20, + n_distance=100, + learning_rate=1.0, + model_dir=model_dir, + mode="regression") + + # Fit trained model + model.fit(dataset, nb_epoch=250) + + # Eval model on train + pred = model.predict(dataset) + mean_rel_error = np.mean(np.abs(1 - pred / y)) + assert mean_rel_error < 0.2 + + reloaded_model = dc.models.DTNNModel( + n_tasks, + n_embedding=20, + n_distance=100, + learning_rate=1.0, + model_dir=model_dir, + mode="regression") + reloaded_model.restore() + + # Check predictions match on random sample + origpred = model.predict(dataset) + reloadpred = reloaded_model.predict(dataset) + assert np.all(origpred == reloadpred) diff --git a/deepchem/models/text_cnn.py b/deepchem/models/text_cnn.py index 30ee965f5e..e99917ec60 100644 --- a/deepchem/models/text_cnn.py +++ b/deepchem/models/text_cnn.py @@ -54,24 +54,36 @@ class TextCNNModel(KerasModel): """ A Convolutional neural network on smiles strings - Reimplementation of the discriminator module in ORGAN: https://arxiv.org/abs/1705.10843 - Originated from: http://emnlp2014.org/papers/pdf/EMNLP2014181.pdf - This model applies multiple 1D convolutional filters to the padded strings, - then max-over-time pooling is applied on all filters, extracting one feature per filter. - All features are concatenated and transformed through several hidden layers to form predictions. + Reimplementation of the discriminator module in ORGAN [1]_ . + Originated from [2]_. - This model is initially developed for sentence-level classification tasks, with - words represented as vectors. In this implementation, SMILES strings are dissected - into characters and transformed to one-hot vectors in a similar way. The model can - be used for general molecular-level classification or regression tasks. It is also - used in the ORGAN model as discriminator. + This model applies multiple 1D convolutional filters to + the padded strings, then max-over-time pooling is applied on + all filters, extracting one feature per filter. All + features are concatenated and transformed through several + hidden layers to form predictions. - Training of the model only requires SMILES strings input, all featurized datasets - that include SMILES in the `ids` attribute are accepted. PDBbind, QM7 and QM7b - are not supported. To use the model, `build_char_dict` should be called first - before defining the model to build character dict of input dataset, example can - be found in examples/delaney/delaney_textcnn.py + This model is initially developed for sentence-level + classification tasks, with words represented as vectors. In + this implementation, SMILES strings are dissected into + characters and transformed to one-hot vectors in a similar + way. The model can be used for general molecular-level + classification or regression tasks. It is also used in the + ORGAN model as discriminator. + + Training of the model only requires SMILES strings input, + all featurized datasets that include SMILES in the `ids` + attribute are accepted. PDBbind, QM7 and QM7b are not + supported. To use the model, `build_char_dict` should be + called first before defining the model to build character + dict of input dataset, example can be found in + examples/delaney/delaney_textcnn.py + + References + ---------- + .. [1] Guimaraes, Gabriel Lima, et al. "Objective-reinforced generative adversarial networks (ORGAN) for sequence generation models." arXiv preprint arXiv:1705.10843 (2017). + .. [2] Kim, Yoon. "Convolutional neural networks for sentence classification." arXiv preprint arXiv:1408.5882 (2014). """ From a01e688d895f47faa0b9d149e61df12de543e1c5 Mon Sep 17 00:00:00 2001 From: Bharath Ramsundar Date: Tue, 20 Oct 2020 19:39:38 -0700 Subject: [PATCH 3/3] Cleaning up --- deepchem/models/layers.py | 6 ------ deepchem/models/tests/test_reload.py | 15 +++++---------- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/deepchem/models/layers.py b/deepchem/models/layers.py index 4b8dc4346c..7155b8f9c3 100644 --- a/deepchem/models/layers.py +++ b/deepchem/models/layers.py @@ -2694,7 +2694,6 @@ def init(input_shape): initializer=self.init, trainable=True) - #init = initializers.get(self.init) self.embedding_list = init([self.periodic_table_length, self.n_embedding]) self.built = True @@ -2755,7 +2754,6 @@ def init(input_shape): initializer=self.init, trainable=True) - #init = initializers.get(self.init) self.W_cf = init([self.n_embedding, self.n_hidden]) self.W_df = init([self.n_distance, self.n_hidden]) self.W_fc = init([self.n_hidden, self.n_embedding]) @@ -2848,7 +2846,6 @@ def init(input_shape): initializer=self.init, trainable=True) - #init = initializers.get(self.init) prev_layer_size = self.n_embedding for i, layer_size in enumerate(self.layer_sizes): self.W_list.append(init([prev_layer_size, layer_size])) @@ -3264,7 +3261,6 @@ def init(input_shape): n_pair_features = self.n_pair_features n_hidden = self.n_hidden - #init = initializers.get(self.init) self.W = init([n_pair_features, n_hidden * n_hidden]) self.b = backend.zeros(shape=(n_hidden * n_hidden,)) self.built = True @@ -3302,7 +3298,6 @@ def init(input_shape): initializer=self.init, trainable=True) - #init = initializers.get(self.init) self.Wz = init([n_hidden, n_hidden]) self.Wr = init([n_hidden, n_hidden]) self.Wh = init([n_hidden, n_hidden]) @@ -3365,7 +3360,6 @@ def init(input_shape): initializer=self.init, trainable=True) - #init = initializers.get(self.init) self.U = init((2 * self.n_hidden, 4 * self.n_hidden)) self.b = tf.Variable( np.concatenate((np.zeros(self.n_hidden), np.ones(self.n_hidden), diff --git a/deepchem/models/tests/test_reload.py b/deepchem/models/tests/test_reload.py index 58009b9e70..39f000702c 100644 --- a/deepchem/models/tests/test_reload.py +++ b/deepchem/models/tests/test_reload.py @@ -773,6 +773,10 @@ def test_textCNN_classification_reload(): model_dir=model_dir) reloaded_model.restore() + # Eval model on train + scores = reloaded_model.evaluate(dataset, [classification_metric]) + assert scores[classification_metric.name] > .8 + assert len(reloaded_model.model.get_weights()) == len( model.model.get_weights()) for (reloaded, orig) in zip(reloaded_model.model.get_weights(), @@ -785,18 +789,9 @@ def test_textCNN_classification_reload(): predset = dc.data.NumpyDataset(Xpred, ids=predmols) origpred = model.predict(predset) reloadpred = reloaded_model.predict(predset) - - Xproc = reloaded_model.smiles_to_seq_batch(np.array(predmols)) - reloadout = reloaded_model.model(Xproc) - origout = model.model(Xproc) - - assert len(model.model.layers) == len(reloaded_model.model.layers) - assert np.all(origpred == reloadpred) - # Eval model on train - scores = reloaded_model.evaluate(dataset, [classification_metric]) - assert scores[classification_metric.name] > .8 + assert len(model.model.layers) == len(reloaded_model.model.layers) def test_1d_cnn_regression_reload():