From 576842189722e92ff822927b90fba5b314768533 Mon Sep 17 00:00:00 2001
From: Bharath Ramsundar
Date: Wed, 21 Oct 2020 15:35:46 -0700
Subject: [PATCH 1/2] Adding in a batch of new save/reload tests

---
 deepchem/models/tests/test_gan.py        | 284 ++++++++++++++++-------
 deepchem/models/tests/test_gbdt_model.py |   4 +-
 deepchem/models/tests/test_reload.py     |  79 +++++++
 deepchem/models/tests/test_scscore.py    |  27 +++
 4 files changed, 307 insertions(+), 87 deletions(-)

diff --git a/deepchem/models/tests/test_gan.py b/deepchem/models/tests/test_gan.py
index 9c75739b0d..99e1f18c2c 100644
--- a/deepchem/models/tests/test_gan.py
+++ b/deepchem/models/tests/test_gan.py
@@ -2,6 +2,7 @@
 import numpy as np
 import tensorflow as tf
 import unittest
+import tempfile
 
 from tensorflow.keras.layers import Input, Concatenate, Dense
 from flaky import flaky
@@ -49,91 +50,204 @@ def create_discriminator(self):
     return tf.keras.Model(inputs=inputs, outputs=output)
 
 
-class TestGAN(unittest.TestCase):
-
-  @flaky
-  def test_cgan(self):
-    """Test fitting a conditional GAN."""
-
-    gan = ExampleGAN(learning_rate=0.01)
-    gan.fit_gan(
-        generate_data(gan, 500, 100),
-        generator_steps=0.5,
-        checkpoint_interval=0)
-
-    # See if it has done a plausible job of learning the distribution.
-
-    means = 10 * np.random.random([1000, 1])
-    values = gan.predict_gan_generator(conditional_inputs=[means])
-    deltas = values - means
-    assert abs(np.mean(deltas)) < 1.0
-    assert np.std(deltas) > 1.0
-    assert gan.get_global_step() == 500
-
-  @flaky
-  def test_mix_gan(self):
-    """Test a GAN with multiple generators and discriminators."""
-
-    gan = ExampleGAN(n_generators=2, n_discriminators=2, learning_rate=0.01)
-    gan.fit_gan(
-        generate_data(gan, 1000, 100),
-        generator_steps=0.5,
-        checkpoint_interval=0)
-
-    # See if it has done a plausible job of learning the distribution.
-
-    means = 10 * np.random.random([1000, 1])
-    for i in range(2):
-      values = gan.predict_gan_generator(
-          conditional_inputs=[means], generator_index=i)
-      deltas = values - means
-      assert abs(np.mean(deltas)) < 1.0
-      assert np.std(deltas) > 1.0
-    assert gan.get_global_step() == 1000
-
-  @flaky
-  def test_wgan(self):
-    """Test fitting a conditional WGAN."""
-
-    class ExampleWGAN(dc.models.WGAN):
-
-      def get_noise_input_shape(self):
-        return (2,)
-
-      def get_data_input_shapes(self):
-        return [(1,)]
-
-      def get_conditional_input_shapes(self):
-        return [(1,)]
-
-      def create_generator(self):
-        noise_input = Input(self.get_noise_input_shape())
-        conditional_input = Input(self.get_conditional_input_shapes()[0])
-        inputs = [noise_input, conditional_input]
-        gen_in = Concatenate(axis=1)(inputs)
-        output = Dense(1)(gen_in)
-        return tf.keras.Model(inputs=inputs, outputs=output)
-
-      def create_discriminator(self):
-        data_input = Input(self.get_data_input_shapes()[0])
-        conditional_input = Input(self.get_conditional_input_shapes()[0])
-        inputs = [data_input, conditional_input]
-        discrim_in = Concatenate(axis=1)(inputs)
-        dense = Dense(10, activation=tf.nn.relu)(discrim_in)
-        output = Dense(1)(dense)
-        return tf.keras.Model(inputs=inputs, outputs=output)
-
-    # We have to set the gradient penalty very small because the generator's
-    # output is only a single number, so the default penalty would constrain
-    # it far too much.
-
-    gan = ExampleWGAN(learning_rate=0.01, gradient_penalty=0.1)
-    gan.fit_gan(generate_data(gan, 1000, 100), generator_steps=0.1)
-
-    # See if it has done a plausible job of learning the distribution.
-
-    means = 10 * np.random.random([1000, 1])
-    values = gan.predict_gan_generator(conditional_inputs=[means])
+@flaky
+def test_cgan():
+  """Test fitting a conditional GAN."""
+
+  gan = ExampleGAN(learning_rate=0.01)
+  gan.fit_gan(
+      generate_data(gan, 500, 100), generator_steps=0.5, checkpoint_interval=0)
+
+  # See if it has done a plausible job of learning the distribution.
+
+  means = 10 * np.random.random([1000, 1])
+  values = gan.predict_gan_generator(conditional_inputs=[means])
+  deltas = values - means
+  assert abs(np.mean(deltas)) < 1.0
+  assert np.std(deltas) > 1.0
+  assert gan.get_global_step() == 500
+
+
+@flaky
+def test_cgan_reload():
+  """Test reloading a conditional GAN."""
+
+  model_dir = tempfile.mkdtemp()
+  gan = ExampleGAN(learning_rate=0.01, model_dir=model_dir)
+  gan.fit_gan(generate_data(gan, 500, 100), generator_steps=0.5)
+
+  # See if it has done a plausible job of learning the distribution.
+  means = 10 * np.random.random([1000, 1])
+  batch_size = len(means)
+  noise_input = gan.get_noise_batch(batch_size=batch_size)
+  values = gan.predict_gan_generator(
+      noise_input=noise_input, conditional_inputs=[means])
+  deltas = values - means
+  assert abs(np.mean(deltas)) < 1.0
+  assert np.std(deltas) > 1.0
+  assert gan.get_global_step() == 500
+
+  reloaded_gan = ExampleGAN(learning_rate=0.01, model_dir=model_dir)
+  reloaded_gan.restore()
+  reloaded_values = reloaded_gan.predict_gan_generator(
+      noise_input=noise_input, conditional_inputs=[means])
+
+  assert np.all(values == reloaded_values)
+
+
+@flaky
+def test_mix_gan_reload():
+  """Test reloading a GAN with multiple generators and discriminators."""
+
+  model_dir = tempfile.mkdtemp()
+  gan = ExampleGAN(
+      n_generators=2,
+      n_discriminators=2,
+      learning_rate=0.01,
+      model_dir=model_dir)
+  gan.fit_gan(generate_data(gan, 1000, 100), generator_steps=0.5)
+
+  reloaded_gan = ExampleGAN(
+      n_generators=2,
+      n_discriminators=2,
+      learning_rate=0.01,
+      model_dir=model_dir)
+  reloaded_gan.restore()
+  # See if it has done a plausible job of learning the distribution.
+
+  means = 10 * np.random.random([1000, 1])
+  batch_size = len(means)
+  noise_input = gan.get_noise_batch(batch_size=batch_size)
+  for i in range(2):
+    values = gan.predict_gan_generator(
+        noise_input=noise_input, conditional_inputs=[means], generator_index=i)
+    reloaded_values = reloaded_gan.predict_gan_generator(
+        noise_input=noise_input, conditional_inputs=[means], generator_index=i)
+    assert np.all(values == reloaded_values)
+  assert gan.get_global_step() == 1000
+  # No training has been done after reload
+  assert reloaded_gan.get_global_step() == 0
+
+
+@flaky
+def test_mix_gan():
+  """Test a GAN with multiple generators and discriminators."""
+
+  gan = ExampleGAN(n_generators=2, n_discriminators=2, learning_rate=0.01)
+  gan.fit_gan(
+      generate_data(gan, 1000, 100), generator_steps=0.5, checkpoint_interval=0)
+
+  # See if it has done a plausible job of learning the distribution.
+
+  means = 10 * np.random.random([1000, 1])
+  for i in range(2):
+    values = gan.predict_gan_generator(
+        conditional_inputs=[means], generator_index=i)
     deltas = values - means
     assert abs(np.mean(deltas)) < 1.0
     assert np.std(deltas) > 1.0
+  assert gan.get_global_step() == 1000
+
+
+@flaky
+def test_wgan():
+  """Test fitting a conditional WGAN."""
+
+  class ExampleWGAN(dc.models.WGAN):
+
+    def get_noise_input_shape(self):
+      return (2,)
+
+    def get_data_input_shapes(self):
+      return [(1,)]
+
+    def get_conditional_input_shapes(self):
+      return [(1,)]
+
+    def create_generator(self):
+      noise_input = Input(self.get_noise_input_shape())
+      conditional_input = Input(self.get_conditional_input_shapes()[0])
+      inputs = [noise_input, conditional_input]
+      gen_in = Concatenate(axis=1)(inputs)
+      output = Dense(1)(gen_in)
+      return tf.keras.Model(inputs=inputs, outputs=output)
+
+    def create_discriminator(self):
+      data_input = Input(self.get_data_input_shapes()[0])
+      conditional_input = Input(self.get_conditional_input_shapes()[0])
+      inputs = [data_input, conditional_input]
+      discrim_in = Concatenate(axis=1)(inputs)
+      dense = Dense(10, activation=tf.nn.relu)(discrim_in)
+      output = Dense(1)(dense)
+      return tf.keras.Model(inputs=inputs, outputs=output)
+
+  # We have to set the gradient penalty very small because the generator's
+  # output is only a single number, so the default penalty would constrain
+  # it far too much.
+
+  gan = ExampleWGAN(learning_rate=0.01, gradient_penalty=0.1)
+  gan.fit_gan(generate_data(gan, 1000, 100), generator_steps=0.1)
+
+  # See if it has done a plausible job of learning the distribution.
+
+  means = 10 * np.random.random([1000, 1])
+  values = gan.predict_gan_generator(conditional_inputs=[means])
+  deltas = values - means
+  assert abs(np.mean(deltas)) < 1.0
+  assert np.std(deltas) > 1.0
+
+
+@flaky
+def test_wgan_reload():
+  """Test reloading a conditional WGAN."""
+
+  class ExampleWGAN(dc.models.WGAN):
+
+    def get_noise_input_shape(self):
+      return (2,)
+
+    def get_data_input_shapes(self):
+      return [(1,)]
+
+    def get_conditional_input_shapes(self):
+      return [(1,)]
+
+    def create_generator(self):
+      noise_input = Input(self.get_noise_input_shape())
+      conditional_input = Input(self.get_conditional_input_shapes()[0])
+      inputs = [noise_input, conditional_input]
+      gen_in = Concatenate(axis=1)(inputs)
+      output = Dense(1)(gen_in)
+      return tf.keras.Model(inputs=inputs, outputs=output)
+
+    def create_discriminator(self):
+      data_input = Input(self.get_data_input_shapes()[0])
+      conditional_input = Input(self.get_conditional_input_shapes()[0])
+      inputs = [data_input, conditional_input]
+      discrim_in = Concatenate(axis=1)(inputs)
+      dense = Dense(10, activation=tf.nn.relu)(discrim_in)
+      output = Dense(1)(dense)
+      return tf.keras.Model(inputs=inputs, outputs=output)
+
+  # We have to set the gradient penalty very small because the generator's
+  # output is only a single number, so the default penalty would constrain
+  # it far too much.
+
+  model_dir = tempfile.mkdtemp()
+  gan = ExampleWGAN(
+      learning_rate=0.01, gradient_penalty=0.1, model_dir=model_dir)
+  gan.fit_gan(generate_data(gan, 1000, 100), generator_steps=0.1)
+
+  reloaded_gan = ExampleWGAN(
+      learning_rate=0.01, gradient_penalty=0.1, model_dir=model_dir)
+  reloaded_gan.restore()
+
+  # See if it has done a plausible job of learning the distribution.
+  means = 10 * np.random.random([1000, 1])
+  batch_size = len(means)
+  noise_input = gan.get_noise_batch(batch_size=batch_size)
+  values = gan.predict_gan_generator(
+      noise_input=noise_input, conditional_inputs=[means])
+  reloaded_values = reloaded_gan.predict_gan_generator(
+      noise_input=noise_input, conditional_inputs=[means])
+  assert np.all(values == reloaded_values)
diff --git a/deepchem/models/tests/test_gbdt_model.py b/deepchem/models/tests/test_gbdt_model.py
index 51d31a97f2..c695427bf5 100644
--- a/deepchem/models/tests/test_gbdt_model.py
+++ b/deepchem/models/tests/test_gbdt_model.py
@@ -13,7 +13,7 @@
 import deepchem as dc
 
 
-def test_signletask_regression_with_xgboost():
+def test_singletask_regression_with_xgboost():
   np.random.seed(123)
 
   # prepare dataset
@@ -41,7 +41,7 @@ def test_signletask_regression_with_xgboost():
   assert scores[regression_metric.name] < 55
 
 
-def test_signletask_regression_with_lightgbm():
+def test_singletask_regression_with_lightgbm():
   np.random.seed(123)
 
   # prepare dataset
diff --git a/deepchem/models/tests/test_reload.py b/deepchem/models/tests/test_reload.py
index 136374ec9f..0866c48c9f 100644
--- a/deepchem/models/tests/test_reload.py
+++ b/deepchem/models/tests/test_reload.py
@@ -1043,3 +1043,82 @@ def test_DTNN_regression_reload():
   origpred = model.predict(dataset)
   reloadpred = reloaded_model.predict(dataset)
   assert np.all(origpred == reloadpred)
+
+
+def generate_sequences(sequence_length, num_sequences):
+  for i in range(num_sequences):
+    seq = [
+        np.random.randint(10)
+        for x in range(np.random.randint(1, sequence_length + 1))
+    ]
+    yield (seq, seq)
+
+
+def test_seq2seq_reload():
+  """Test reloading for seq2seq models."""
+
+  sequence_length = 8
+  tokens = list(range(10))
+  model_dir = tempfile.mkdtemp()
+  s = dc.models.SeqToSeq(
+      tokens,
+      tokens,
+      sequence_length,
+      encoder_layers=2,
+      decoder_layers=2,
+      embedding_dimension=150,
+      learning_rate=0.01,
+      dropout=0.1,
+      model_dir=model_dir)
+
+  # Train the model on random sequences. We aren't training long enough to
+  # really make it reliable, but I want to keep this test fast, and it should
+  # still be able to reproduce a reasonable fraction of input sequences.
+
+  s.fit_sequences(generate_sequences(sequence_length, 25000))
+
+  # Test it out.
+
+  tests = [seq for seq, target in generate_sequences(sequence_length, 50)]
+  pred1 = s.predict_from_sequences(tests, beam_width=1)
+  pred4 = s.predict_from_sequences(tests, beam_width=4)
+
+  reloaded_s = dc.models.SeqToSeq(
+      tokens,
+      tokens,
+      sequence_length,
+      encoder_layers=2,
+      decoder_layers=2,
+      embedding_dimension=150,
+      learning_rate=0.01,
+      dropout=0.1,
+      model_dir=model_dir)
+  reloaded_s.restore()
+
+  reloaded_pred1 = reloaded_s.predict_from_sequences(tests, beam_width=1)
+  assert len(pred1) == len(reloaded_pred1)
+  for (p1, r1) in zip(pred1, reloaded_pred1):
+    assert p1 == r1
+  reloaded_pred4 = reloaded_s.predict_from_sequences(tests, beam_width=4)
+  assert len(pred4) == len(reloaded_pred4)
+  for (p4, r4) in zip(pred4, reloaded_pred4):
+    assert p4 == r4
+  embeddings = s.predict_embeddings(tests)
+  pred1e = s.predict_from_embeddings(embeddings, beam_width=1)
+  pred4e = s.predict_from_embeddings(embeddings, beam_width=4)
+
+  reloaded_embeddings = reloaded_s.predict_embeddings(tests)
+  reloaded_pred1e = reloaded_s.predict_from_embeddings(
+      reloaded_embeddings, beam_width=1)
+  reloaded_pred4e = reloaded_s.predict_from_embeddings(
+      reloaded_embeddings, beam_width=4)
+
+  assert np.all(embeddings == reloaded_embeddings)
+
+  assert len(pred1e) == len(reloaded_pred1e)
+  for (p1e, r1e) in zip(pred1e, reloaded_pred1e):
+    assert p1e == r1e
+
+  assert len(pred4e) == len(reloaded_pred4e)
+  for (p4e, r4e) in zip(pred4e, reloaded_pred4e):
+    assert p4e == r4e
diff --git a/deepchem/models/tests/test_scscore.py b/deepchem/models/tests/test_scscore.py
index 87461b1ab2..89be00886c 100644
--- a/deepchem/models/tests/test_scscore.py
+++ b/deepchem/models/tests/test_scscore.py
@@ -23,3 +23,30 @@ def test_overfit_scscore(self):
     model.fit(dataset, nb_epoch=100)
     pred = model.predict(dataset)
     assert np.array_equal(y, pred[0] > pred[1])
+
+
+def test_scscore_reload():
+  """Test reloading of ScScoreModel."""
+  n_samples = 10
+  n_features = 3
+  n_tasks = 1
+
+  # Create a dataset to test with.
+
+  X = np.random.rand(n_samples, 2, n_features)
+  y = np.random.randint(2, size=(n_samples, n_tasks))
+  dataset = dc.data.NumpyDataset(X, y)
+
+  model_dir = tempfile.mkdtemp()
+  model = dc.models.ScScoreModel(n_features, dropouts=0, model_dir=model_dir)
+  model.fit(dataset, nb_epoch=100)
+  pred = model.predict(dataset)
+  assert np.array_equal(y, pred[0] > pred[1])
+
+  reloaded_model = dc.models.ScScoreModel(
+      n_features, dropouts=0, model_dir=model_dir)
+  reloaded_model.restore()
+  reloaded_pred = reloaded_model.predict(dataset)
+  assert len(pred) == len(reloaded_pred)
+  for p, r in zip(pred, reloaded_pred):
+    assert np.all(p == r)

From 4019d5526562c0c53aa56da587f43eec9e1fd1df Mon Sep 17 00:00:00 2001
From: Bharath Ramsundar
Date: Wed, 21 Oct 2020 18:37:53 -0700
Subject: [PATCH 2/2] Fixing scscore

---
 deepchem/models/tests/test_scscore.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/deepchem/models/tests/test_scscore.py b/deepchem/models/tests/test_scscore.py
index 89be00886c..1576ae86d4 100644
--- a/deepchem/models/tests/test_scscore.py
+++ b/deepchem/models/tests/test_scscore.py
@@ -1,6 +1,6 @@
 import unittest
-
-import deepchem
+import tempfile
+import deepchem as dc
 import numpy as np
 
 
@@ -16,9 +16,9 @@ def test_overfit_scscore(self):
     X = np.random.rand(n_samples, 2, n_features)
     y = np.random.randint(2, size=(n_samples, n_tasks))
-    dataset = deepchem.data.NumpyDataset(X, y)
+    dataset = dc.data.NumpyDataset(X, y)
 
-    model = deepchem.models.ScScoreModel(n_features, dropouts=0)
+    model = dc.models.ScScoreModel(n_features, dropouts=0)
     model.fit(dataset, nb_epoch=100)
     pred = model.predict(dataset)
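
A minimal sketch of the checkpoint/restore pattern that every reload test
above follows, included for reference only and not part of the patch. It
assumes any DeepChem model that accepts a `model_dir` argument; ScScoreModel
and the toy shapes are stand-ins for illustration:

import tempfile

import deepchem as dc
import numpy as np

# Small random dataset; shapes chosen arbitrarily for the sketch.
n_features = 3
X = np.random.rand(10, 2, n_features)
y = np.random.randint(2, size=(10, 1))
dataset = dc.data.NumpyDataset(X, y)

# Train a model rooted at a known checkpoint directory.
model_dir = tempfile.mkdtemp()
model = dc.models.ScScoreModel(n_features, dropouts=0, model_dir=model_dir)
model.fit(dataset, nb_epoch=10)
pred = model.predict(dataset)

# Build an identically configured model over the same directory, restore
# the latest checkpoint, and check that predictions match exactly.
reloaded = dc.models.ScScoreModel(n_features, dropouts=0, model_dir=model_dir)
reloaded.restore()
reloaded_pred = reloaded.predict(dataset)
for p, r in zip(pred, reloaded_pred):
  assert np.all(p == r)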