Skip to content

Commit

Permalink
Merge 4019d55 into e06055e
Browse files Browse the repository at this point in the history
  • Loading branch information
rbharath committed Oct 22, 2020
2 parents e06055e + 4019d55 commit 34e4f88
Show file tree
Hide file tree
Showing 4 changed files with 311 additions and 91 deletions.
284 changes: 199 additions & 85 deletions deepchem/models/tests/test_gan.py
Expand Up @@ -2,6 +2,7 @@
import numpy as np
import tensorflow as tf
import unittest
import tempfile
from tensorflow.keras.layers import Input, Concatenate, Dense
from flaky import flaky

Expand Down Expand Up @@ -49,91 +50,204 @@ def create_discriminator(self):
return tf.keras.Model(inputs=inputs, outputs=output)


class TestGAN(unittest.TestCase):

  @flaky
  def test_cgan(self):
    """Test fitting a conditional GAN.

    Trains ExampleGAN on data conditioned on a per-sample mean, then
    checks the generator's outputs track the requested means.
    """

    gan = ExampleGAN(learning_rate=0.01)
    # checkpoint_interval=0 disables checkpointing; this test never reloads.
    gan.fit_gan(
        generate_data(gan, 500, 100),
        generator_steps=0.5,
        checkpoint_interval=0)

    # See if it has done a plausible job of learning the distribution.

    means = 10 * np.random.random([1000, 1])
    values = gan.predict_gan_generator(conditional_inputs=[means])
    deltas = values - means
    # Samples should be centered on the conditioning means but still spread out.
    assert abs(np.mean(deltas)) < 1.0
    assert np.std(deltas) > 1.0
    # 500 batches at 0.5 generator steps -> 500 discriminator steps total.
    assert gan.get_global_step() == 500

  @flaky
  def test_mix_gan(self):
    """Test a GAN with multiple generators and discriminators.

    Fits an ensemble of 2 generators / 2 discriminators and checks that
    every generator individually learned the conditional distribution.
    """

    gan = ExampleGAN(n_generators=2, n_discriminators=2, learning_rate=0.01)
    gan.fit_gan(
        generate_data(gan, 1000, 100),
        generator_steps=0.5,
        checkpoint_interval=0)

    # See if it has done a plausible job of learning the distribution.

    means = 10 * np.random.random([1000, 1])
    # Check each generator in the ensemble separately.
    for i in range(2):
      values = gan.predict_gan_generator(
          conditional_inputs=[means], generator_index=i)
      deltas = values - means
      assert abs(np.mean(deltas)) < 1.0
      assert np.std(deltas) > 1.0
    assert gan.get_global_step() == 1000

@flaky
def test_wgan(self):
"""Test fitting a conditional WGAN."""

class ExampleWGAN(dc.models.WGAN):

def get_noise_input_shape(self):
return (2,)

def get_data_input_shapes(self):
return [(1,)]

def get_conditional_input_shapes(self):
return [(1,)]

def create_generator(self):
noise_input = Input(self.get_noise_input_shape())
conditional_input = Input(self.get_conditional_input_shapes()[0])
inputs = [noise_input, conditional_input]
gen_in = Concatenate(axis=1)(inputs)
output = Dense(1)(gen_in)
return tf.keras.Model(inputs=inputs, outputs=output)

def create_discriminator(self):
data_input = Input(self.get_data_input_shapes()[0])
conditional_input = Input(self.get_conditional_input_shapes()[0])
inputs = [data_input, conditional_input]
discrim_in = Concatenate(axis=1)(inputs)
dense = Dense(10, activation=tf.nn.relu)(discrim_in)
output = Dense(1)(dense)
return tf.keras.Model(inputs=inputs, outputs=output)

# We have to set the gradient penalty very small because the generator's
# output is only a single number, so the default penalty would constrain
# it far too much.

gan = ExampleWGAN(learning_rate=0.01, gradient_penalty=0.1)
gan.fit_gan(generate_data(gan, 1000, 100), generator_steps=0.1)

# See if it has done a plausible job of learning the distribution.

means = 10 * np.random.random([1000, 1])
values = gan.predict_gan_generator(conditional_inputs=[means])
@flaky
def test_cgan():
  """Test fitting a conditional GAN."""
  gan = ExampleGAN(learning_rate=0.01)
  gan.fit_gan(
      generate_data(gan, 500, 100), generator_steps=0.5, checkpoint_interval=0)

  # Check the generator reproduces the conditional distribution: samples
  # should be centered on the requested means but still have spread.
  conditional_means = 10 * np.random.random([1000, 1])
  samples = gan.predict_gan_generator(conditional_inputs=[conditional_means])
  residuals = samples - conditional_means
  assert abs(np.mean(residuals)) < 1.0
  assert np.std(residuals) > 1.0
  assert gan.get_global_step() == 500


@flaky
def test_cgan_reload():
  """Test reloading a conditional GAN.

  Trains a conditional GAN, verifies it learned the target distribution,
  then restores it into a fresh model and checks that the restored
  generator reproduces the original's outputs exactly on a fixed noise
  batch.
  """
  # Use a context manager so the checkpoint directory is removed when the
  # test finishes; tempfile.mkdtemp() would leak a directory on every run.
  with tempfile.TemporaryDirectory() as model_dir:
    gan = ExampleGAN(learning_rate=0.01, model_dir=model_dir)
    gan.fit_gan(generate_data(gan, 500, 100), generator_steps=0.5)

    # See if it has done a plausible job of learning the distribution.
    means = 10 * np.random.random([1000, 1])
    batch_size = len(means)
    # Fix the noise batch so original and reloaded generators can be
    # compared on identical inputs.
    noise_input = gan.get_noise_batch(batch_size=batch_size)
    values = gan.predict_gan_generator(
        noise_input=noise_input, conditional_inputs=[means])
    deltas = values - means
    assert abs(np.mean(deltas)) < 1.0
    assert np.std(deltas) > 1.0
    assert gan.get_global_step() == 500

    reloaded_gan = ExampleGAN(learning_rate=0.01, model_dir=model_dir)
    reloaded_gan.restore()
    reloaded_values = reloaded_gan.predict_gan_generator(
        noise_input=noise_input, conditional_inputs=[means])

    # Restored weights must produce bitwise-identical outputs.
    assert np.all(values == reloaded_values)


@flaky
def test_mix_gan_reload():
  """Test reloading a GAN with multiple generators and discriminators.

  Trains a 2-generator / 2-discriminator ensemble, restores it into a
  fresh model, and checks each reloaded generator reproduces the
  original's outputs exactly on a fixed noise batch.
  """
  # Use a context manager so the checkpoint directory is removed when the
  # test finishes; tempfile.mkdtemp() would leak a directory on every run.
  with tempfile.TemporaryDirectory() as model_dir:
    gan = ExampleGAN(
        n_generators=2,
        n_discriminators=2,
        learning_rate=0.01,
        model_dir=model_dir)
    gan.fit_gan(generate_data(gan, 1000, 100), generator_steps=0.5)

    reloaded_gan = ExampleGAN(
        n_generators=2,
        n_discriminators=2,
        learning_rate=0.01,
        model_dir=model_dir)
    reloaded_gan.restore()

    # Fix the noise batch so original and reloaded generators can be
    # compared on identical inputs.
    means = 10 * np.random.random([1000, 1])
    batch_size = len(means)
    noise_input = gan.get_noise_batch(batch_size=batch_size)
    for i in range(2):
      values = gan.predict_gan_generator(
          noise_input=noise_input, conditional_inputs=[means],
          generator_index=i)
      reloaded_values = reloaded_gan.predict_gan_generator(
          noise_input=noise_input, conditional_inputs=[means],
          generator_index=i)
      # Restored weights must produce bitwise-identical outputs.
      assert np.all(values == reloaded_values)
    assert gan.get_global_step() == 1000
    # No training has been done after reload.
    assert reloaded_gan.get_global_step() == 0


@flaky
def test_mix_gan():
  """Test a GAN with multiple generators and discriminators."""
  gan = ExampleGAN(n_generators=2, n_discriminators=2, learning_rate=0.01)
  gan.fit_gan(
      generate_data(gan, 1000, 100), generator_steps=0.5, checkpoint_interval=0)

  # Every generator in the ensemble should have learned the conditional
  # distribution: samples centered on the requested means, with spread.
  conditional_means = 10 * np.random.random([1000, 1])
  for gen_index in range(2):
    samples = gan.predict_gan_generator(
        conditional_inputs=[conditional_means], generator_index=gen_index)
    residuals = samples - conditional_means
    assert abs(np.mean(residuals)) < 1.0
    assert np.std(residuals) > 1.0
  assert gan.get_global_step() == 1000


@flaky
def test_wgan():
  """Test fitting a conditional WGAN."""

  class ExampleWGAN(dc.models.WGAN):
    """Minimal conditional WGAN: linear generator and a two-layer critic."""

    def get_noise_input_shape(self):
      return (2,)

    def get_data_input_shapes(self):
      return [(1,)]

    def get_conditional_input_shapes(self):
      return [(1,)]

    def create_generator(self):
      noise = Input(self.get_noise_input_shape())
      condition = Input(self.get_conditional_input_shapes()[0])
      merged = Concatenate(axis=1)([noise, condition])
      return tf.keras.Model(
          inputs=[noise, condition], outputs=Dense(1)(merged))

    def create_discriminator(self):
      data = Input(self.get_data_input_shapes()[0])
      condition = Input(self.get_conditional_input_shapes()[0])
      merged = Concatenate(axis=1)([data, condition])
      hidden = Dense(10, activation=tf.nn.relu)(merged)
      return tf.keras.Model(
          inputs=[data, condition], outputs=Dense(1)(hidden))

  # We have to set the gradient penalty very small because the generator's
  # output is only a single number, so the default penalty would constrain
  # it far too much.
  gan = ExampleWGAN(learning_rate=0.01, gradient_penalty=0.1)
  gan.fit_gan(generate_data(gan, 1000, 100), generator_steps=0.1)

  # Check that generated samples cluster around the conditioning means
  # while still showing spread.
  conditional_means = 10 * np.random.random([1000, 1])
  samples = gan.predict_gan_generator(conditional_inputs=[conditional_means])
  residuals = samples - conditional_means
  assert abs(np.mean(residuals)) < 1.0
  assert np.std(residuals) > 1.0


@flaky
def test_wgan_reload():
  """Test reloading a conditional WGAN.

  Trains a conditional WGAN, restores it into a fresh model, and checks
  the restored generator reproduces the original's outputs exactly on a
  fixed noise batch.  (The original docstring said "fitting" — this test
  is about reloading.)
  """

  class ExampleWGAN(dc.models.WGAN):

    def get_noise_input_shape(self):
      return (2,)

    def get_data_input_shapes(self):
      return [(1,)]

    def get_conditional_input_shapes(self):
      return [(1,)]

    def create_generator(self):
      noise_input = Input(self.get_noise_input_shape())
      conditional_input = Input(self.get_conditional_input_shapes()[0])
      inputs = [noise_input, conditional_input]
      gen_in = Concatenate(axis=1)(inputs)
      output = Dense(1)(gen_in)
      return tf.keras.Model(inputs=inputs, outputs=output)

    def create_discriminator(self):
      data_input = Input(self.get_data_input_shapes()[0])
      conditional_input = Input(self.get_conditional_input_shapes()[0])
      inputs = [data_input, conditional_input]
      discrim_in = Concatenate(axis=1)(inputs)
      dense = Dense(10, activation=tf.nn.relu)(discrim_in)
      output = Dense(1)(dense)
      return tf.keras.Model(inputs=inputs, outputs=output)

  # We have to set the gradient penalty very small because the generator's
  # output is only a single number, so the default penalty would constrain
  # it far too much.
  #
  # Use a context manager so the checkpoint directory is removed when the
  # test finishes; tempfile.mkdtemp() would leak a directory on every run.
  with tempfile.TemporaryDirectory() as model_dir:
    gan = ExampleWGAN(
        learning_rate=0.01, gradient_penalty=0.1, model_dir=model_dir)
    gan.fit_gan(generate_data(gan, 1000, 100), generator_steps=0.1)

    reloaded_gan = ExampleWGAN(
        learning_rate=0.01, gradient_penalty=0.1, model_dir=model_dir)
    reloaded_gan.restore()

    # Fix the noise batch so original and reloaded generators can be
    # compared on identical inputs.
    means = 10 * np.random.random([1000, 1])
    batch_size = len(means)
    noise_input = gan.get_noise_batch(batch_size=batch_size)
    values = gan.predict_gan_generator(
        noise_input=noise_input, conditional_inputs=[means])
    reloaded_values = reloaded_gan.predict_gan_generator(
        noise_input=noise_input, conditional_inputs=[means])
    # Restored weights must produce bitwise-identical outputs.
    assert np.all(values == reloaded_values)
4 changes: 2 additions & 2 deletions deepchem/models/tests/test_gbdt_model.py
Expand Up @@ -13,7 +13,7 @@
import deepchem as dc


def test_signletask_regression_with_xgboost():
def test_singletask_regression_with_xgboost():
np.random.seed(123)

# prepare dataset
Expand Down Expand Up @@ -41,7 +41,7 @@ def test_signletask_regression_with_xgboost():
assert scores[regression_metric.name] < 55


def test_signletask_regression_with_lightgbm():
def test_singletask_regression_with_lightgbm():
np.random.seed(123)

# prepare dataset
Expand Down
79 changes: 79 additions & 0 deletions deepchem/models/tests/test_reload.py
Expand Up @@ -1043,3 +1043,82 @@ def test_DTNN_regression_reload():
origpred = model.predict(dataset)
reloadpred = reloaded_model.predict(dataset)
assert np.all(origpred == reloadpred)


def generate_sequences(sequence_length, num_sequences):
  """Yield (input, target) pairs of random digit sequences.

  Each sequence has a random length in [1, sequence_length] and digits in
  [0, 10); the target is the input itself (identity mapping).
  """
  for _ in range(num_sequences):
    # Draw the length first, then the digits — same RNG call order as before.
    length = np.random.randint(1, sequence_length + 1)
    seq = [np.random.randint(10) for _ in range(length)]
    yield (seq, seq)


def test_seq2seq_reload():
  """Test reloading for seq2seq models.

  Trains a small SeqToSeq autoencoder on random digit sequences, restores
  it into a fresh model from the same directory, and checks that
  predictions (from sequences and from embeddings, at beam widths 1 and
  4) and embeddings are identical between original and reloaded models.
  """
  sequence_length = 8
  tokens = list(range(10))
  # Use a context manager so the checkpoint directory is removed when the
  # test finishes; tempfile.mkdtemp() would leak a directory on every run.
  with tempfile.TemporaryDirectory() as model_dir:
    s = dc.models.SeqToSeq(
        tokens,
        tokens,
        sequence_length,
        encoder_layers=2,
        decoder_layers=2,
        embedding_dimension=150,
        learning_rate=0.01,
        dropout=0.1,
        model_dir=model_dir)

    # Train the model on random sequences. We aren't training long enough to
    # really make it reliable, but I want to keep this test fast, and it should
    # still be able to reproduce a reasonable fraction of input sequences.
    s.fit_sequences(generate_sequences(sequence_length, 25000))

    # Test it out.
    tests = [seq for seq, target in generate_sequences(sequence_length, 50)]
    pred1 = s.predict_from_sequences(tests, beam_width=1)
    pred4 = s.predict_from_sequences(tests, beam_width=4)

    reloaded_s = dc.models.SeqToSeq(
        tokens,
        tokens,
        sequence_length,
        encoder_layers=2,
        decoder_layers=2,
        embedding_dimension=150,
        learning_rate=0.01,
        dropout=0.1,
        model_dir=model_dir)
    reloaded_s.restore()

    # Sequence predictions must match at both beam widths.
    reloaded_pred1 = reloaded_s.predict_from_sequences(tests, beam_width=1)
    assert len(pred1) == len(reloaded_pred1)
    for (p1, r1) in zip(pred1, reloaded_pred1):
      assert p1 == r1
    reloaded_pred4 = reloaded_s.predict_from_sequences(tests, beam_width=4)
    assert len(pred4) == len(reloaded_pred4)
    for (p4, r4) in zip(pred4, reloaded_pred4):
      assert p4 == r4

    # Embeddings, and predictions decoded from them, must also match.
    embeddings = s.predict_embeddings(tests)
    pred1e = s.predict_from_embeddings(embeddings, beam_width=1)
    pred4e = s.predict_from_embeddings(embeddings, beam_width=4)

    reloaded_embeddings = reloaded_s.predict_embeddings(tests)
    reloaded_pred1e = reloaded_s.predict_from_embeddings(
        reloaded_embeddings, beam_width=1)
    reloaded_pred4e = reloaded_s.predict_from_embeddings(
        reloaded_embeddings, beam_width=4)

    assert np.all(embeddings == reloaded_embeddings)

    assert len(pred1e) == len(reloaded_pred1e)
    for (p1e, r1e) in zip(pred1e, reloaded_pred1e):
      assert p1e == r1e

    assert len(pred4e) == len(reloaded_pred4e)
    for (p4e, r4e) in zip(pred4e, reloaded_pred4e):
      assert p4e == r4e

0 comments on commit 34e4f88

Please sign in to comment.