In [1]:
import deepchem as dc
import numpy as np
import tensorflow as tf
from tensorflow.python import debug as tf_debug
from deepchem.utils.genomics import encode_fasta_sequence
from Bio import SeqIO
import h5py

  from numpy.core.umath_tests import inner1d


In [5]:
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.9
config.gpu_options.allow_growth = True
config.log_device_placement = False

sess = tf.Session(config=config)
tf.keras.backend.set_session(sess)

In [13]:
mnist = tf.keras.datasets.mnist

(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train_flat = x_train.reshape([len(x_train),-1])

max_size = 28*28
encode_length = 1
batch_size = 100

In [15]:
np.shape(x_train_flat[:batch_size])

(100, 784)

In [7]:
def residual_block(filter_name1,bias_name1,filter_name2,bias_name2,model,in_dim,out_dim,in_tensor):
    with tf.variable_scope('',reuse=tf.AUTO_REUSE):
        filter1 = tf.get_variable(filter_name1,collections=[model],trainable=True,shape=[5,in_dim,64])
        bias1 = tf.get_variable(bias_name1,collections=[model],trainable=True,shape=[max_size,64])
        filter2 = tf.get_variable(filter_name2,collections=[model],trainable=True,shape=[5,64,out_dim])
        bias2 = tf.get_variable(bias_name2,collections=[model],trainable=True,shape=[max_size,out_dim])

        x = in_tensor
        x = tf.nn.leaky_relu(x)
        
        x = tf.nn.conv1d(x,filters=filter1,padding='SAME',stride=1)
        x = tf.add(x,bias1)
            
        x = tf.nn.leaky_relu(x)
        x = tf.nn.conv1d(x,filters=filter2,padding='SAME',stride=1)
        x = tf.add(x,bias2)

    return x+0.3*in_tensor
    
def dense(matrix,bias,model,in_dim,out_dim,in_tensor):
    with tf.variable_scope('',reuse=tf.AUTO_REUSE):
        W = tf.get_variable(matrix,collections=[model],trainable=True,shape=[in_dim,out_dim])
        b = tf.get_variable(bias,collections=[model],trainable=True,shape=[out_dim,])

    return tf.matmul(in_tensor,W) + b


def conv(filter_name,bias_name,model,filter_shape,in_tensor):
    with tf.variable_scope('',reuse=tf.AUTO_REUSE):
        filt = tf.get_variable(filter_name,collections=[model],trainable=True,shape=filter_shape)
        bias = tf.get_variable(bias_name,collections=[model],trainable=True,shape=[max_size,filter_shape[-1]])
        
    out = tf.nn.conv1d(in_tensor,filters=filt,padding='SAME',stride=1)
    out = tf.add(out,bias)
    return out
    
# Generator

def generator(seed):
    seed = tf.reshape(seed,(batch_size,100),name='generator.reshape1')
    
    seed2 = dense('generator.dense1.matrix','generator.dense1.bias','generator',100,max_size*64,seed)
    seed2 = tf.nn.leaky_relu(seed2)
    seed2 = tf.reshape(seed2,[batch_size,max_size,64])
    
    x = residual_block('generator.res1.filter1','generator.res1.bias1','generator.res1.filter2','generator.res1.bias2','generator',64,64,seed2)
    x = residual_block('generator.res2.filter1','generator.res2.bias1','generator.res2.filter2','generator.res2.bias2','generator',64,64,x)
    x = residual_block('generator.res3.filter1','generator.res3.bias1','generator.res3.filter2','generator.res3.bias2','generator',64,64,x)
    x = residual_block('generator.res4.filter1','generator.res4.bias1','generator.res4.filter2','generator.res4.bias2','generator',64,64,x)
    x = residual_block('generator.res5.filter1','generator.res5.bias1','generator.res5.filter2','generator.res5.bias2','generator',64,64,x)

    x = conv('generator.conv1.filter','generator.conv1.bias','generator',(5,64,encode_length),x)
    
    synthetic = tf.reshape(x,[batch_size,max_size])
    return synthetic

# Discriminator

def discriminator(sequence):
    sequence = tf.reshape(sequence,[batch_size,max_size,encode_length])
    x = conv('discriminator.conv1.filter','discriminator.conv1.bias','discriminator',(5,encode_length,64),sequence)
    x = tf.nn.leaky_relu(x)
    
    x = residual_block('discriminator.res1.filter1','discriminator.res1.bias1','discriminator.res1.filter2','discriminator.res1.bias1','discriminator',64,64,x)
    x = residual_block('discriminator.res2.filter1','discriminator.res2.bias1','discriminator.res2.filter2','discriminator.res2.bias1','discriminator',64,64,x)
    x = residual_block('discriminator.res3.filter1','discriminator.res3.bias1','discriminator.res3.filter2','discriminator.res3.bias1','discriminator',64,64,x)
    x = residual_block('discriminator.res4.filter1','discriminator.res4.bias1','discriminator.res4.filter2','discriminator.res4.bias1','discriminator',64,64,x)
    x = residual_block('discriminator.res5.filter1','discriminator.res5.bias1','discriminator.res5.filter2','discriminator.res5.bias1','discriminator',64,64,x)
    
    x = tf.reshape(x,(batch_size,max_size*64))
    
    output = dense('discriminator.dense1.matrix','discriminator.dense1.bias','discriminator',max_size*64,1,x)
    return output
    
### Constructing the loss function

real_images = tf.placeholder(float,name='real_images')
noise = tf.placeholder(float,name='noise')

fake_images = generator(noise)
fake_images = tf.identity(fake_images,name='fake_images')

# Sampling images in the encoded space between the fake ones and the real ones

interpolation_coeffs = tf.random_uniform(shape=(batch_size,1,1))
sampled_images = tf.add(real_images,tf.multiply(tf.subtract(fake_images,real_images),interpolation_coeffs),name='sampled_images')

# Gradient penalty
gradients = tf.gradients(discriminator(sampled_images),sampled_images,name='gradients')[0]
norms = tf.norm(gradients,axis=[1,2])
score = tf.reduce_mean(tf.square(tf.subtract(norms,1.)),name='gradient_penalty')

# Loss based on discriminator's predictions

pred_real = tf.reshape(discriminator(real_images),[-1])
pred_real = tf.identity(pred_real,name='pred_real')

pred_fake = tf.reshape(discriminator(fake_images),[-1])
pred_fake = tf.identity(pred_fake,name='pred_fake')

diff = tf.reduce_mean(tf.subtract(pred_fake,pred_real))

# Discriminator wants fake sequences to be labeled 0, real to be labeled 1
disc_loss = tf.add(diff,tf.multiply(tf.constant(10.),score),name='disc_loss')

# Generator wants fake sequences to be labeled 1
gen_loss = - tf.reduce_mean(pred_fake,name='gen_loss')

# For tracking using tensorboard

a = tf.summary.scalar('discriminator_difference', diff)
b = tf.summary.scalar('generator_difference',gen_loss)
merged = tf.summary.merge_all()

# Optimizers
disc_optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
gen_optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)

train_discriminator = disc_optimizer.minimize(disc_loss,var_list=tf.get_collection('discriminator'),name='train_discriminator')
grads_discriminator = disc_optimizer.compute_gradients(disc_loss,var_list=tf.get_collection('discriminator'))

train_generator = gen_optimizer.minimize(gen_loss,var_list=tf.get_collection('generator'),name='train_generator')
grads_generator = gen_optimizer.compute_gradients(gen_loss,var_list=tf.get_collection('generator'))

init = tf.initializers.variables(tf.get_collection('discriminator')+tf.get_collection('generator'))

writer = tf.summary.FileWriter('/home/ceolson0/Documents/tensorboard',sess.graph)
saver = tf.train.Saver(tf.get_collection('discriminator')+tf.get_collection('generator'))
### Training


In [8]:
sess.run(init)
sess.run(tf.global_variables_initializer())
print('############')
print(np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()]))
epochs=10

############
6331793


In [19]:
for epoch in range(epochs):
    print('\ngan17, epoch ',epoch)

    
    # Train discriminator
    for i in range(5):
        real = np.random.permutation(x_train_flat)[:batch_size].astype(np.float32)
        noise_input = np.random.normal(0,1,(batch_size,100))
        _,d_loss,grads = sess.run([train_discriminator,diff,grads_discriminator],feed_dict={real_images:real,noise:noise_input})
        print("Training discriminator",d_loss)
            
    # Train generator

    real = np.random.permutation(x_train_flat)[:batch_size].astype(np.float32)
    noise_input = np.random.normal(0,1,(batch_size,100))
    _,g_loss,grads = sess.run([train_generator,gen_loss,grads_generator],feed_dict={noise:noise_input})
    print("Training generator",g_loss)

    print("Generator loss: ",g_loss)
    print("Discriminator loss: ",d_loss)


gan17, epoch  0


InvalidArgumentError: Input to reshape is a tensor with 7840000 values, but the requested shape has 78400
	 [[node Reshape_14 (defined at <ipython-input-7-84e1f0d6a05a>:60) ]]
	 [[node Mean_1 (defined at <ipython-input-7-84e1f0d6a05a>:101) ]]

Caused by op 'Reshape_14', defined at:
  File "/software/conda/envs/deepchem/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/software/conda/envs/deepchem/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/software/conda/envs/deepchem/lib/python3.6/asyncio/base_events.py", line 438, in run_forever
    self._run_once()
  File "/software/conda/envs/deepchem/lib/python3.6/asyncio/base_events.py", line 1451, in _run_once
    handle._run()
  File "/software/conda/envs/deepchem/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tornado/gen.py", line 781, in inner
    self.run()
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tornado/gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 272, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 542, in execute_request
    user_expressions, allow_stdin,
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2848, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2874, in _run_cell
    return runner(coro)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3049, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3214, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-84e1f0d6a05a>", line 89, in <module>
    gradients = tf.gradients(discriminator(sampled_images),sampled_images,name='gradients')[0]
  File "<ipython-input-7-84e1f0d6a05a>", line 60, in discriminator
    sequence = tf.reshape(sequence,[batch_size,max_size,encode_length])
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 7179, in reshape
    "Reshape", tensor=tensor, shape=shape, name=name)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3300, in create_op
    op_def=op_def)
  File "/software/conda/envs/deepchem/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 7840000 values, but the requested shape has 78400
	 [[node Reshape_14 (defined at <ipython-input-7-84e1f0d6a05a>:60) ]]
	 [[node Mean_1 (defined at <ipython-input-7-84e1f0d6a05a>:101) ]]


In [18]:
np.shape(noise_input)

(100, 100)