In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os, time

In [2]:
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
def plot(samples):
    fig = plt.figure(figsize=(4, 4))
    gs = gridspec.GridSpec(4, 4)
    gs.update(wspace=0.05, hspace=0.05)

    for i, sample in enumerate(samples):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

    return fig

In [4]:
def xavier_init(size):
    in_dim = size[0]
    xavier_stddev = 1. / tf.sqrt(in_dim / 2.)
    return tf.random_normal(shape=size, stddev=xavier_stddev)

In [5]:
mb_size = 32
X_dim = 784
z_dim = 20
h_dim = 128


In [6]:
X = tf.placeholder(tf.float32, shape=[None, X_dim])

D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

theta_D = [D_W1, D_W2, D_b1, D_b2]


z = tf.placeholder(tf.float32, shape=[None, z_dim], name="hereYouGo")

G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

theta_G = [G_W1, G_W2, G_b1, G_b2]

## WGAN

In [9]:
def sample_z(m, n):
    
    return np.random.uniform(-1., 1., size=[m, n])


def generator(z):
    G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    G_log_prob = tf.matmul(G_h1, G_W2) + G_b2
    G_prob = tf.nn.sigmoid(G_log_prob)

    return G_prob


def discriminator(x):
    D_h1 = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    out = tf.matmul(D_h1, D_W2) + D_b2
#     print(out)
    return out

In [30]:
G_sample = generator(z)
D_real = discriminator(X)
D_fake = discriminator(G_sample)

D_loss = tf.reduce_mean(D_real) - tf.reduce_mean(D_fake)
G_loss = -tf.reduce_mean(D_fake)

D_solver = (tf.train.RMSPropOptimizer(learning_rate=1e-4)
            .minimize(-D_loss, var_list=theta_D))
G_solver = (tf.train.RMSPropOptimizer(learning_rate=1e-4)
            .minimize(G_loss, var_list=theta_G))

clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in theta_D]

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

i = 0

for it in range(1000000):
    for _ in range(5):
        X_mb, _ = mnist.train.next_batch(mb_size)

        _, D_loss_curr, _ = sess.run(
            [D_solver, D_loss, clip_D],
            feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)}
        )

    _, G_loss_curr = sess.run(
        [G_solver, G_loss],
        feed_dict={z: sample_z(mb_size, z_dim)}
    )

    if it % 100 == 0:
        print('Iter: {}; D loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))

        if it % 1000 == 0:
            samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

            fig = plot(samples)
            plt.savefig('out/{}.png'
                        .format(str(i).zfill(3)), bbox_inches='tight')
            i += 1
            plt.close(fig)
        

Iter: 0; D loss: -0.003368; G_loss: -0.002383
Iter: 100; D loss: 1.824; G_loss: 1.376
Iter: 200; D loss: 1.781; G_loss: 1.229
Iter: 300; D loss: 1.567; G_loss: 0.9889
Iter: 400; D loss: 1.32; G_loss: 0.8205
Iter: 500; D loss: 1.028; G_loss: 0.777
Iter: 600; D loss: 0.7022; G_loss: 0.6411
Iter: 700; D loss: 0.4505; G_loss: 0.5951
Iter: 800; D loss: 0.3162; G_loss: 0.4934
Iter: 900; D loss: 0.1659; G_loss: 0.3691
Iter: 1000; D loss: 0.1118; G_loss: 0.05402
Iter: 1100; D loss: 0.05006; G_loss: 0.04562
Iter: 1200; D loss: 0.02963; G_loss: 0.004156
Iter: 1300; D loss: 0.0206; G_loss: 0.01705
Iter: 1400; D loss: 0.01558; G_loss: -0.02134
Iter: 1500; D loss: 0.001415; G_loss: -0.04802
Iter: 1600; D loss: 0.006767; G_loss: 0.006029
Iter: 1700; D loss: 0.003845; G_loss: 0.04103
Iter: 1800; D loss: 0.001434; G_loss: 0.0329
Iter: 1900; D loss: -4.686e-05; G_loss: -0.09211
Iter: 2000; D loss: 0.001947; G_loss: -0.03738
Iter: 2100; D loss: 0.01005; G_loss: -0.01618
Iter: 2200; D loss: 0.001043; G_l

Iter: 17800; D loss: 0.01955; G_loss: -0.01237
Iter: 17900; D loss: 0.02324; G_loss: -0.01335
Iter: 18000; D loss: 0.02576; G_loss: -0.01381
Iter: 18100; D loss: 0.02187; G_loss: -0.01243
Iter: 18200; D loss: 0.01861; G_loss: -0.01083
Iter: 18300; D loss: 0.01896; G_loss: -0.01066
Iter: 18400; D loss: 0.02239; G_loss: -0.01485
Iter: 18500; D loss: 0.01822; G_loss: -0.01556
Iter: 18600; D loss: 0.01878; G_loss: -0.0114
Iter: 18700; D loss: 0.02235; G_loss: -0.01545
Iter: 18800; D loss: 0.0246; G_loss: -0.01372
Iter: 18900; D loss: 0.02209; G_loss: -0.01133
Iter: 19000; D loss: 0.01907; G_loss: -0.01564
Iter: 19100; D loss: 0.0217; G_loss: -0.008271
Iter: 19200; D loss: 0.02054; G_loss: -0.01304
Iter: 19300; D loss: 0.02038; G_loss: -0.01228
Iter: 19400; D loss: 0.02125; G_loss: -0.0108
Iter: 19500; D loss: 0.02329; G_loss: -0.01193
Iter: 19600; D loss: 0.01949; G_loss: -0.01435
Iter: 19700; D loss: 0.01636; G_loss: -0.01169
Iter: 19800; D loss: 0.0219; G_loss: -0.009502
Iter: 19900; D l

Iter: 35300; D loss: 0.01349; G_loss: -0.008861
Iter: 35400; D loss: 0.01282; G_loss: -0.01511
Iter: 35500; D loss: 0.01474; G_loss: -0.01323
Iter: 35600; D loss: 0.01609; G_loss: -0.01276
Iter: 35700; D loss: 0.01394; G_loss: -0.006258
Iter: 35800; D loss: 0.01379; G_loss: -0.01392
Iter: 35900; D loss: 0.0126; G_loss: -0.01146
Iter: 36000; D loss: 0.01501; G_loss: -0.0147
Iter: 36100; D loss: 0.01418; G_loss: -0.01065
Iter: 36200; D loss: 0.01013; G_loss: -0.01043
Iter: 36300; D loss: 0.01492; G_loss: -0.01137
Iter: 36400; D loss: 0.01063; G_loss: -0.01081
Iter: 36500; D loss: 0.01442; G_loss: -0.008802
Iter: 36600; D loss: 0.01339; G_loss: -0.009972
Iter: 36700; D loss: 0.01534; G_loss: -0.01395
Iter: 36800; D loss: 0.01466; G_loss: -0.01043
Iter: 36900; D loss: 0.01447; G_loss: -0.01262
Iter: 37000; D loss: 0.01422; G_loss: -0.01042
Iter: 37100; D loss: 0.01569; G_loss: -0.01369
Iter: 37200; D loss: 0.01226; G_loss: -0.01079
Iter: 37300; D loss: 0.01064; G_loss: -0.01055
Iter: 37400

KeyboardInterrupt: 

## Improved WGAN

In [6]:
def sample_z(m, n):
    return np.random.uniform(-1., 1., size=[m, n])


def generator(z):
    G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    G_log_prob = tf.matmul(G_h1, G_W2) + G_b2
    G_prob = tf.nn.sigmoid(G_log_prob, name="genSample")

    return G_prob


def discriminator(x):
    D_h1 = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    out = tf.matmul(D_h1, D_W2) + D_b2
#     print(out)
    return out

In [7]:
X = tf.placeholder(tf.float32, shape=[None, X_dim])

D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

theta_D = [D_W1, D_W2, D_b1, D_b2]


z = tf.placeholder(tf.float32, shape=[None, z_dim], name="hereYouGo")

G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

theta_G = [G_W1, G_W2, G_b1, G_b2]

In [8]:
G_sample = generator(z)
print(G_sample.shape)
D_real = discriminator(X)
D_fake = discriminator(G_sample)


# elif MODE == 'wgan-gp':
LAMBDA = 10 #Gradient penalty lambda hyperparameter
D_loss = tf.reduce_mean(D_real) - tf.reduce_mean(D_fake)
G_loss = tf.reduce_mean(D_fake)




alpha = tf.random_uniform(
    shape=[], 
    minval=0.,
    maxval=1.
)
differences =  X - G_sample
interpolates = G_sample + (alpha*differences)
gradients = tf.gradients(discriminator(interpolates), [interpolates])[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
gradient_penalty = tf.reduce_mean((slopes-1.)**2)
D_loss += LAMBDA*gradient_penalty

G_solver = tf.train.AdamOptimizer(
    learning_rate=1e-4, 
    beta1=0.5,
    beta2=0.9
).minimize(G_loss, var_list=theta_G)

# D_solver = (tf.train.RMSPropOptimizer(learning_rate=1e-4)
#             .minimize(-D_loss, var_list=theta_D))

D_solver = tf.train.AdamOptimizer(
    learning_rate=1e-4, 
    beta1=0.5, 
    beta2=0.9
).minimize(D_loss, var_list=theta_D)




# clip_disc_weights = None

(?, 784)


In [9]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out_improvedGAN_baseline/'):
    os.makedirs('out_improvedGAN_baseline/')
    
if not os.path.exists('out_improvedGAN_baseline/model/'):
    os.makedirs('out_improvedGAN_baseline/model/')

i = 0

for it in range(10000):
    for _ in range(5):
        X_mb, _ = mnist.train.next_batch(mb_size)

        _, D_loss_curr = sess.run(
            [D_solver, D_loss],
            feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)}
        )

    _, G_loss_curr = sess.run(
        [G_solver, G_loss],
        feed_dict={z: sample_z(mb_size, z_dim)}
    )

    if it % 100 == 0:
        print('Iter: {}; D loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))
        samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out_improvedGAN_baseline/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)

        saver = tf.train.Saver()
        cur_model_name = 'model_{}'.format(it)
        saver.save(sess, 'out_improvedGAN_baseline/model/{}'.format(cur_model_name))


Iter: 0; D loss: -0.259; G_loss: 2.2
Iter: 100; D loss: -12.06; G_loss: 15.51
Iter: 200; D loss: -9.574; G_loss: 10.99
Iter: 300; D loss: -7.49; G_loss: 7.133
Iter: 400; D loss: -6.168; G_loss: 4.697
Iter: 500; D loss: -5.351; G_loss: 2.442


KeyboardInterrupt: 

In [97]:
tf_input=graph.get_operation_by_name('genSample').name+':0'
gen=graph.get_tensor_by_name(tf_input)
# gen=graph.get_tensor_by_name(tf_input)
    

In [11]:
tf.reset_default_graph()
with tf.Session() as sess:
#     z = tf.placeholder(tf.float32, shape=[None, z_dim])
#     saver = tf.train.Saver()
    saver=tf.train.import_meta_graph('out_improvedGAN_baseline/model/model_400.meta')
    saver.restore(sess, 'out_improvedGAN_baseline/model/model_400')
    graph=tf.get_default_graph()

    tf_input=graph.get_operation_by_name('hereYouGo').name+':0'
    z=graph.get_tensor_by_name(tf_input)
    
    tf_input_1=graph.get_operation_by_name('genSample').name+':0'
    gen=graph.get_tensor_by_name(tf_input_1)
    

    
    for i in range(10):
        samples = sess.run(gen, feed_dict={z: sample_z(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out_improvedGAN_baseline/generated/hallelujah{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')


INFO:tensorflow:Restoring parameters from out_improvedGAN_baseline/model/model_400


In [24]:
graph.get_all_collection_keys()

['train_op', 'variables', 'trainable_variables']

In [69]:
graph.get_operation_by_name('')

KeyError: "The name 'G_sample' refers to an Operation not in the graph."