In [16]:
import time
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt

In [2]:
tf.test.gpu_device_name()

'/device:GPU:0'

In [17]:
def make_generator_network(hidden_layers=1, hidden_units=100, output_units=784):
    model = tf.keras.Sequential()
    for i in range(hidden_layers):
        model.add(
            tf.keras.layers.Dense(units=hidden_units, use_bias=False))
        model.add(tf.keras.layers.LeakyReLU())
 
    model.add(
        tf.keras.layers.Dense(units=output_units, activation='tanh'))

    return model


def make_discriminator_network(hidden_layers=1, hidden_units=100, output_units=1):
    model = tf.keras.Sequential()
    for i in range(hidden_layers):
        model.add(
            tf.keras.layers.Dense(units=hidden_units))
        model.add(tf.keras.layers.LeakyReLU())
        model.add(tf.keras.layers.Dropout(rate=0.5))
        
    model.add(
        tf.keras.layers.Dense(units=output_units,
                              activation=None))
    
    return model

In [18]:
tf.random.set_seed(0)
layer = tf.keras.layers.Dropout(.5, input_shape=(2,))
data = np.arange(10).reshape(5, 2).astype(np.float32)
print(data)

outputs = layer(data, training=True)
outputs

[[0. 1.]
 [2. 3.]
 [4. 5.]
 [6. 7.]
 [8. 9.]]


<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[ 0.,  0.],
       [ 4.,  6.],
       [ 0., 10.],
       [ 0., 14.],
       [16.,  0.]], dtype=float32)>

In [5]:
np.prod([28, 28])

784

In [19]:
image_size = (28, 28)
z_size = 20
mode_z = 'uniform'
gen_hidden_layers = 1
gen_hidden_size = 100
disc_hidden_layers = 1
disc_hidden_size = 100

tf.random.set_seed(0)

gen_model = make_generator_network(hidden_layers=gen_hidden_layers,
                                   hidden_units=gen_hidden_size,
                                   output_units=np.prod(image_size))
gen_model.build(input_shape=(None, z_size))
gen_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               2000      
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 100)               0         
                                                                 
 dense_1 (Dense)             (None, 784)               79184     
                                                                 
Total params: 81,184
Trainable params: 81,184
Non-trainable params: 0
_________________________________________________________________


In [20]:
disc_model = make_discriminator_network(hidden_layers=disc_hidden_layers,
                                        hidden_units=disc_hidden_size)
disc_model.build(input_shape=(None, np.prod(image_size)))
disc_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 100)               78500     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dropout_1 (Dropout)         (None, 100)               0         
                                                                 
 dense_3 (Dense)             (None, 1)                 101       
                                                                 
Total params: 78,601
Trainable params: 78,601
Non-trainable params: 0
_________________________________________________________________


In [21]:
mnist_bldr = tfds.builder('mnist')
mnist_bldr.download_and_prepare()
mnist = mnist_bldr.as_dataset(shuffle_files=False)

def preprocess(ex, mode='uniform'):
    assert mode in ('uniform', 'normal')
    image = ex['image']                                       # 0.0-255.0
    image = tf.image.convert_image_dtype(image, tf.float32)   # 0.0-1.0
    image = tf.reshape(image, [-1])                           # flatten
    image = image * 2 - 1.0                                   # -1.0-1.0
    if mode == 'uniform':
        input_z = tf.random.uniform(shape=(z_size,), minval=-1.0, maxval=1.0)
    else:   # mode == 'normal'
        input_z = tf.random.normal(shape=(z_size,))

    return input_z, image

In [22]:
mnist_trainset = mnist['train']
mnist_trainset = mnist_trainset.map(preprocess)
mnist_trainset = mnist_trainset.batch(32, drop_remainder=True)
input_z, input_real = next(iter(mnist_trainset))
print('input_z.shape:', input_z.shape)
print('input_real.shape:', input_real.shape)

g_output = gen_model(input_z)
print('g_output.shape:', g_output.shape)

d_logits_real = disc_model(input_real)
d_logits_fake = disc_model(g_output)
print('d_logist_real.shape:', d_logits_real.shape)
print('d_logist_fake.shape:', d_logits_fake.shape)

print(d_logits_real)
print(d_logits_fake)

input_z.shape: (32, 20)
input_real.shape: (32, 784)
g_output.shape: (32, 784)
d_logist_real.shape: (32, 1)
d_logist_fake.shape: (32, 1)
tf.Tensor(
[[ 0.07255906]
 [ 0.6032193 ]
 [ 0.21647859]
 [ 0.27428222]
 [ 0.8696209 ]
 [ 0.45024568]
 [ 0.5359401 ]
 [ 0.86959165]
 [ 0.4507677 ]
 [-0.29162532]
 [ 0.27120787]
 [ 0.38178104]
 [ 0.8137118 ]
 [ 0.29135898]
 [ 0.20903385]
 [ 0.39721602]
 [-0.12903944]
 [ 0.55011773]
 [ 0.69221824]
 [ 0.9343266 ]
 [ 0.71590745]
 [ 0.5829706 ]
 [-0.2631213 ]
 [ 0.0783605 ]
 [ 0.79585797]
 [ 0.5529344 ]
 [-0.1979656 ]
 [-0.08673513]
 [ 0.46586826]
 [ 0.49324766]
 [ 0.8305299 ]
 [ 0.50397414]], shape=(32, 1), dtype=float32)
tf.Tensor(
[[-0.11402284]
 [-0.20345762]
 [-0.18124537]
 [ 0.05794936]
 [-0.05845938]
 [ 0.07330941]
 [-0.24241251]
 [-0.18452373]
 [ 0.09481211]
 [-0.03855374]
 [-0.0768047 ]
 [-0.04407243]
 [-0.16574474]
 [-0.10423222]
 [-0.12113376]
 [-0.29944634]
 [-0.01906119]
 [-0.18081096]
 [-0.03058762]
 [-0.34934577]
 [-0.00152941]
 [ 0.13642542]


In [23]:
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
g_labels_real = tf.ones_like(d_logits_fake)
g_loss = loss_fn(y_true=g_labels_real, y_pred=d_logits_fake)
print('Generator loss:', g_loss.numpy())

Generator loss: 0.7445603


In [24]:
d_labels_real = tf.ones_like(d_logits_real)
d_labels_fake = tf.zeros_like(d_logits_fake)
d_loss_real = loss_fn(y_true=d_labels_real, y_pred=d_logits_real)
d_loss_fake = loss_fn(y_true=d_labels_fake, y_pred=d_logits_fake)

print('Discriminator real loss:', d_loss_real.numpy())
print('Discriminator fake loss:', d_loss_fake.numpy())

Discriminator real loss: 0.5252441
Discriminator fake loss: 0.648277


In [25]:
tf.random.set_seed(0)
np.random.seed(1)

num_epochs = 100
batch_size = 64
image_size = (28, 28)
z_size = 20
mode_z = 'uniform'
gen_hidden_layers = 1
gen_hidden_size = 100
disc_hidden_layers = 1
disc_hidden_size = 100

if mode_z == 'uniform':
    fixed_z = tf.random.uniform(shape=(batch_size, z_size),
                                minval=-1.0,
                                maxval=1.0)
elif mode_z == 'normal':
    fixed_z = tf.random.normal(shape=(batch_size, z_size))
else:
    assert False
    
mnist_trainset = mnist['train']
mnist_trainset = mnist_trainset.map(lambda ex: preprocess(ex, mode=mode_z))
mnist_trainset = mnist_trainset.shuffle(10000)
mnist_trainset = mnist_trainset.batch(32, drop_remainder=True)

gen_model = make_generator_network(hidden_layers=gen_hidden_layers,
                                   hidden_units=gen_hidden_size,
                                   output_units=np.prod(image_size))
gen_model.build(input_shape=(None, z_size))

disc_model = make_discriminator_network(hidden_layers=disc_hidden_layers,
                                        hidden_units=disc_hidden_size)
disc_model.build(input_shape=(None, np.prod(image_size)))

loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
g_optimizer = tf.keras.optimizers.Adam()
d_optimizer = tf.keras.optimizers.Adam()
all_losses, all_d_vals, epoch_samples = [], [], []

In [None]:
start_time = time.time()
for epoch in range(1, num_epochs+1):
    epoch_losses, epoch_d_vals = [], []
    for i, (input_z, input_real) in enumerate(mnist_trainset):
        with tf.GradientTape() as g_tape:
            g_output = gen_model(input_z)
            d_logits_fake = disc_model(g_output, training=True)
            labels_real = tf.ones_like(d_logits_fake)
            g_loss = loss_fn(y_true=labels_real, y_pred=d_logits_fake)
            
        g_grads = g_tape.gradient(g_loss, gen_model.trainable_variables)
        g_optimizer.apply_gradients(grads_and_vars=zip(g_grads,
                                                       gen_model.trainable_variables))
        
        with tf.GradientTape() as d_tape:
            d_logits_real = disc_model(input_real, training=True)
            d_labels_real = tf.ones_like(d_logits_real)
            d_loss_real = loss_fn(y_true=d_labels_real, y_pred=d_logits_real)
            
            d_logits_fake = disc_model(g_output, training=True)
            d_labels_fake = tf.zeros_like(d_logits_fake)
            d_loss_fake = loss_fn(y_true=d_labels_fake, y_pred=d_logits_fake)
            
            d_loss = d_loss_real + d_loss_fake
            
        d_grads = d_tape.gradient(d_loss, disc_model.trainable_variables)
        d_optimizer.apply_gradients(grads_and_vars=zip(d_grads,
                                                       disc_model.trainable_variables))

        epoch_losses.append((g_loss.numpy(),
                             d_loss.numpy(),
                             d_loss_real.numpy(),
                             d_loss_fake.numpy()))
        d_probs_real = tf.reduce_mean(tf.sigmoid(d_logits_real))
        d_probs_fake = tf.reduce_mean(tf.sigmoid(d_logits_fake))
        epoch_d_vals.append((d_probs_real.numpy(), d_probs_fake.numpy()))
        
    all_losses.append(epoch_losses)
    all_d_vals.append(epoch_d_vals)
    
    print(
        'Epoch: {:03d} | ET {:.2f} mins | Mean loss >>'
        ' G/D {:.4f}/{:.4f} [D-Real: {:.4f} D-Fake: {:.4f}]'.format(
            epoch,
            (time.time() - start_time) / 60,
            *list(np.mean(all_losses[-1], axis=0))
        )
    )

Epoch: 001 | ET 31.74 mins | Mean loss >> G/D 3.5396/0.3215 [D-Real: 0.0756 D-Fake: 0.2458]
Epoch: 002 | ET 33.11 mins | Mean loss >> G/D 2.5328/0.8208 [D-Real: 0.3882 D-Fake: 0.4326]
Epoch: 003 | ET 35.48 mins | Mean loss >> G/D 1.7863/0.9099 [D-Real: 0.4932 D-Fake: 0.4167]
Epoch: 004 | ET 36.85 mins | Mean loss >> G/D 1.3742/1.0810 [D-Real: 0.5738 D-Fake: 0.5072]
Epoch: 005 | ET 39.22 mins | Mean loss >> G/D 1.2738/1.0939 [D-Real: 0.5872 D-Fake: 0.5066]
Epoch: 006 | ET 40.59 mins | Mean loss >> G/D 1.2017/1.1643 [D-Real: 0.6094 D-Fake: 0.5549]
Epoch: 007 | ET 41.90 mins | Mean loss >> G/D 1.1541/1.1808 [D-Real: 0.6145 D-Fake: 0.5663]
Epoch: 008 | ET 43.27 mins | Mean loss >> G/D 1.0585/1.2305 [D-Real: 0.6353 D-Fake: 0.5951]
Epoch: 009 | ET 44.64 mins | Mean loss >> G/D 1.0152/1.2388 [D-Real: 0.6372 D-Fake: 0.6016]
Epoch: 010 | ET 46.03 mins | Mean loss >> G/D 1.0364/1.2433 [D-Real: 0.6362 D-Fake: 0.6071]
Epoch: 011 | ET 47.46 mins | Mean loss >> G/D 0.9644/1.2860 [D-Real: 0.6522 D-Fa

In [11]:
opt = tf.keras.optimizers.SGD(learning_rate=0.05)
var1, var2 = tf.Variable(1.0), tf.Variable(2.0)
loss = lambda: 3 * var1 * var1 + 2 * var2 * var2
for i in range(50):
    opt.minimize(loss, var_list=[var1, var2])
    print(f'var1: {var1.numpy()} / var2: {var2.numpy()}')

var1: 0.699999988079071 / var2: 1.600000023841858
var1: 0.4899999797344208 / var2: 1.2799999713897705
var1: 0.34299999475479126 / var2: 1.0239999294281006
var1: 0.2400999814271927 / var2: 0.8191999197006226
var1: 0.168069988489151 / var2: 0.6553599238395691
var1: 0.11764898896217346 / var2: 0.5242879390716553
var1: 0.08235429227352142 / var2: 0.41943034529685974
var1: 0.05764800310134888 / var2: 0.33554428815841675
var1: 0.04035360366106033 / var2: 0.26843541860580444
var1: 0.028247522190213203 / var2: 0.2147483378648758
var1: 0.019773265346884727 / var2: 0.1717986762523651
var1: 0.013841286301612854 / var2: 0.13743893802165985
var1: 0.009688900783658028 / var2: 0.10995115339756012
var1: 0.006782230455428362 / var2: 0.08796092122793198
var1: 0.004747561179101467 / var2: 0.07036873698234558
var1: 0.003323292825371027 / var2: 0.056294988840818405
var1: 0.0023263050243258476 / var2: 0.045035991817712784
var1: 0.001628413563594222 / var2: 0.036028794944286346
var1: 0.001139889471232891 / v

In [14]:
opt = tf.keras.optimizers.Adam(learning_rate=0.1)
var1, var2 = tf.Variable(1.0), tf.Variable(2.0)
loss = lambda: 3 * var1 * var1 + 2 * var2 * var2
for i in range(100):
    opt.minimize(loss, var_list=[var1, var2])
    print(f'var1: {var1.numpy()} / var2: {var2.numpy()}')

var1: 0.8999970555305481 / var2: 1.8999969959259033
var1: 0.8004081845283508 / var2: 1.8001623153686523
var1: 0.7015819549560547 / var2: 1.700618863105774
var1: 0.6039344072341919 / var2: 1.6015000343322754
var1: 0.5079587697982788 / var2: 1.5029505491256714
var1: 0.414231538772583 / var2: 1.4051263332366943
var1: 0.3234156668186188 / var2: 1.3081938028335571
var1: 0.23625895380973816 / var2: 1.2123314142227173
var1: 0.1535801738500595 / var2: 1.1177276372909546
var1: 0.07624486833810806 / var2: 1.0245814323425293
var1: 0.005127407610416412 / var2: 0.9331026077270508
var1: -0.058941639959812164 / var2: 0.8435097336769104
var1: -0.11523428559303284 / var2: 0.7560288906097412
var1: -0.163182333111763 / var2: 0.6708924174308777
var1: -0.2024233639240265 / var2: 0.5883365273475647
var1: -0.23282656073570251 / var2: 0.5085986852645874
var1: -0.25449511408805847 / var2: 0.43191468715667725
var1: -0.267747700214386 / var2: 0.35851553082466125
var1: -0.2730855941772461 / var2: 0.28862345218658