In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import random
%tensorflow_version 1.x
from collections import defaultdict

TensorFlow is already loaded. Please restart the runtime to change versions.


# Linear Regression

Documentazione GradientTape():  
https://www.tensorflow.org/api_docs/python/tf/GradientTape?version=nightly  

* tensors created with tf.Variable are automatically watched
* I can manually watch a tensor with tape.watch(my_tensor). Non riesco a far funzionare questo metodo, nemmeno usando il decorator @tf.function
* @tf.function decorator sopra train_step trasforma EagerTensor in Tensor

In both cases, my params in order to be watched must be tensors  
  
Documentazione customize fit:
https://keras.io/guides/customizing_what_happens_in_fit/

In [None]:
class LinearRegression(keras.Model):
  """Scalar linear model ``y = m * x + q`` trained with a hand-written step.

  ``m`` (slope) and ``q`` (intercept) are trainable ``tf.Variable``s, each
  initialised with a float32 value drawn by ``random.uniform(0, 1)``.
  """

  def __init__(self):
    # The base initialiser takes no positional arguments here; the original
    # passed the instance itself (`__init__(self)`), which is at best ignored.
    super(LinearRegression, self).__init__()
    self.m = tf.Variable(
        initial_value=tf.convert_to_tensor(random.uniform(0, 1), dtype="float32"),
        trainable=True
    )
    self.q = tf.Variable(
        initial_value=tf.convert_to_tensor(random.uniform(0, 1), dtype="float32"),
        trainable=True
    )


  def compile(self, loss_fn, optimizer):
    """Store the loss callable and the optimizer used by ``train_step``.

    Args:
      loss_fn: callable invoked as ``loss_fn(y_true, y_pred)``.
      optimizer: object exposing ``apply_gradients``.
    """
    # The parent compile() is called without arguments; the loss and the
    # optimizer are attached manually right after.
    super(LinearRegression, self).compile()
    self.loss_fn = loss_fn
    self.optimizer = optimizer

  def __call__(self, x):
    """Return the prediction ``m * x + q`` for input ``x``."""
    # NOTE(review): overriding __call__ (rather than `call`) replaces the base
    # class's own __call__ entirely; kept as is so that inputs are multiplied
    # directly by the variables -- confirm before switching to `call`.
    return self.m*x + self.q

  def train_step(self, x, y):
    """Run one gradient step on ``(x, y)`` and return the loss value.

    Args:
      x: model inputs.
      y: regression targets, passed to the loss as ``y_true``.

    Returns:
      The loss computed before the parameter update.
    """
    params = [self.m, self.q]

    # tf.Variable objects are watched by the tape automatically.
    with tf.GradientTape() as tape:
      y_pred = self(x)
      loss = self.loss_fn(y, y_pred)

    gradients = tape.gradient(loss, params)
    self.optimizer.apply_gradients(zip(gradients, params))

    return loss

In [None]:
# Synthetic dataset: 10 integer inputs drawn from [0, 10) and targets
# y = x + 2 perturbed by standard-normal noise.
# The noise is drawn first so the order of random draws is unchanged.
noise = np.random.normal(loc=0, scale=1, size=10)
x = np.random.randint(low=0, high=10, size=10)
y = (x + 2) + noise

In [None]:
# Hyper-parameters of the linear-regression demo:
# number of training steps and the optimizer's learning rate.
epochs, learning_rate = 100, 0.1

In [None]:
# Build the model together with its optimizer and loss function.
model = LinearRegression()
optim = keras.optimizers.Adam(learning_rate)
loss = keras.losses.MeanSquaredError()

model.compile(loss, optim)

# Full-batch training: each epoch is one gradient step on the whole (x, y) set.
for epoch in range(epochs):
  # The per-epoch value gets its own name so that `loss` keeps referring to
  # the loss function instead of being overwritten by a tensor.
  epoch_loss = model.train_step(x, y)
  print(f"Epoch {epoch} - Loss: {epoch_loss}")

# GAN

Some key points from tf.data.Dataset documentation:  
* *__prefetch__(buffer_size)*  
Creates a Dataset that prefetches elements from this dataset.  
Like other Dataset methods, prefetch operates on the elements of the input dataset. It has no concept of examples vs. batches. *examples.prefetch(2)* will prefetch two elements (2 examples), while *examples.batch(20).prefetch(2)* will prefetch 2 batches (2 batches, of 20 examples each).  
* *__batch__(batch_size, drop_remainder=False)*
* *shuffle(buffer_size, seed=None, reshuffle_each_iteration=None)*  
Randomly shuffles the elements of this dataset.  
This dataset fills a buffer with buffer_size elements, then randomly samples elements from this buffer, replacing the selected elements with new elements. __For perfect shuffling__, a buffer size greater than or equal to the full size of the dataset is required.  

Neither shuffle nor batch operates **in place**: each returns a new Dataset, so the result must be reassigned.

## Arguments


In [None]:
# GAN hyper-parameters
LATENT_DIM = 100       # length of the latent vector fed to the generator
BATCH_SIZE = 64        # examples per training batch
BUFFER_SIZE = 70_000   # buffer size handed to Dataset.shuffle
EPOCHS = 30            # number of epochs passed to fit()

## Dataset

load_data() returns
*Tuple of Numpy arrays: (x_train, y_train), (x_test, y_test).*  

**Important point**: the tuples are made of numpy array ==> I can access shape with *x_train.shape*  
I consider it safer to use *tf.shape(x_train)*

In [None]:
# Fashion-MNIST: only the images (index 0 of each tuple) are used below.
train_data, test_data = keras.datasets.fashion_mnist.load_data()

# Concatenate the train and test images, normalise by 255 and append a
# trailing channel axis.
images = np.concatenate((train_data[0], test_data[0]), axis=0).astype('float32') / 255
images = np.expand_dims(images, axis=-1)
print(images.shape)

# Shuffle individual examples *before* batching. Batching first (as the
# original did) only shuffles the order of whole batches, so every batch
# would contain the same examples in every epoch.
dataset = (
    tf.data.Dataset.from_tensor_slices(images)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(10)
)

(70000, 28, 28, 1)


## Generator
Implementing Generator of DCGAN

In [None]:
def build_generator(latent_dim):
  """Build the DCGAN-style generator.

  The layers live inside a function so that scratch names (notably `x`) do not
  overwrite notebook-level variables such as the regression inputs `x`.

  Args:
    latent_dim: length of the latent vector the generator consumes.

  Returns:
    A `keras.Model` mapping `(batch, latent_dim)` noise to generated images.
  """
  # The input is a flat latent vector, so there is no channel axis to declare.
  inputs = keras.layers.Input(shape=(latent_dim,))
  h = keras.layers.Dense(4 * 4 * 256)(inputs)
  h = keras.layers.Reshape((4, 4, 256))(h)  # channels-last layout

  # 4x4 -> 7x7 ('valid' padding, stride 1)
  h = keras.layers.Conv2DTranspose(filters=64,
                                   kernel_size=(4, 4),
                                   strides=(1, 1),
                                   padding='valid')(h)
  h = keras.layers.ReLU()(h)

  # 7x7 -> 14x14
  h = keras.layers.Conv2DTranspose(filters=32,
                                   kernel_size=(4, 4),
                                   strides=(2, 2),
                                   padding='same')(h)
  h = keras.layers.ReLU()(h)

  # 14x14 -> 28x28
  h = keras.layers.Conv2DTranspose(filters=16,
                                   kernel_size=(4, 4),
                                   strides=(2, 2),
                                   padding='same')(h)
  h = keras.layers.ReLU()(h)

  # 1x1 transposed convolution collapsing the feature maps to one channel.
  # It stands in for the earlier Flatten -> Dense(28 * 28 * 1) -> Reshape head.
  # NOTE(review): this layer has no activation, so outputs are unbounded while
  # the training images are scaled by 1/255 -- consider a sigmoid; left
  # unchanged here.
  outputs = keras.layers.Conv2DTranspose(filters=1,
                                         kernel_size=(1, 1),
                                         strides=(1, 1),
                                         padding='same')(h)

  return keras.Model(inputs, outputs)


generator = build_generator(LATENT_DIM)
generator.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(None, 100)]             0         
_________________________________________________________________
dense_6 (Dense)              (None, 4096)              413696    
_________________________________________________________________
reshape_3 (Reshape)          (None, 4, 4, 256)         0         
_________________________________________________________________
conv2d_transpose_12 (Conv2DT (None, 7, 7, 64)          262208    
_________________________________________________________________
re_lu_9 (ReLU)               (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_transpose_13 (Conv2DT (None, 14, 14, 32)        32800     
_________________________________________________________________
re_lu_10 (ReLU)              (None, 14, 14, 32)        0   

In [None]:
# Push one latent sample through the generator to obtain a sample output.
# The width comes from LATENT_DIM (the original hard-coded 100), so this cell
# keeps working if the latent size is changed.
z = tf.random.normal(shape=(1, LATENT_DIM))
generated = generator(z)

In [None]:
# Per-sample shape of the generator output (batch axis dropped); it is used as
# the discriminator's input shape.
discriminator_shape = tuple(generated.shape[1:])

In [None]:
# Display the shape derived from the sample generator output.
discriminator_shape

(28, 28, 1)

## Discriminator

* Global max pooling ???

In [None]:
# Binary classifier over images: three stride-2 convolutions (LeakyReLU after
# the first two), global max pooling, then a single sigmoid unit.
discriminator = tf.keras.Sequential(name="discriminator")
discriminator.add(tf.keras.layers.Input(shape=discriminator_shape))
discriminator.add(tf.keras.layers.Conv2D(64, (4, 4), strides=(2, 2), padding='same'))
discriminator.add(tf.keras.layers.LeakyReLU(alpha=0.2))
discriminator.add(tf.keras.layers.Conv2D(128, (2, 2), strides=(2, 2), padding='same'))
discriminator.add(tf.keras.layers.LeakyReLU(alpha=0.2))
discriminator.add(tf.keras.layers.Conv2D(256, (2, 2), strides=(2, 2), padding='same'))
discriminator.add(tf.keras.layers.GlobalMaxPooling2D())
discriminator.add(tf.keras.layers.Dense(1, activation='sigmoid'))

discriminator.summary()

Model: "discriminator"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 14, 14, 64)        1088      
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 7, 7, 128)         32896     
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU)    (None, 7, 7, 128)         0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 4, 4, 256)         131328    
_________________________________________________________________
global_max_pooling2d_3 (Glob (None, 256)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 1)               

## Custom Callbacks
Reference documentation:  
https://keras.io/guides/writing_your_own_callbacks/

In [None]:
class GANMonitor(tf.keras.callbacks.Callback):
    """Callback that saves a few generator samples as PNG files after each epoch.

    Args:
        num_img: number of images generated and saved per epoch.
        latent_dim: length of the latent vectors fed to the generator.
        root: directory the PNG files are written to; created if missing.
    """

    def __init__(self, num_img=2, latent_dim=LATENT_DIM, root='images'):
        super(GANMonitor, self).__init__()
        self.num_img = num_img
        self.latent_dim = latent_dim
        self.root = root

        # exist_ok avoids the check-then-create race of exists() + mkdir(),
        # and makedirs also creates missing parent directories.
        os.makedirs(root, exist_ok=True)

    def on_epoch_end(self, epoch, logs=None):
        """Sample `num_img` images from `self.model.generator` and save them."""
        random_latent_vectors = tf.random.normal(shape=(self.num_img, self.latent_dim))
        generated_images = self.model.generator(random_latent_vectors)
        # Denormalize, and keep the NumPy conversion: the original called
        # .numpy() but discarded its result.
        generated_images = (generated_images * 255).numpy()
        for i in range(self.num_img):
            img = tf.keras.preprocessing.image.array_to_img(generated_images[i])
            # Write inside `self.root`; the original saved into the current
            # working directory and left `root` empty.
            img.save(os.path.join(self.root, "generated_img_%03d_%d.png" % (epoch, i)))

In [None]:
def schedule(epoch, lr):
  """Step-decay schedule: scale the learning rate by 0.1 at selected epochs.

  Args:
    epoch: index of the epoch about to start.
    lr: current learning rate.

  Returns:
    `lr * 0.1` when `epoch` is a decay epoch (only epoch 25), else `lr`.
  """
  decay_epochs = (25,)
  return lr*0.1 if epoch in decay_epochs else lr

## Model

Nota importante:
Se come loss uso una string invece che una *keras.losses.LossClass*
allora devo poi usare *self.compiled_loss* invece che *self.loss* in train_step


__Nota Importante__  
Se si aggiunge rumore casuale (0.05) alle labels, queste escono dal range [0, 1]. In particolare, nel secondo termine della loss, se la label originaria era 1, adesso è maggiore di 1: (1 - '>1') < 0 ==> introduco un termine negativo nella loss, che può quindi diventare negativa.   
La domanda sarebbe: allora perché lo si usa? Aiuta la convergenza e tanto basta, anche se dà una loss negativa?

In [None]:
class GAN(keras.Model):
  """Couples a generator and a discriminator and trains them adversarially.

  Label convention used by `train_step`: generated images are labelled 1 and
  real images are labelled 0.
  """

  def __init__(self, generator, discriminator, latent_dim):
    super().__init__()
    self.generator = generator
    self.discriminator = discriminator
    self.latent_dim = latent_dim

  def compile(self, d_optim, g_optim, loss_fn):
    """Attach one optimizer per network and the shared loss callable."""
    super().compile()
    self.d_optim = d_optim
    self.g_optim = g_optim
    self.loss_fn = loss_fn

  def train_step(self, real_images):
    """Update the discriminator once, then the generator once.

    Returns:
      A dict with the two loss values under 'd_loss' and 'g_loss'.
    """
    # When a tuple is handed over, only its first element is used.
    if isinstance(real_images, tuple):
      real_images = real_images[0]
    n_samples = tf.shape(real_images)[0]

    # ---- Step 1: discriminator, fed with fake and real images together ----
    latent = tf.random.normal(shape=(n_samples, self.latent_dim))
    fake_images = self.generator(latent)
    mixed_images = tf.concat([fake_images, real_images], axis=0)

    # Targets follow the concatenation order: ones for fakes, zeros for reals.
    fake_targets = tf.ones((n_samples, 1))
    real_targets = tf.zeros((n_samples, 1))
    mixed_targets = tf.concat([fake_targets, real_targets], axis=0)
    # Optional label-noise trick (disabled):
    # mixed_targets += 0.05 * tf.random.uniform(tf.shape(mixed_targets))

    with tf.GradientTape() as d_tape:
      d_loss = self.loss_fn(mixed_targets, self.discriminator(mixed_images))

    d_vars = self.discriminator.trainable_weights
    d_grads = d_tape.gradient(d_loss, d_vars)
    self.d_optim.apply_gradients(zip(d_grads, d_vars))

    # ---- Step 2: generator, trained to fool the discriminator ----
    # Every generated image is labelled as "real" (0). The loss is low when
    # the discriminator is fooled and high when it spots the fakes.
    fooling_targets = tf.zeros((n_samples, 1))

    with tf.GradientTape() as g_tape:
      latent = tf.random.normal(shape=(n_samples, self.latent_dim))
      fake_scores = self.discriminator(self.generator(latent))
      g_loss = self.loss_fn(fooling_targets, fake_scores)

    # Only the generator's weights are updated in this step.
    g_vars = self.generator.trainable_weights
    g_grads = g_tape.gradient(g_loss, g_vars)
    self.g_optim.apply_gradients(zip(g_grads, g_vars))

    return {'d_loss': d_loss, 'g_loss': g_loss}

From the documentation:  
*Do not specify the batch_size if your data is in the form of datasets, generators, or keras.utils.Sequence instances (since they generate batches)*

In [None]:
gan = GAN(generator, discriminator, LATENT_DIM)

d_optim = keras.optimizers.Adam(learning_rate=0.0003)
g_optim = keras.optimizers.Adam(learning_rate=0.0003)
loss_fn = tf.keras.losses.BinaryCrossentropy()


def apply_lr_schedule(epoch, logs=None):
  """Apply `schedule` to both GAN optimizers at the start of each epoch.

  `keras.callbacks.LearningRateScheduler` reads and writes `model.optimizer`.
  `GAN.compile` stores its optimizers as `d_optim` / `g_optim` and calls the
  parent `compile()` without them, so the built-in scheduler never touched the
  optimizers that actually train the two networks.
  """
  for optimizer in (d_optim, g_optim):
    current_lr = float(keras.backend.get_value(optimizer.learning_rate))
    keras.backend.set_value(optimizer.learning_rate, schedule(epoch, current_lr))


gan.compile(d_optim, g_optim, loss_fn)
gan.fit(dataset, epochs=EPOCHS,
        callbacks=[GANMonitor(num_img=2, latent_dim=LATENT_DIM),
                   keras.callbacks.LambdaCallback(on_epoch_begin=apply_lr_schedule)])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7fc8ae036ba8>

In [None]:
# Recursively archive the `images` directory into images.zip.
!zip -r images.zip images

In [None]:
# Delete the `images` directory and everything inside it.
!rm -r images