In this notebook we generate MNIST-style handwritten digit images using a Deep Convolutional Generative Adversarial Network (DCGAN).

### Starting with libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import tensorflow.keras as keras
import time
%matplotlib inline

Because the generator's output activation is the hyperbolic tangent, whose range is (-1, 1), we rescale the pixel values so that they are centred on zero and lie within [-1, 1].

In [3]:
# mnist.load_data() returns two (images, labels) pairs: the training split and the test split.
train,test=mnist.load_data()
x,y=train
# (x-127.5)/127.5 maps 8-bit pixel values 0..255 onto [-1,1], matching the generator's tanh output;
# the extra axis at position 3 is the single channel the convolutional layers expect.
x=np.expand_dims((x-127.5)/127.5,axis=3)

Creating a function to display images as a tiled grid.

In [4]:
def _tile_images(images,r,c,w):
    """
    Arrange the first r*c images into one r-by-c mosaic array.

    Args:
        images: numpy array of shape (N, height, width, channels), N >= r*c.
        r: number of tile rows.
        c: number of tile columns.
        w: width in pixels of the separator between neighbouring tiles.

    Returns:
        Array of shape (r*height+(r-1)*w, c*width+(c-1)*w, channels).
    """
    shown=images[:r*c]
    h,wd,ch=shown.shape[1:]
    # Pre-fill the canvas with the brightest displayed value so that the
    # gaps left between tiles act as the separators.
    canvas=np.full((r*h+(r-1)*w,c*wd+(c-1)*w,ch),shown.max(),dtype=shown.dtype)
    for idx,tile in enumerate(shown):
        row,col=divmod(idx,c)
        top,left=row*(h+w),col*(wd+w)
        canvas[top:top+h,left:left+wd]=tile
    return canvas


def show_data(data,r,c,w=2,size=(8,8),batch=0):
    """
    Display the first r*c data points as a grid of tiles and save the figure.

    Args:
        data: array or tensor of images with shape (N, height, width, channels).
        r: number of images per column of the grid (tile rows).
        c: number of images per row of the grid (tile columns).
        w: width of separator between images.
        size: figure size passed to matplotlib.
        batch: number used in the saved file name "mnist_<batch>.png".

    Returns:
        None

    Raises:
        ValueError: if r or c is smaller than 1, if data is not 4-dimensional,
            or if data holds fewer than r*c images.
    """
    # np.asarray accepts numpy arrays as well as eager tensors.
    images=np.asarray(data)
    if r<1 or c<1:
        raise ValueError("r and c must both be at least 1")
    if images.ndim!=4:
        raise ValueError("data must have shape (N, height, width, channels)")
    if len(images)<r*c:
        raise ValueError("need %d images for a %dx%d grid, got %d"%(r*c,r,c,len(images)))

    fig=_tile_images(images,r,c,w)
    if fig.shape[-1]==1:
        # A single-channel mosaic is drawn as a plain 2-D grayscale image.
        fig=fig[...,0]

    figure=plt.figure(figsize=size)
    plt.imshow(fig,'gray')
    plt.xticks([])
    plt.yticks([])
    figure.savefig("mnist_%d.png"%(batch))
    plt.show()

### Creating model

The Generator:- Generates images from random noise of a specific shape. This includes:-<br>
<br>
>*Transposed convolution layers, which apply convolution in the reverse direction: a stride greater than 1 increases the size of the output image, so no separate up-sampling layer is needed.<br>
*Batch normalization normalizes the output of each layer, with momentum=0.8 for its moving statistics, in order to make the learning process stable.<br>
*Dropout layers are not used, as their combination with batch normalization may cause instability.<br>
*For activation we use Leaky ReLU, which gives a small non-zero output rather than zero when the input is negative.<br>
*Finally, we use the hyperbolic tangent as the output activation so that the generated image has values in (-1, 1).

In [5]:
def generator(noise_shape):
    """
    Build the generator network that maps a noise vector to a 28x28x1 image.

    Args:
        noise_shape: shape of one noise sample (without the batch axis).

    Returns:
        An uncompiled keras Model whose output passes through a tanh activation.
    """
    layers=keras.layers
    noise=layers.Input(noise_shape)

    # Project the noise onto a 7x7 feature map with 256 channels.
    h=layers.Dense(7*7*256,name='Dense_layer')(noise)
    h=layers.LeakyReLU()(h)
    h=layers.Reshape((7,7,256))(h)
    assert tuple(h.shape)==(None, 7, 7, 256)

    # (filters, strides, layer name, expected output shape) of the hidden stages.
    hidden_stages=(
        (128,(1,1),'ConvT_layer_1',(None,7,7,128)),
        (64,(2,2),'ConvT_layer_2',(None,14,14,64)),
    )
    for filters,strides,layer_name,expected_shape in hidden_stages:
        h=layers.Conv2DTranspose(filters,kernel_size=(4,4),strides=strides,
                                 padding='same',name=layer_name)(h)
        h=layers.BatchNormalization(momentum=0.8)(h)
        h=layers.LeakyReLU()(h)
        assert tuple(h.shape)==expected_shape

    # Output stage: one channel, batch-normalised, squashed by tanh.
    h=layers.Conv2DTranspose(1,kernel_size=(4,4),strides=(2,2),
                             padding='same',name='ConvT_layer_3')(h)
    h=layers.BatchNormalization(momentum=0.8)(h)
    image=layers.Activation('tanh')(h)
    # The check is made on the pre-activation tensor; tanh is element-wise.
    assert tuple(h.shape)==(None,28,28,1)

    return keras.models.Model(noise,image)

The Discriminator:- This is a simple convolutional network which takes an image as input and predicts whether it is real or fake, using a sigmoid as the output activation function.

In [6]:
def discriminator(img_size):
    """
    Build the discriminator network that scores an image with a sigmoid output.

    Args:
        img_size: shape of one input image (without the batch axis).

    Returns:
        An uncompiled keras Model with a single sigmoid output unit.
    """
    layers=keras.layers
    image=layers.Input(img_size)

    # Three identical stages that differ only in the number of filters.
    h=image
    for filters in (32,64,128):
        h=layers.Conv2D(filters,kernel_size=(5,5),strides=(1,1),padding='same')(h)
        h=layers.LeakyReLU(alpha=0.2)(h)
        h=layers.Dropout(0.2)(h)

    flat=layers.Flatten()(h)
    score=layers.Dense(1)(flat)
    prob=layers.Activation('sigmoid')(score)

    return keras.models.Model(image,prob)

### Creating an instance of model

The optimizer used is Adam, which not only adapts the step size but also uses momentum, and thus results in a more stable learning process.

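While creating the combined model we mark the discriminator as non-trainable before compiling it, so that training the combined model updates only the generator. The discriminator is compiled on its own beforehand, so the generator and discriminator are trained separately.
In both cases the loss is binary cross-entropy.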

In [7]:
def DCGAN():
    """
    Build and compile the generator, the discriminator and the combined model.

    The discriminator is compiled on its own with binary cross-entropy and an
    accuracy metric. The combined model feeds a 100-dimensional noise vector
    through the generator and then through the discriminator, which is marked
    non-trainable before the combined model is compiled.

    Returns:
        Tuple (gen, dis, combined) of keras Models.
    """
    def make_optimizer():
        # `learning_rate` is the documented argument name; the older `lr`
        # alias is deprecated and may be ignored or rejected by newer Keras.
        return keras.optimizers.Adam(learning_rate=0.0002,beta_1=0.5)

    dis=discriminator((28,28,1))
    dis.compile(optimizer=make_optimizer(),loss='binary_crossentropy',metrics=['accuracy'])

    gen=generator((100,))
    inp=keras.layers.Input((100,))
    x=gen(inp)

    # NOTE(review): freezing after dis.compile() relies on Keras keeping the
    # discriminator trainable in its own train_on_batch calls - confirm on the
    # Keras version in use.
    dis.trainable=False
    out=dis(x)
    combined=keras.models.Model(inp,out)
    # Separate optimizer instance: the two models update different weights,
    # so they should not share one optimizer's per-variable state.
    combined.compile(optimizer=make_optimizer(),loss='binary_crossentropy')

    return gen,dis,combined

In [8]:
# Build the three models once; `gen` is also read as a global inside train_model.
gen,disc,combined=DCGAN()

### Training process

In [9]:
def train_model(dis,comb,batch_size,batch_len,data,generator_model=None):
    """
    Alternately train the discriminator and the generator.

    Each iteration generates a batch of fake images, trains the discriminator
    on the fake batch (label 0) and on a random real batch (label 1), then
    trains the combined model on the same noise with label 1. Every 100
    iterations the progress is printed and the fake batch is shown and saved
    through show_data.

    Args:
        dis: compiled discriminator whose train_on_batch returns (loss, accuracy).
        comb: compiled combined model (noise -> discriminator output).
        batch_size: number of fake and of real images per iteration.
        batch_len: number of training iterations to run.
        data: array of real images, indexable with an integer index array.
        generator_model: model mapping noise of shape (batch_size,100) to
            images. Defaults to the module-level `gen`.

    Returns:
        None
    """
    if generator_model is None:
        generator_model=gen

    # Side length of the square preview grid.
    w=int(np.sqrt(batch_size))

    # The labels never change, so build them once. They are shaped (batch,1)
    # on the assumption that `dis` emits one value per image, as the
    # discriminator defined in this notebook does.
    y_fake=tf.zeros((batch_size,1))
    y_real=tf.ones((batch_size,1))

    t0=time.time()
    for i in range(1,batch_len+1):
        t1=time.time()
        inp=tf.random.normal(mean=0,stddev=1,shape=(batch_size,100,))
        x_fake=generator_model(inp)

        ind=np.random.randint(0,len(data),batch_size)
        x_real=data[ind]

        loss_fk=dis.train_on_batch(x_fake,y_fake)
        loss_rl=dis.train_on_batch(x_real,y_real)
        # Average [loss, accuracy] over the fake and the real batch.
        total_loss,acc=0.5*np.add(loss_fk,loss_rl)

        # The generator is rewarded when the discriminator labels its output as real.
        g_loss=comb.train_on_batch(inp,y_real)
        t2=time.time()
        if i%100==0:
            print("Batch: %d/%d [G-Loss: %.3f, D-Loss: %.3f, Accuracy: %.2f%%] [Batch time: %.3f sec, Total time: %.2f min]\n"\
                  %(i,batch_len,g_loss,total_loss,acc*100,t2-t1,(t2-t0)/60))
            # x_fake was generated before this iteration's weight updates.
            show_data(x_fake,w,w,size=(5,5),batch=i)

In [None]:
# 1000 iterations with 16 images per batch; previews are shown as a 4x4 grid every 100 iterations.
train_model(disc,combined,16,batch_len=1000,data=x)

In [None]:
# Continue training the same models for 1000 more iterations with 32 images per batch (5x5 previews).
train_model(disc,combined,32,batch_len=1000,data=x)

In [None]:
# Generate 25 images from 100-dimensional normal noise and show them as a 5x5 grid (saved as mnist_0.png).
im=gen(tf.random.normal((25,100)))
show_data(im,5,5,w=2,size=(5,5),batch=0)