We investigate semi supervisede learning techniques on the MNIST dataset

In [19]:
#import tensorflow as tf
import tensorflow.compat.v1 as tf
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import math

### Load and Prepare Data

In [20]:
#Load MNIST data
(train_x, train_y),(test_x, test_y) = tf.keras.datasets.mnist.load_data()
#Shape
train_x.shape

(60000, 28, 28)

In [21]:
#Reshape images to be 3D
train_x = np.reshape(train_x, (-1,28,28,1))
test_x = np.reshape(test_x, (-1,28,28,1))

In [22]:
train_x.shape, test_x.shape

((60000, 28, 28, 1), (10000, 28, 28, 1))

In [23]:
#Normalize Data
train_x = train_x/127.5 - 1
test_x = test_x/127.5 - 1

Split Training Data between Supervised and Unsupervised Examples. 10% of the data will be used in Supervised learning while rest of it will be used for UnSupervised Learning.

In [24]:
supervised_data_percent = 0.015
unsupervised_data_percent = 1 - supervised_data_percent

In [25]:
train_x_sup, train_x_unsup, train_y_sup, train_y_unsup = train_test_split(train_x, train_y, 
                                                                          train_size=supervised_data_percent,
                                                                          test_size=unsupervised_data_percent)

In [26]:
train_x_sup.shape

(900, 28, 28, 1)

Following function will do 2 things:

1. Convert MNIST labels to One-hot encoding
2. Append a column at the end with zeros to indicate Real Image

In [27]:
def prepare_labels(y):
    
    extended_labels = tf.keras.utils.to_categorical(y, 10)
    extended_labels = np.concatenate([extended_labels, np.zeros((extended_labels.shape[0],1))], axis=1)
    
    return extended_labels

### Build Generator

Generator will take 100 random numbers as input and will produce an image of shape (28,28,1). Image data values will be between -1 to 1. 

In [28]:
def generator(input_x, training, reuse=False):
    
    with tf.variable_scope('Generator', reuse=reuse) as scope:
        
        #Layer 0
        x = tf.keras.layers.Reshape((1,1,100,))(input_x)
        
        #Layer 1
        x = tf.keras.layers.Conv2DTranspose(100, kernel_size=(2,2), strides=1, padding='valid')(x)
        x = tf.layers.batch_normalization(x, training=training)
        x = tf.keras.activations.relu(x)
        
        #Layer 2
        x = tf.keras.layers.Conv2DTranspose(64, kernel_size=(3,3), strides=2, padding='valid')(x)
        x = tf.layers.batch_normalization(x, training=training)
        x = tf.keras.activations.relu(x)
        
        #Layer 3
        x = tf.keras.layers.Conv2DTranspose(32, kernel_size=(4,4), strides=2, padding='valid')(x)
        x = tf.layers.batch_normalization(x, training=training)
        x = tf.keras.activations.relu(x)
        
        #Layer 4
        x = tf.keras.layers.Conv2DTranspose(1, kernel_size=(6,6), strides=2, padding='valid')(x)
        x = tf.keras.activations.tanh(x)
        
        return x       

### Build Discriminator

Discriminator will Images of shape (28,28,1) as input and will produce a vector with 11 values.

- 10 Values for MNIST label Classification
- 1 Value for Classifying if image is Fake(1) OR Real(0)

In [29]:
def discriminator(input_d, p_drop, reuse=True, training = True):
    
    with tf.variable_scope('Discriminator', reuse=reuse) as scope:
        
        #Layer 1
        x = tf.keras.layers.Conv2D(32, kernel_size=(5,5), strides=2, padding='same')(input_d)
        x = tf.keras.layers.Dropout(p_drop)(x)
        x = tf.keras.activations.relu(x, alpha=0.2)
        
        #Layer 2
        x = tf.keras.layers.Conv2D(64, kernel_size=(3,3), strides=2, padding='same')(x)
        x = tf.layers.batch_normalization(x, training=training)
        x = tf.keras.activations.relu(x, alpha=0.2)
        
        #Layer 3
        x = tf.keras.layers.Conv2D(128, kernel_size=(2,2), strides=2, padding='same')(x)
        x = tf.layers.batch_normalization(x, training=training)
        x = tf.keras.activations.relu(x, alpha=0.2)
        x = tf.keras.layers.Dropout(p_drop)(x)
        
        #Layer 4
        x = tf.keras.layers.Conv2D(128, kernel_size=(2,2), strides=2, padding='same')(x)
        x = tf.keras.activations.relu(x, alpha=0.2)
        
        #Layer 5
        features = tf.keras.layers.Flatten()(x)
        logits = tf.keras.layers.Dense(11)(features)
        
        return features, logits

### Define Loss

Loss will be calculated for the following 3 inputs:

1. Real images with actual labels (Supervised Learning)
2. Real images with NO labels (Unsupervised Learning)
3. Fake images with NO labels (Unsupervised Learning)


Loss will be calculated for Discriminator and Generator. 

#### 1. Discriminator Loss

Following will be considered to calculate Loss:

Unsupervised:
1. Loss to predict Real Image is Real and Not fake.
2. Loss to predict Fake Image is Fake and Not Real.

Supervised:
1. Loss to predict MNIST label classification

#### 2. Generator Loss

Unsupervised Loss:
1. Loss to predict Fake Image as Real
2. Feature Mapping loss 

In [30]:
def model_loss(real_un_sup_ip, real_sup_ip, fake_ip, p_drop, training, y):
    
        
    #Get Discriminator output for Real Supervised Data
    rs_features, rs_logits = discriminator(real_sup_ip, p_drop, reuse=False, training=training)
    
    #Get Discriminator output for Real Un-Supervised Data
    ru_features, ru_logits = discriminator(real_un_sup_ip, p_drop, reuse=True, training=training)
    
    #Get Fake images from Generator
    fake_images = generator(fake_ip, training=training)
    
    #Get Dicriminator output for Fake images
    fake_features, fake_logits = discriminator(fake_images, p_drop, reuse=True, training=training)
    
    
    #Calculating Discriminator Loss
    
    #1. Let's calculate Unsupervised Loss for both Real and Fake data
    real_un_sup_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=ru_logits[:,-1], 
                                                                              labels=tf.zeros_like(ru_logits[:,-1])))
        
    
    fake_un_sup_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logits[:,-1], 
                                                                              labels=tf.ones_like(fake_logits[:,-1])))
    
    #2. Supervised Loss
    real_sup_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=rs_logits, 
                                                                              labels=y))
    
    d_loss = real_un_sup_loss + fake_un_sup_loss + real_sup_loss
    
    
    #Calculating feature mapping loss for Generator
    tmp1 = tf.reduce_mean(ru_features, axis = 0)
    tmp2 = tf.reduce_mean(fake_features, axis = 0)
    feature_mapping_loss = tf.reduce_mean(tf.square(tmp1 - tmp2))
    
    #Fake vs Real loss
    fake_loss_2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logits[:,-1], 
                                                                              labels=tf.zeros_like(fake_logits[:,-1])))
    
    #g_loss = feature_mapping_loss +  fake_loss_2
    g_loss = fake_loss_2
    
    rs_class_op = tf.nn.softmax(rs_logits)
    
    #Calculate Accuracy
    correct_prediction = tf.equal(tf.argmax(rs_class_op, axis=1), tf.argmax(y, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    return fake_images, d_loss, g_loss, accuracy

### Model Optimization

Training Discriminator and Generator models

In [31]:
def model_optimization(d_loss, g_loss):
    
    # Get weights and biases to update. Get them separately for the discriminator and the generator
    discriminator_train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES , scope='Discriminator')    
    generator_train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator')
    
    #Minimize loss
    d_opt = tf.train.AdamOptimizer(name='d_optimizer').minimize(d_loss, var_list=discriminator_train_vars)
    
    g_opt = tf.train.AdamOptimizer(name='g_optimizer').minimize(g_loss, var_list=generator_train_vars)
    
    return d_opt, g_opt

### Training Module

In [38]:
def train(batch_size = 64, epochs = 1000):
    
    train_D_losses = []
    train_G_losses = []
    train_Accs  = []
    test_D_losses = []
    test_G_losses = []
    test_Accs = []
    noise_size = 100
    
    
    tf.reset_default_graph()
    tf.compat.v1.disable_eager_execution()
    #Declare Placeholders for input values
    real_sup_img = tf.placeholder(tf.float32, shape=(None,28,28,1))
    labels = tf.placeholder(tf.int64, shape=(None))
    
    real_unsup_img = tf.placeholder(tf.float32, shape=(None,28,28,1))
    
    noise_input = tf.placeholder(tf.float32, shape=(None, noise_size))
    
    dropout_rate = tf.placeholder(tf.float32)
    training = tf.placeholder(tf.bool)
    
    #Learning rate for Generator and Discriminator
    lr_g = tf.placeholder(tf.float32)
    lr_d = tf.placeholder(tf.float32)
    
    
    #Build the Graph
    fake_images, d_loss, g_loss, accuracy = model_loss(real_unsup_img, real_sup_img, noise_input, dropout_rate, 
                                                       training, labels)    
    d_opt, g_opt = model_optimization(d_loss, g_loss)
    
    
    #Execute Graph
    with tf.Session() as sess:
        
        sess.run(tf.global_variables_initializer())
        
        for i in range(epochs):
            
            #90% real images will be unsupervised
            unsup_indexes = np.random.randint(0, train_x_unsup.shape[0], size=int(0.9*batch_size))
            #10% of images will be supervised
            sup_indexes = np.random.randint(0, train_x_sup.shape[0], size=int(0.1*batch_size))
            
            
            train_feed_dict = {real_sup_img: train_x_sup[sup_indexes], 
                         labels: prepare_labels(train_y_sup[sup_indexes]), 
                         real_unsup_img: train_x_unsup[unsup_indexes], 
                         noise_input: np.random.uniform(-1.0, 1.0, size = (batch_size, 100)), 
                         dropout_rate: 0.5,
                         training: True,
                         lr_g: 1e-5, 
                         lr_d: 1e-5}
            
            _,_, dloss, gloss, acc = sess.run([d_opt, g_opt, d_loss, g_loss, accuracy], feed_dict=train_feed_dict)
            
            
            #Calculate Loss and Accuracy for Test Data
            if i % 200 == 0:
                
                print(i, '. Training Acc', acc, end='\t')
                train_Accs.append(acc)
                
                test_feed_dict = {real_sup_img: test_x, 
                         labels: prepare_labels(test_y), 
                         real_unsup_img: test_x, 
                         noise_input: np.random.uniform(-1.0, 1.0, size = (batch_size, 100)), 
                         dropout_rate: 0,
                         training: False}
                
                t_dloss, t_gloss, t_acc, fakeImgs = sess.run([d_loss, g_loss, accuracy, fake_images], 
                                                             feed_dict=test_feed_dict)
                
                test_Accs.append(t_acc)
                
                print('Test Acc', t_acc)
    return train_Accs, test_Accs

In [39]:
accs, val_accs = train(batch_size=32,epochs=20000)

  '`tf.layers.batch_normalization` is deprecated and '


Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

0 . Training Acc 0.0	Test Acc 0.159
200 . Training Acc 1.0	Test Acc 0.6312
400 . Training Acc 0.6666667	Test Acc 0.6751
600 . Training Acc 1.0	Test Acc 0.6751
800 . Training Acc 1.0	Test Acc 0.8403
1000 . Training Acc 1.0	Test Acc 0.8104
1200 . Training Acc 1.0	Test Acc 0.838
1400 . Training Acc 1.0	Test Acc 0.7919
1600 . Training Acc 1.0	Test Acc 0.8792
1800 . Training Acc 1.0	Test Acc 0.8446
2000 . Training Acc 1.0	Test Acc 0.8888
2200 . Training Acc 1.0	Test Acc 0.8651
2400 . Training Acc 1.0	Test Acc 0.8885
2600 . Training Acc 1.0	Test Acc 0.8835
2800 . Training Acc 1.0	Test Acc 0.9048
3000 . Training Acc 1.0	Test Acc 0.8366
3200 . Training Acc 1.0	Test Acc 0.8142
3400 . Training Acc 1.0	Test Acc 0.9
3600 . Training Acc 1.0	Test Acc 0.8822
3800 . Training Acc 1.0	Test Acc 0.8608
4000 . Training

In [None]:
def plot_images(fake_images):
    
    plt.figure(figsize=(2.2, 2.2))
    num_images = 16
    
    image_size = 28
    rows = 4
    
    for i in range(num_images):
        plt.subplot(rows, rows, i + 1)
        image = np.reshape(fake_images[i], [image_size, image_size])
        image = (image + 1)/2
        plt.imshow(image, cmap='gray')
        plt.axis('off')
    plt.show()   