<a href="https://colab.research.google.com/github/consequencesunintended/Pseudo-Labelling/blob/master/Pseudo-Labelling-MNIST-1st.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Pseudo Labelling on MNIST dataset**

References:

*1 - Pseudo-Label : The Simple and Efficient Semi-Supervised Learning
Method for Deep Neural Networks, Dong-Hyun Lee
http://deeplearning.net/wp-content/uploads/2013/03/pseudo_label_final.pdf*

*2 - Naive semi-supervised deep learning using pseudo-label, Zhun Li, ByungSoo Ko & Ho-Jin Choi
https://link.springer.com/article/10.1007/s12083-018-0702-9*

In [0]:
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Softmax, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras import losses
from tensorflow.keras import Sequential
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt

In [0]:
from keras.datasets import mnist
import numpy as np
# Fetch MNIST and unpack the two (images, labels) pairs it returns.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [0]:
# Small fully-connected classifier: flatten -> 128 ReLU units -> dropout -> 10 outputs.
# The final Dense layer has no activation, so the model emits raw logits.
# The declared input shape includes the trailing channel axis because the images
# fed to this model are reshaped to (N, 28, 28, 1) before training.
classifier = Sequential([
    Flatten(input_shape=(28, 28, 1)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(10)
])
classifier.build()

In [0]:
# Convert the images to float32, divide by 255 and add a trailing channel axis
# so that each sample has shape (28, 28, 1).
x_train = (x_train.astype('float32') / 255.).reshape((x_train.shape[0], 28, 28, 1))
x_test = (x_test.astype('float32') / 255.).reshape((x_test.shape[0], 28, 28, 1))

# **Split the data**
The idea here is to have a small labelled dataset and a large unlabelled set that helps improve the accuracy of the model in scenarios where we don't have access to a large labelled set. We also set aside some data for evaluation.

In [0]:
# Bucket the training images by label: data_k holds every image whose label is k.
# Later cells slice fixed-size, class-balanced subsets out of these buckets.
(data_0, data_1, data_2, data_3, data_4,
 data_5, data_6, data_7, data_8, data_9) = (
    [image for image, label in zip(x_train, y_train) if label == digit]
    for digit in range(10)
)

In [0]:
# Labelled training subset: samples [source:target) of every digit bucket,
# concatenated in digit order (all 0s, then all 1s, ...).
source = 0
target = 10

x_train_shorten = np.array([
    image
    for bucket in (data_0, data_1, data_2, data_3, data_4,
                   data_5, data_6, data_7, data_8, data_9)
    for image in bucket[source:target]
])
# One float label per image, in the same digit order as the images above.
y_train_shorten = np.repeat(np.arange(10, dtype=float), target)

In [0]:
# Unlabelled pool: samples [source:target) of every digit bucket, concatenated
# in digit order. The matching labels are built as well, one per image.
source = 10
target = 250

x_train_unlabelled = np.array([
    image
    for bucket in (data_0, data_1, data_2, data_3, data_4,
                   data_5, data_6, data_7, data_8, data_9)
    for image in bucket[source:target]
])
y_train_unlabelled = np.repeat(np.arange(10, dtype=float), target - source)

In [0]:
# Evaluation subset: samples [source:target) of every digit bucket, concatenated
# in digit order, with one float label per image.
source = 250
target = 450

x_eval_shorten = np.array([
    image
    for bucket in (data_0, data_1, data_2, data_3, data_4,
                   data_5, data_6, data_7, data_8, data_9)
    for image in bucket[source:target]
])
y_eval_shorten = np.repeat(np.arange(10, dtype=float), target - source)

In [0]:
# Integer class labels + raw logits from the model -> sparse categorical
# cross-entropy with from_logits=True.
classifier.compile(
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'],
)

# **Define the datasets**

In [0]:
batch_size = 100

# Labelled (image, label) pairs used for supervised training.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train_shorten, y_train_shorten))
    .shuffle(1000)
    .batch(batch_size)
)

# Images only: labels for this pool are predicted by the model during training.
pseudo_dataset = (
    tf.data.Dataset.from_tensor_slices(x_train_unlabelled)
    .shuffle(1000)
    .batch(batch_size)
)

# Held-out (image, label) pairs used for validation and evaluation.
eval_dataset = (
    tf.data.Dataset.from_tensor_slices((x_eval_shorten, y_eval_shorten))
    .shuffle(1000)
    .batch(batch_size)
)

# **Pre-train the Model with labelled data**
First, we pre-train the model for 30 epochs on only the labelled data, achieving a maximum accuracy of around 70 percent.

In [0]:
# Both datasets are already batched, so no batch_size is passed here: Keras
# documents that batch_size must not be specified for dataset inputs.
classifier.fit(train_dataset, validation_data=eval_dataset, epochs=30)

## **Train the model with unlabelled dataset**
Train the model with the unlabelled dataset and fine-tune it by training the model for one epoch on the labelled dataset.

In [0]:
# Running mean of the loss values recorded during the pseudo-label training loop.
avg_main_loss = tf.keras.metrics.Mean(name='avg_main_loss', dtype=tf.float32)

# Number of whole batches in the unlabelled and evaluation sets.
pseudo_steps = x_train_unlabelled.shape[0] // batch_size
eval_steps = x_eval_shorten.shape[0] // batch_size

epoch = 30
step = 100

# Prime the loop with one batch from each dataset.
images, labels = next(iter(train_dataset))
eval_images, eval_labels = next(iter(eval_dataset))
pseudo_images = next(iter(pseudo_dataset))
# Pseudo-label = index of the highest-scoring class in the model's prediction.
pseudo_labels = np.argmax(classifier.predict(pseudo_images), axis=1)

classifier_optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Schedule constants read by alpha_weight(): ramp start step, ramp end step
# and the final coefficient value.
T1 = 100
T2 = 700
af = 3
step_value = 1.0

def alpha_weight(step, t1=None, t2=None, final_alpha=None):
    """Return the pseudo-label loss coefficient for a training step.

    The coefficient is 0.0 before ``t1``, rises linearly between ``t1`` and
    ``t2``, and equals ``final_alpha`` after ``t2``.

    Args:
        step: Current training step.
        t1: Step at which the ramp starts. Defaults to the module-level ``T1``.
        t2: Step at which the ramp ends. Defaults to the module-level ``T2``.
        final_alpha: Coefficient value after the ramp. Defaults to the
            module-level ``af``.

    Returns:
        The coefficient for ``step``.
    """
    t1 = T1 if t1 is None else t1
    t2 = T2 if t2 is None else t2
    final_alpha = af if final_alpha is None else final_alpha

    if step < t1:
        return 0.0
    # A zero-length ramp (t1 == t2) would divide by zero below; treat it as
    # an immediate jump to the final value.
    if step > t2 or t2 == t1:
        return final_alpha
    return ((step - t1) / (t2 - t1)) * final_alpha

# Pseudo-label training: every step combines the supervised loss on a labelled
# batch with the loss on a pseudo-labelled batch, the latter scaled by
# alpha_weight(step).
for epoch_idx in range(epoch):

    # Evaluation metrics are reported only on the first step of each epoch.
    print_info = True

    for batch_idx in range(pseudo_steps):

        with tf.GradientTape() as pseudo_tape:
            # The labelled batch is run in training mode (dropout active); the
            # pseudo-labelled batch is run in inference mode.
            generated_images = classifier(images, training=True)
            generated_pesudo_images = classifier(pseudo_images, training=False)

            main_loss = loss_fn(labels, generated_images)
            # The coefficient must multiply the pseudo-label loss; adding it
            # would only shift the total by a constant and leave the gradients
            # unaffected by the schedule.
            psuedo_loss = alpha_weight(step) * loss_fn(pseudo_labels, generated_pesudo_images)
            loss = psuedo_loss + main_loss

        gradients_of_classifier = pseudo_tape.gradient(loss, classifier.trainable_variables)
        classifier_optimizer.apply_gradients(zip(gradients_of_classifier, classifier.trainable_variables))

        # Draw the batches for the next step after the weight update, so the
        # new pseudo-labels are predicted with the updated weights.
        images, labels = next(iter(train_dataset))
        pseudo_images = next(iter(pseudo_dataset))
        pseudo_labels = np.argmax(classifier.predict(pseudo_images), axis=1)

        if print_info:
            print("Epoch {} - Step {}".format(epoch_idx + 1, step))
            classifier.evaluate(eval_dataset)

        # Track the running mean of the combined loss across all steps.
        avg_main_loss.update_state(loss)
        main_loss = avg_main_loss.result()

        print_info = False

        step += 1