<a href="https://colab.research.google.com/github/consequencesunintended/Pseudo-Labelling/blob/master/Pseudo_Labelling_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Pseudo Labelling on MNIST dataset**

References:

*1 - Pseudo-Label : The Simple and Efficient Semi-Supervised Learning
Method for Deep Neural Networks, Dong-Hyun Lee
http://deeplearning.net/wp-content/uploads/2013/03/pseudo_label_final.pdf*

*2 - Naive semi-supervised deep learning using pseudo-label, Zhun Li, ByungSoo Ko & Ho-Jin Choi
https://link.springer.com/article/10.1007/s12083-018-0702-9*

In [0]:
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Softmax, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras import losses
from tensorflow.keras import Sequential
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt

In [0]:
# Load MNIST through tf.keras so this cell uses the same Keras implementation
# as the rest of the notebook's `tensorflow.keras` imports (the standalone
# `keras` package may be a different installed version).
from tensorflow.keras.datasets import mnist
import numpy as np

# load_data() returns two (images, labels) pairs: training and test.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [0]:
# Small fully-connected classifier that outputs raw logits (no softmax); the
# losses used later are built with from_logits=True to match.
# The declared input shape includes the trailing channel axis because every
# array fed to the model is reshaped to (N, 28, 28, 1) before training.
classifier = Sequential([
    Flatten(input_shape=(28, 28, 1)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(10)
])
classifier.build()

In [0]:
# Convert the images to float32, divide by 255 and append a channel axis so
# each split has shape (N, 28, 28, 1).
# NOTE: this cell rebinds x_train / x_test, so running it twice rescales twice.
x_train = (x_train.astype('float32') / 255.).reshape((len(x_train), 28, 28, 1))
x_test = (x_test.astype('float32') / 255.).reshape((len(x_test), 28, 28, 1))

# **Split the data**
The idea here is to have a small labelled dataset and a large unlabelled set, and to use the unlabelled data to improve the accuracy of the model in scenarios where we don't have access to a large labelled set. We also set aside some data for evaluation.

In [0]:
# Labelled subset: the first 100 training samples.
x_train_shorten, y_train_shorten = x_train[:100], y_train[:100]

# "Unlabelled" pool: samples 100..2499. The true labels are sliced out as
# well, but the training cells shown in this notebook do not read them.
x_train_unlabelled, y_train_unlabelled = x_train[100:2500], y_train[100:2500]

# Evaluation subset: samples 2600..4499 (indices 2500..2599 are left unused).
x_eval_shorten, y_eval_shorten = x_train[2600:4500], y_train[2600:4500]

In [0]:
# The network ends in a plain Dense(10) layer, so the loss is told to expect
# logits rather than probabilities.
classifier.compile(
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'],
)

# **Pre-train the Model with labelled data**
First we pre-train the model for 30 epochs on the labelled data only, achieving a maximum accuracy of about 68 percent.

In [0]:
# Supervised pre-training on the labelled subset only; the evaluation subset
# is passed as validation data so its metrics are reported after every epoch.
classifier.fit(
    x=x_train_shorten,
    y=y_train_shorten,
    batch_size=100,
    epochs=30,
    validation_data=(x_eval_shorten, y_eval_shorten),
)

# **Define the datasets**

In [0]:
batch_size = 100

# Each shuffle buffer spans its whole split. A buffer smaller than the dataset
# only shuffles within a sliding window, so the first batch drawn from a fresh
# iterator could only ever contain samples from the start of the split.

# Labelled (image, label) pairs.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train_shorten, y_train_shorten))
train_dataset = train_dataset.shuffle(len(x_train_shorten)).batch(batch_size)

# Unlabelled images only; no labels are attached here.
pseudo_dataset = tf.data.Dataset.from_tensor_slices(x_train_unlabelled)
pseudo_dataset = pseudo_dataset.shuffle(len(x_train_unlabelled)).batch(batch_size)

# Held-out (image, label) pairs used for evaluation.
eval_dataset = tf.data.Dataset.from_tensor_slices((x_eval_shorten, y_eval_shorten))
eval_dataset = eval_dataset.shuffle(len(x_eval_shorten)).batch(batch_size)

# **Train the model with unlabelled dataset**
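Train the model with the unlabelled dataset and fine-tune it by training the model for one epoch on the labelled dataset. Each training step adds the loss on the labelled batch to the loss on a pseudo-labelled batch.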

In [0]:
# Semi-supervised stage: every optimisation step minimises the sum of
#   * the supervised loss on a labelled batch, and
#   * the loss of the model against its own hard predictions (pseudo-labels)
#     on an unlabelled batch.
# Each epoch walks through `pseudo_steps` batches of the unlabelled dataset
# instead of re-creating an iterator (and taking only its first batch) on
# every step.

epoch = 100  # number of passes over the unlabelled dataset
step = 100   # running step counter; not read in this cell (kept in case later cells use it)

# Number of unlabelled batches consumed per epoch.
pseudo_steps = int(x_train_unlabelled.shape[0] / batch_size)

classifier_optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Endless stream of labelled batches: the labelled dataset is far smaller than
# the unlabelled one, so it is repeated and one batch is drawn per step.
labelled_batches = iter(train_dataset.repeat())

for epoch_idx in range(epoch):

  # A fresh metric per epoch, so the reported loss is the mean over this
  # epoch's steps only rather than over everything seen so far.
  avg_main_loss = tf.keras.metrics.Mean(name='avg_main_loss', dtype=tf.float32)

  for pseudo_images in pseudo_dataset.take(pseudo_steps):
    images, labels = next(labelled_batches)

    with tf.GradientTape() as pseudo_tape:
      logits = classifier(images, training=True)
      pseudo_logits = classifier(pseudo_images, training=False)

      # Hard pseudo-labels: the class the current weights score highest.
      # They are targets only, so no gradient is propagated through them.
      pseudo_labels = tf.argmax(tf.stop_gradient(pseudo_logits), axis=-1)

      main_loss = loss_fn(labels, logits)
      psuedo_loss = loss_fn(pseudo_labels, pseudo_logits)
      loss = psuedo_loss + main_loss

    gradients_of_classifier = pseudo_tape.gradient(loss, classifier.trainable_variables)
    classifier_optimizer.apply_gradients(zip(gradients_of_classifier, classifier.trainable_variables))

    avg_main_loss.update_state(loss)
    step += 1

  # Evaluate once per epoch on the whole evaluation split, in inference mode.
  eval_logits = classifier(x_eval_shorten, training=False)
  acc = tf.reduce_mean(tf.metrics.sparse_categorical_accuracy(tf.constant(y_eval_shorten), eval_logits)).numpy()

  print("Epoch {} :::::: Loss: {} accuracy: {} ".format( epoch_idx + 1, avg_main_loss.result().numpy(), acc ) )