In [41]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Layer

np.random.seed(1234)
eps = 1e-8

In [42]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_val = x_train[50000:60000]
x_train = x_train[0:50000]
y_val = y_train[50000:60000]
y_train = y_train[0:50000]
x_train = x_train.astype(np.float32).reshape(50000,28*28) / 255.0
x_val = x_val.astype(np.float32).reshape(10000,28*28) / 255.0
x_test = x_test.astype(np.float32).reshape(10000,28*28) / 255.0
y_train = tf.one_hot(y_train, depth=10)
y_val = tf.one_hot(y_val, depth=10)
y_test = tf.one_hot(y_test, depth=10)
print(x_train.shape)
print(x_test.shape)
print(x_val.shape)
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(128)
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(128)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.batch(128)
print(len(train_dataset))
print(len(test_dataset))

(50000, 784)
(10000, 784)
(10000, 784)
391
79


In [43]:
size_input = 784
size_hidden1 = 128
size_hidden2 = 128
size_hidden3 = 128
size_output = 10

In [44]:
class BatchNormalization(Layer):
  def __init__(self,units):
    super(BatchNormalization,self).__init__()
    self.running_mean = tf.zeros((1,units))
    self.running_var = tf.zeros((1,units))
    self.units = units
    b_init = tf.zeros_initializer()
    self.bias = tf.Variable(initial_value=b_init(shape=(1,units), dtype= "float32"),trainable=True)
    g_init = tf.ones_initializer()
    self.gamma = tf.Variable(initial_value = g_init(shape = (1, units), dtype = "float32"), trainable=True)
    self.weighted_mean_ratio = 0.9
    self.train = True

  def train_off(self):
    self.train = False

  def call(self, inputs):
    if self.train:
      mean = tf.reduce_mean(inputs,0)
      mean = tf.reshape(mean, (1,self.units))
      var = tf.reduce_mean((inputs-mean)**2,0)
      var = tf.reshape(var, (1,self.units))
      is_running_mean_empty = tf.math.equal(tf.zeros((1,self.units)),self.running_mean)
      is_running_mean_empty = tf.reduce_all(is_running_mean_empty)
      if is_running_mean_empty:
        self.running_mean = tf.identity(mean)
        self.running_var = tf.identity(var)
      else:
        self.running_mean = self.weighted_mean_ratio*self.running_mean + (1-self.weighted_mean_ratio)*mean
        self.running_var = self.weighted_mean_ratio*self.running_var + (1-self.weighted_mean_ratio)*var
    else: 
      mean = tf.identity(self.running_mean)
      var = tf.identity(self.running_var)
    var += eps
    std_dev = tf.sqrt(var)
    std_x = (inputs-mean)/std_dev
    return self.gamma*std_x + self.bias




In [45]:
class ImageRecognitionMLP(tf.keras.Model):
    
    def __init__(self, num_classes, device='cpu:0', checkpoint_directory=None):
        ''' Define the parameterized layers used during forward-pass, the device
            where you would like to run the computation (GPU, TPU, CPU) on and the checkpoint
            directory.
            
            Args:
                num_classes: the number of labels in the network.
                device: string, 'cpu:n' or 'gpu:n' (n can vary). Default, 'cpu:0'.
                checkpoint_directory: the directory where you would like to save or 
                                      restore a model.
        ''' 
        super(ImageRecognitionMLP, self).__init__()
        
        # Initialize layers

        self.h1 = tf.keras.layers.Dense(size_hidden1, input_shape = (size_input,),activation = None)
        self.h2 = tf.keras.layers.Dense(size_hidden2, activation = None)
        self.h3 = tf.keras.layers.Dense(size_hidden3, activation = None)
        self.ot = tf.keras.layers.Dense(size_output, activation = None)
        self.bn1 = BatchNormalization(128)
        self.bn2 = BatchNormalization(128)
        self.bn3 = BatchNormalization(128)
        
        # Define the device 
        self.device = device
        
        # Define the checkpoint directory
        self.checkpoint_directory = checkpoint_directory
        self.acc = tf.keras.metrics.Accuracy()


    def predict(self, images, training):
        """ Predicts the probability of each class, based on the input sample.
            
            Args:
                images: 4D tensor. Either an image or a batch of images.
                training: Boolean. Either the network is predicting in
                          training mode or not.
        """
        
        x = self.h1(images)
        x = self.bn1(x)
        x = tf.nn.relu(x)
        #x = self.bn1(x)
        x = self.h2(x)
        x = self.bn2(x)
        x = tf.nn.relu(x)
        #x = self.bn2(x)
        
        x = self.h3(x)
        x = self.bn3(x)
        x = tf.nn.relu(x)
        #x = self.bn3(x)
        x = self.ot(x)
        
        
        #x = tf.nn.relu(x)
        #print(x.shape)
        x = tf.reshape(x, (-1, 1, 10))
        #x = tf.keras.layers.Flatten(x)
        return x



    def loss_fn(self, images, target, training):
        """ Defines the loss function used during 
            training.         
        """
        preds = self.predict(images, training)
        loss = tf.nn.softmax_cross_entropy_with_logits(labels=target, logits=preds)
        return loss


    def grads_fn(self, images, target, training):
        """ Dynamically computes the gradients of the loss value
            with respect to the parameters of the model, in each
            forward pass.
        """
        with tf.GradientTape() as tape:
            loss = self.loss_fn(images, target, training)
        return tape.gradient(loss, self.variables)
    
    def restore_model(self):
        """ Function to restore trained model.
        """
        with tf.device(self.device):
            # Run the model once to initialize variables
            dummy_input = tf.constant(tf.zeros((1,48,48,1)))
            dummy_pred = self.predict(dummy_input, training=False)
            # Restore the variables of the model
            saver = tf.Saver(self.variables)
            saver.restore(tf.train.latest_checkpoint
                          (self.checkpoint_directory))
    
    def save_model(self, global_step=0):
        """ Function to save trained model.
        """
        tf.Saver(self.variables).save(self.checkpoint_directory, 
                                       global_step=global_step)   
    
    # def compute_accuracy(self, input_data):
    #     """ Compute the accuracy on the input data.
    #     """
    #     with tf.device(self.device):
    #         #acc = tf.metrics.Accuracy()
    #         for step ,(images, targets) in enumerate(input_data):
    #             # Predict the probability of each class
    #             #print(targets.shape)
    #             logits = self.predict(images, training=False)
    #             # Select the class with the highest probability
    #             #print(logits.shape)
    #             logits = tf.nn.softmax(logits)
    #             logits = tf.reshape(logits, [-1, 10])
    #             targets = tf.reshape(targets, [-1,10])
    #             preds = tf.argmax(logits, axis=1)
                
    #             #m1.update_state
    #             # Compute the accuracy
    #             #print(preds.shape)
    #             acc(tf.reshape(targets, preds))
    #     return acc

    def compute_accuracy_2(self, images, targets):
        """ Compute the accuracy on the input data.
        """
        with tf.device(self.device):
            
            # Predict the probability of each class
            logits = self.predict(images, training=False)
            # Select the class with the highest probability
            
            logits = tf.nn.softmax(logits)
            logits = tf.reshape(logits, [-1, 10])
            targets = tf.reshape(targets, [-1,10])
            preds = tf.argmax(logits, axis=1)
            goal = tf.argmax(targets, axis=1)
            self.acc.update_state(goal, preds)
            # Compute the accuracy
            result = self.acc.result().numpy()
        return result

  
    def fit_fc(self, training_data, eval_data, optimizer, num_epochs=500, 
            early_stopping_rounds=10, verbose=10, train_from_scratch=False):
        """ Function to train the model, using the selected optimizer and
            for the desired number of epochs. You can either train from scratch
            or load the latest model trained. Early stopping is used in order to
            mitigate the risk of overfitting the network.
            
            Args:
                training_data: the data you would like to train the model on.
                                Must be in the tf.data.Dataset format.
                eval_data: the data you would like to evaluate the model on.
                            Must be in the tf.data.Dataset format.
                optimizer: the optimizer used during training.
                num_epochs: the maximum number of iterations you would like to 
                            train the model.
                early_stopping_rounds: stop training if the loss on the eval 
                                       dataset does not decrease after n epochs.
                verbose: int. Specify how often to print the loss value of the network.
                train_from_scratch: boolean. Whether to initialize variables of the
                                    the last trained model or initialize them
                                    randomly.
        """ 
    
        if train_from_scratch==False:
            self.restore_model()
        
        # Initialize best loss. This variable will store the lowest loss on the
        # eval dataset.
        best_loss = 999
        
        # Initialize classes to update the mean loss of train and eval
        train_loss = tf.keras.metrics.Mean('train_loss')
        eval_loss = tf.keras.metrics.Mean('eval_loss')
        acc_train = tf.keras.metrics.Mean('train_acc')
        acc_val = tf.keras.metrics.Mean('val_acc')
        
        # Initialize dictionary to store the loss history
        self.history = {}
        self.history['train_loss'] = []
        self.history['eval_loss'] = []
        self.history['train_acc'] = []
        self.history['val_acc'] = []
        
        # Begin training
        with tf.device(self.device):
            for i in range(num_epochs):
                # Training with gradient descent
                #training_data_x = training_data.shuffle(buffer_size=1024).batch(128)
                for step, (images, target) in enumerate(training_data):
                    grads = self.grads_fn(images, target, True)
                    optimizer.apply_gradients(zip(grads, self.variables))
                    
                # Compute the loss on the training data after one epoch
                for step, (images, target) in enumerate(training_data):
                    loss = self.loss_fn(images, target, False)
                    accuracy = self.compute_accuracy_2(images,target)
                    acc_train(accuracy)
                    train_loss(loss)
                self.history['train_loss'].append(train_loss.result().numpy())
                self.history['train_acc'].append(acc_train.result().numpy())
                # Reset metrics
                train_loss.reset_states()
                acc_train.reset_states()
                
                # Compute the loss on the eval data after one epoch
                for step, (images, target) in enumerate(eval_data):
                    loss = self.loss_fn(images, target, False)
                    accuracy = self.compute_accuracy_2(images,target)
                    acc_val(accuracy)
                    eval_loss(loss)
                self.history['eval_loss'].append(eval_loss.result().numpy())
                self.history['val_acc'].append(acc_val.result().numpy())
                # Reset metrics
                eval_loss.reset_states()
                acc_val.reset_states()
                
                print('Train loss at epoch %d: ' %(i+1), self.history['train_loss'][-1])
                print('Train Acc at epoch %d: ' %(i+1), self.history['train_acc'][-1])
                    
                print('Eval loss at epoch %d: ' %(i+1), self.history['eval_loss'][-1])
                print('Eval Acc at epoch %d: ' %(i+1), self.history['val_acc'][-1])

                # Check for early stopping
                if self.history['eval_loss'][-1]<best_loss:
                    best_loss = self.history['eval_loss'][-1]
                    count = early_stopping_rounds
                else:
                    count -= 1
                if count==0:
                    break

    def test(self, test_dataset):
      self.bn1.train_off()
      self.bn2.train_off()
      self.bn3.train_off()
      acc_test = tf.keras.metrics.Mean('test_acc')
      loss_test = tf.keras.metrics.Mean('test_loss')

      self.history['test_loss'] = []
      self.history['test_acc'] = []
      for step, (images, target) in enumerate(test_dataset):
          loss = self.loss_fn(images, target, False)
          accuracy = self.compute_accuracy_2(images,target)
          acc_test(accuracy)
          loss_test(loss)
      self.history['test_loss'].append(loss_test.result().numpy())
      self.history['test_acc'].append(acc_test.result().numpy())
      print('Test Loss: ', self.history['test_loss'][-1])
      print('Test Accuracy: ', self.history['test_acc'][-1])



In [46]:
# Specify the path where you want to save/restore the trained variables.
checkpoint_directory = 'models_checkpoints/mnist/'

# Use the GPU if available.
device = 'gpu:0'

# Define optimizer.
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4)

# Instantiate model. This doesn't initialize the variables yet.
model = ImageRecognitionMLP(num_classes=10, device=device, 
                              checkpoint_directory=checkpoint_directory)

#model = ImageRecognitionCNN(num_classes=7, device=device)

In [47]:
# Train model
model.fit_fc(train_dataset, val_dataset, optimizer, num_epochs=10, 
          early_stopping_rounds=2, verbose=2, train_from_scratch=True)

Train loss at epoch 1:  0.5390945
Train Acc at epoch 1:  0.8226167
Eval loss at epoch 1:  0.5547092
Eval Acc at epoch 1:  0.8233611
Train loss at epoch 2:  0.4245612
Train Acc at epoch 2:  0.8309889
Eval loss at epoch 2:  0.45168018
Eval Acc at epoch 2:  0.8373636
Train loss at epoch 3:  0.37472156
Train Acc at epoch 3:  0.84235173
Eval loss at epoch 3:  0.40650636
Eval Acc at epoch 3:  0.8473111
Train loss at epoch 4:  0.34051797
Train Acc at epoch 4:  0.8512551
Eval loss at epoch 4:  0.3836054
Eval Acc at epoch 4:  0.85505486
Train loss at epoch 5:  0.317355
Train Acc at epoch 5:  0.858084
Eval loss at epoch 5:  0.36795294
Eval Acc at epoch 5:  0.86110216
Train loss at epoch 6:  0.2982104
Train Acc at epoch 6:  0.863595
Eval loss at epoch 6:  0.3557239
Eval Acc at epoch 6:  0.8661024
Train loss at epoch 7:  0.28064653
Train Acc at epoch 7:  0.8683133
Eval loss at epoch 7:  0.34781283
Eval Acc at epoch 7:  0.8705913
Train loss at epoch 8:  0.2669343
Train Acc at epoch 8:  0.872464
Eva

In [48]:
model.test(test_dataset)

Test Loss:  0.3492569
Test Accuracy:  0.8810738
