<a href="https://colab.research.google.com/github/ZhaoQii/IST597/blob/main/IST597_HW4_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time
#np.random.seed(1234)

In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_val = x_train[50000:60000]
x_train = x_train[0:50000]
y_val = y_train[50000:60000]
y_train = y_train[0:50000]
x_train = x_train.astype(np.float32).reshape(-1,28,28,1) / 255.0
x_val = x_val.astype(np.float32).reshape(-1,28,28,1) / 255.0
x_test = x_test.astype(np.float32).reshape(-1,28,28,1) / 255.0
y_train = tf.one_hot(y_train, depth=10)
y_val = tf.one_hot(y_val, depth=10)
y_test = tf.one_hot(y_test, depth=10)
print(x_train.shape)
print(x_test.shape)
print(x_val.shape)

batch_size = 128

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)
train_dataset_full = train_dataset.shuffle(buffer_size=1024).batch(len(train_dataset))
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.batch(batch_size)
print(len(train_dataset))
print(len(test_dataset))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
(50000, 28, 28, 1)
(10000, 28, 28, 1)
(10000, 28, 28, 1)
391
79


In [3]:
y_train.shape

TensorShape([50000, 10])

In [13]:
class ImageRecognitionCNN(tf.keras.Model):
    
    def __init__(self, num_classes, BN_type, device='cpu:0', checkpoint_directory=None):
        ''' Define the parameterized layers used during forward-pass, the device
            where you would like to run the computation (GPU, TPU, CPU) on and the checkpoint
            directory.
            
            Args:
                num_classes: the number of labels in the network.
                device: string, 'cpu:n' or 'gpu:n' (n can vary). Default, 'cpu:0'.
                checkpoint_directory: the directory where you would like to save or 
                                      restore a model.
        ''' 
        super(ImageRecognitionCNN, self).__init__()
        
        # Initialize layers
        self.conv1 = tf.keras.layers.Conv2D(64, 3, padding='same', activation=None)
        self.conv2 = tf.keras.layers.Conv2D(64, 3,padding='same', activation=None)
        self.pool1 = tf.keras.layers.MaxPool2D()
        self.conv3 = tf.keras.layers.Conv2D(64, 3, padding='same', activation=None)
        self.conv4 = tf.keras.layers.Conv2D(64, 3, padding='same', activation=None)
        # self.pool2 = tf.keras.layers.MaxPool2D()
        # self.conv5 = tf.keras.layers.Conv2D(64, 3, padding='same', activation=None)
        # self.pool2 = tf.keras.layers.MaxPool2D()
        # self.conv6 = tf.keras.layers.Conv2D(64, 3, 2, padding='same', activation=None)
        # self.conv7 = tf.keras.layers.Conv2D(64, 1, padding='same', activation=None)
        self.conv8 = tf.keras.layers.Conv2D(num_classes, 1, padding='same', activation=None)
        
        # BN layers
        self.bn1 = tf.keras.layers.BatchNormalization(axis = 3)
        self.bn2 = tf.keras.layers.BatchNormalization(axis = 3)
        self.bn3 = tf.keras.layers.BatchNormalization(axis = 3)
        self.bn4 = tf.keras.layers.BatchNormalization(axis = 3)
        #self.bn5 = tf.keras.layers.BatchNormalization(axis = 3)

        #self.epsilon = epsilon
        self.BN_type = BN_type
        # Define the device 
        self.device = device
        
        # Define the checkpoint directory
        self.checkpoint_directory = checkpoint_directory
        self.acc = tf.keras.metrics.Accuracy()


    def predict(self, images, training):
        """ Predicts the probability of each class, based on the input sample.
            
            Args:
                images: 4D tensor. Either an image or a batch of images.
                training: Boolean. Either the network is predicting in
                          training mode or not.
        """
        if self.BN_type == 'before':
          x = self.conv1(images)
          x = self.bn1(x, training)
          x = self.pool1(x)
          x = self.conv2(x)
          x = self.bn2(x, training)
          x = tf.nn.relu(x)
          x = self.pool1(x)
          x = self.conv3(x)
          x = self.bn3(x, training)
          x = tf.nn.relu(x)
          x = self.pool1(x)
          x = self.conv4(x)
          x = self.bn4(x, training)
          x = tf.nn.relu(x)
          x = self.pool1(x)
          x = self.conv8(x)
        else:
          x = self.conv1(images)
          x = self.pool1(x)
          x = self.bn1(x, training)
          x = self.conv2(x)
          x = tf.nn.relu(x)
          x = self.bn2(x, training)
          x = self.pool1(x)
          x = self.conv3(x)
          x = tf.nn.relu(x)
          x = self.bn3(x, training)
          x = self.pool1(x)
          x = self.conv4(x)
          x = tf.nn.relu(x)
          x = self.bn4(x, training)
          x = self.pool1(x)
          x = self.conv8(x)
        
        #x = tf.nn.relu(x)
        #print(x.shape)
        x = tf.reshape(x, (-1, 1, 10))
        #x = tf.keras.layers.Flatten(x)
        return x

    def loss_fn(self, images, target, training):
        """ Defines the loss function used during 
            training.         
        """
        preds = self.predict(images, training)
        #print(preds.shape)
        #print(target.shape)
        loss = tf.nn.softmax_cross_entropy_with_logits(labels=target, logits=preds)
        return loss


    def grads_fn(self, images, target, training):
        """ Dynamically computes the gradients of the loss value
            with respect to the parameters of the model, in each
            forward pass.
        """
        with tf.GradientTape() as tape:
            loss = self.loss_fn(images, target, training)
        return tape.gradient(loss, self.variables)
    
    def restore_model(self):
        """ Function to restore trained model.
        """
        with tf.device(self.device):
            # Run the model once to initialize variables
            dummy_input = tf.constant(tf.zeros((1,48,48,1)))
            dummy_pred = self.predict(dummy_input, training=False)
            # Restore the variables of the model
            saver = tf.Saver(self.variables)
            saver.restore(tf.train.latest_checkpoint
                          (self.checkpoint_directory))
    
    def save_model(self, global_step=0):
        """ Function to save trained model.
        """
        tf.Saver(self.variables).save(self.checkpoint_directory, 
                                       global_step=global_step)   
    
    # def compute_accuracy(self, input_data):
    #     """ Compute the accuracy on the input data.
    #     """
    #     with tf.device(self.device):
    #         #acc = tf.metrics.Accuracy()
    #         for step ,(images, targets) in enumerate(input_data):
    #             # Predict the probability of each class
    #             #print(targets.shape)
    #             logits = self.predict(images, training=False)
    #             # Select the class with the highest probability
    #             #print(logits.shape)
    #             logits = tf.nn.softmax(logits)
    #             logits = tf.reshape(logits, [-1, 10])
    #             targets = tf.reshape(targets, [-1,10])
    #             preds = tf.argmax(logits, axis=1)
                
    #             #m1.update_state
    #             # Compute the accuracy
    #             #print(preds.shape)
    #             acc(tf.reshape(targets, preds))
    #     return acc

    def compute_accuracy_2(self, images, targets):
        """ Compute the accuracy on the input data.
        """
        with tf.device(self.device):
            
            # Predict the probability of each class
            logits = self.predict(images, training=False)
            # Select the class with the highest probability
            
            logits = tf.nn.softmax(logits)
            logits = tf.reshape(logits, [-1, 10])
            targets = tf.reshape(targets, [-1,10])
            preds = tf.argmax(logits, axis=1)
            goal = tf.argmax(targets, axis=1)
            self.acc.update_state(goal, preds)
            # Compute the accuracy
            result = self.acc.result().numpy()
        return result

  
    def fit_fc(self, training_data, eval_data, optimizer, num_epochs=500, 
            early_stopping_rounds=10, verbose=10, train_from_scratch=False):
        """ Function to train the model, using the selected optimizer and
            for the desired number of epochs. You can either train from scratch
            or load the latest model trained. Early stopping is used in order to
            mitigate the risk of overfitting the network.
            
            Args:
                training_data: the data you would like to train the model on.
                                Must be in the tf.data.Dataset format.
                eval_data: the data you would like to evaluate the model on.
                            Must be in the tf.data.Dataset format.
                optimizer: the optimizer used during training.
                num_epochs: the maximum number of iterations you would like to 
                            train the model.
                early_stopping_rounds: stop training if the loss on the eval 
                                       dataset does not decrease after n epochs.
                verbose: int. Specify how often to print the loss value of the network.
                train_from_scratch: boolean. Whether to initialize variables of the
                                    the last trained model or initialize them
                                    randomly.
        """ 
    
        if train_from_scratch==False:
            self.restore_model()
        
        # Initialize best loss. This variable will store the lowest loss on the
        # eval dataset.
        best_loss = 999
        
        # Initialize classes to update the mean loss of train and eval
        train_loss = tf.keras.metrics.Mean('train_loss')
        eval_loss = tf.keras.metrics.Mean('eval_loss')
        acc_train = tf.keras.metrics.Mean('train_acc')
        acc_val = tf.keras.metrics.Mean('val_acc')
        
        # Initialize dictionary to store the loss history
        self.history = {}
        self.history['train_loss'] = []
        self.history['eval_loss'] = []
        self.history['train_acc'] = []
        self.history['val_acc'] = []
        
        # Begin training
        with tf.device(self.device):
            for i in range(num_epochs):
                # Training with gradient descent
                #training_data_x = training_data.shuffle(buffer_size=1024).batch(128)
                for step, (images, target) in enumerate(training_data):
                    grads = self.grads_fn(images, target, True)
                    optimizer.apply_gradients(zip(grads, self.variables))
                    
                # Compute the loss on the training data after one epoch
                for step, (images, target) in enumerate(training_data):
                    loss = self.loss_fn(images, target, False)
                    accuracy = self.compute_accuracy_2(images,target)
                    acc_train(accuracy)
                    train_loss(loss)
                self.history['train_loss'].append(train_loss.result().numpy())
                self.history['train_acc'].append(acc_train.result().numpy())
                # Reset metrics
                train_loss.reset_states()
                acc_train.reset_states()
                
                # Compute the loss on the eval data after one epoch
                for step, (images, target) in enumerate(eval_data):
                    loss = self.loss_fn(images, target, False)
                    accuracy = self.compute_accuracy_2(images,target)
                    acc_val(accuracy)
                    eval_loss(loss)
                self.history['eval_loss'].append(eval_loss.result().numpy())
                self.history['val_acc'].append(acc_val.result().numpy())
                # Reset metrics
                eval_loss.reset_states()
                acc_val.reset_states()
                
                # Print train and eval losses
                if (i==0) | ((i+1)%verbose==0):
                    print('Train loss at epoch %d: ' %(i+1), self.history['train_loss'][-1])
                    print('Train Acc at epoch %d: ' %(i+1), self.history['train_acc'][-1])
                    
                    print('Eval loss at epoch %d: ' %(i+1), self.history['eval_loss'][-1])
                    print('Eval Acc at epoch %d: ' %(i+1), self.history['val_acc'][-1])

                # Check for early stopping
                if self.history['eval_loss'][-1]<best_loss:
                    best_loss = self.history['eval_loss'][-1]
                    count = early_stopping_rounds
                else:
                    count -= 1
                if count==0:
                    break

In [14]:
# Specify the path where you want to save/restore the trained variables.
checkpoint_directory = 'models_checkpoints/mnist/'

# Use the GPU if available.
device = 'gpu:0'

# Define optimizer.
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4)

# Instantiate model. This doesn't initialize the variables yet.
model = ImageRecognitionCNN(num_classes=10, BN_type = 'after', device=device, 
                              checkpoint_directory=checkpoint_directory)

#model = ImageRecognitionCNN(num_classes=7, device=device)

In [15]:
# Train model
model.fit_fc(train_dataset, val_dataset, optimizer, num_epochs=10, 
            early_stopping_rounds=2, verbose=2, train_from_scratch = True)

Train loss at epoch 1:  1.0407768
Train Acc at epoch 1:  0.6646767
Eval loss at epoch 1:  1.0061307
Eval Acc at epoch 1:  0.6706415
Train loss at epoch 2:  0.07905306
Train Acc at epoch 2:  0.755615
Eval loss at epoch 2:  0.089403614
Eval Acc at epoch 2:  0.81871593
Train loss at epoch 4:  0.03681089
Train Acc at epoch 4:  0.8912302
Eval loss at epoch 4:  0.05933886
Eval Acc at epoch 4:  0.9042841
Train loss at epoch 6:  0.02264533
Train Acc at epoch 6:  0.92816925
Eval loss at epoch 6:  0.05278081
Eval Acc at epoch 6:  0.9338139
Train loss at epoch 8:  0.016893867
Train Acc at epoch 8:  0.94570744
Eval loss at epoch 8:  0.05219248
Eval Acc at epoch 8:  0.9488292


KeyboardInterrupt: ignored

## Performance

In [17]:
num_trials = 3

test_accuracy = np.zeros([num_trials, 2]) # 0 for before 1 for after
used_time = np.zeros([num_trials, 2])

# Set number of epochs
NUM_EPOCHS = 10

In [20]:
for t in range(num_trials):
  print(t)
  ############### before
  # Instantiate model. This doesn't initialize the variables yet.
  model = ImageRecognitionCNN(num_classes=10, BN_type = 'before', device=device, 
                                checkpoint_directory=checkpoint_directory)
  time_start = time.time()
  # Train model
  model.fit_fc(train_dataset, val_dataset, optimizer, num_epochs=10, 
              early_stopping_rounds=2, verbose=2, train_from_scratch = True)
  time_taken = time.time() - time_start
  used_time[t, 0] = time_taken
  test_accuracy[t, 0] = model.compute_accuracy_2(x_test, y_test)

0
Train loss at epoch 1:  1.2806253
Train Acc at epoch 1:  0.40236956
Eval loss at epoch 1:  1.2596962
Eval Acc at epoch 1:  0.3948096
Train loss at epoch 2:  0.060730226
Train Acc at epoch 2:  0.5567807
Eval loss at epoch 2:  0.0651087
Eval Acc at epoch 2:  0.6779345
Train loss at epoch 4:  0.033470202
Train Acc at epoch 4:  0.81424147
Eval loss at epoch 4:  0.048535645
Eval Acc at epoch 4:  0.83767295
Train loss at epoch 6:  0.021034742
Train Acc at epoch 6:  0.8804386
Eval loss at epoch 6:  0.04033547
Eval Acc at epoch 6:  0.89036673
Train loss at epoch 8:  0.014632783
Train Acc at epoch 8:  0.9115222
Eval loss at epoch 8:  0.036745653
Eval Acc at epoch 8:  0.91699153
Train loss at epoch 10:  0.010495742
Train Acc at epoch 10:  0.92968404
Eval loss at epoch 10:  0.036776874
Eval Acc at epoch 10:  0.93313354
1
Train loss at epoch 1:  1.6208779
Train Acc at epoch 1:  0.489605
Eval loss at epoch 1:  1.5967416
Eval Acc at epoch 1:  0.48844612
Train loss at epoch 2:  0.06620212
Train Acc

In [23]:
for t in range(num_trials):
  print(t)
  ############### before
  # Instantiate model. This doesn't initialize the variables yet.
  model = ImageRecognitionCNN(num_classes=10, BN_type = 'after', device=device, 
                                checkpoint_directory=checkpoint_directory)
  time_start = time.time()
  # Train model
  model.fit_fc(train_dataset, val_dataset, optimizer, num_epochs=10, 
              early_stopping_rounds=2, verbose=2, train_from_scratch = True)
  time_taken = time.time() - time_start
  used_time[t, 1] = time_taken
  test_accuracy[t, 1] = model.compute_accuracy_2(x_test, y_test)

0
Train loss at epoch 1:  0.97584784
Train Acc at epoch 1:  0.6533267
Eval loss at epoch 1:  0.9594147
Eval Acc at epoch 1:  0.65066123
Train loss at epoch 2:  0.048765365
Train Acc at epoch 2:  0.743582
Eval loss at epoch 2:  0.06285479
Eval Acc at epoch 2:  0.81219137
Train loss at epoch 4:  0.023978828
Train Acc at epoch 4:  0.889857
Eval loss at epoch 4:  0.050114673
Eval Acc at epoch 4:  0.90345985
Train loss at epoch 6:  0.012978662
Train Acc at epoch 6:  0.9286336
Eval loss at epoch 6:  0.047538847
Eval Acc at epoch 6:  0.93443334
Train loss at epoch 8:  0.0065201917
Train Acc at epoch 8:  0.94695634
Eval loss at epoch 8:  0.04191241
Eval Acc at epoch 8:  0.9501908
Train loss at epoch 10:  0.0046346663
Train Acc at epoch 10:  0.9576313
Eval loss at epoch 10:  0.0454509
Eval Acc at epoch 10:  0.9596581
1
Train loss at epoch 1:  1.1928918
Train Acc at epoch 1:  0.6541593
Eval loss at epoch 1:  1.1623669
Eval Acc at epoch 1:  0.65033215
Train loss at epoch 2:  0.04532225
Train Acc 

In [24]:
used_time

array([[234.88589525, 228.45673037],
       [217.97964382, 220.13404584],
       [219.99534798, 195.80568457]])

In [25]:
test_accuracy

array([[0.93451309, 0.96036559],
       [0.94404095, 0.96020001],
       [0.90786391, 0.92882729]])