## CNN for MNIST

In [None]:
# Import libraries

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Input,Conv2D,Dense,Flatten,Dropout
from tensorflow.keras.models import Model

In [None]:
# Load in the data

mnist = tf.keras.datasets.mnist
(x_train,y_train),(x_test,y_test) = mnist.load_data()

x_train,x_test = x_train/255.0, x_test/255.0        # normlaize train and test data(as the original values were between 0 and 255, we scale them between 0 and 1)

print("x_train.shape: ", x_train.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
x_train.shape:  (60000, 28, 28)


x_train and x_test contain 28 x 28 grayscale images which is a 2D input.
For a CNN, we need 3D for convolution operation(H x W x C)

In [None]:
# data is only 2D
# CNN expects 3D input

x_train = np.expand_dims(x_train, axis = -1)
x_test = np.expand_dims(x_test, axis = -1)

print("x_train.shape: ", x_train.shape)


x_train.shape:  (60000, 28, 28, 1)


In [None]:
# Number of classes

K = len(set(y_train))
print("Number of classes: ", K)

Number of classes:  10


In [None]:
# Build the model using functional API

i = Input(x_train[0].shape)
x = Conv2D(32, (3,3), activation='relu')(i)

max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
   strides=(1, 1), padding='valid') 
max_pool_2d(x) 

x = Conv2D(64,(3,3), activation='relu')(x)

max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
   strides=(1, 1), padding='valid') 
max_pool_2d(x) 

x = Conv2D(128,(3,3), activation='relu')(x)

max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
   strides=(1, 1), padding='valid') 
max_pool_2d(x) 

x = Flatten()(x)                              # to convert the images into a feature vector
x = Dropout(0.2)(x)                           # to regularize
x = Dense(512, activation='relu')(x)
x = Dropout(0.2)(x) 
x = Dense(K,activation='softmax')(x)

model = Model(i,x)


In [None]:
# Prepare the training dataset.
batch_size = 128
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.batch(batch_size)

# Prepare the validation dataset.
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.batch(128)

In [None]:
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()

In [None]:
epochs = 3
for epoch in range(epochs):
  print('Start of epoch %d' % (epoch,))

  # Iterate over the batches of the dataset.
  for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
    with tf.GradientTape() as tape:
      logits = model(x_batch_train, training=True)
      loss_value = loss_fn(y_batch_train, logits)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    # Update training metric.
    train_acc_metric(y_batch_train, logits)

    # Log every 200 batches.
    if step % 200 == 0:
        print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
        print('Seen so far: %s samples' % ((step + 1) * 128))

  # Display metrics at the end of each epoch.
  train_acc = train_acc_metric.result()
  print('Training acc over epoch: %s' % (float(train_acc),))
  # Reset training metrics at the end of each epoch
  train_acc_metric.reset_states()

  # Run a validation loop at the end of each epoch.
  for x_batch_val, y_batch_val in val_dataset:
    val_logits = model(x_batch_val)
    # Update val metrics
    val_acc_metric(y_batch_val, val_logits)
  val_acc = val_acc_metric.result()
  val_acc_metric.reset_states()
  print('Validation acc: %s' % (float(val_acc),))

Start of epoch 0
Training loss (for one batch) at step 0: 2.3017172813415527
Seen so far: 128 samples
Training loss (for one batch) at step 200: 1.5152587890625
Seen so far: 25728 samples
Training loss (for one batch) at step 400: 1.517162561416626
Seen so far: 51328 samples
Training acc over epoch: 0.8903999924659729
Validation acc: 0.9750000238418579
Start of epoch 1
Training loss (for one batch) at step 0: 1.478553295135498
Seen so far: 128 samples
Training loss (for one batch) at step 200: 1.473681092262268
Seen so far: 25728 samples
Training loss (for one batch) at step 400: 1.5124603509902954
Seen so far: 51328 samples
Training acc over epoch: 0.9694333076477051
Validation acc: 0.9721999764442444
Start of epoch 2
Training loss (for one batch) at step 0: 1.4767999649047852
Seen so far: 128 samples
Training loss (for one batch) at step 200: 1.4797557592391968
Seen so far: 25728 samples
Training loss (for one batch) at step 400: 1.5001823902130127
Seen so far: 51328 samples
Training

## Task 2: CNN for Fashion MNIST

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Input,Conv2D,Dense,Flatten,Dropout
from tensorflow.keras.models import Model

In [None]:
# Load in the data

fashion_mnist = tf.keras.datasets.fashion_mnist
(x_train,y_train),(x_test,y_test) = fashion_mnist.load_data()

x_train,x_test = x_train/255.0, x_test/255.0        

print("x_train.shape: ", x_train.shape)

x_train.shape:  (60000, 28, 28)


In [None]:
# data is only 2D
# CNN expects 3D input

x_train = np.expand_dims(x_train, axis = -1)
x_test = np.expand_dims(x_test, axis = -1)

print("x_train.shape: ", x_train.shape)

x_train.shape:  (60000, 28, 28, 1)


In [None]:
# Number of classes

K1 = len(set(y_train))
print("Number of classes: ", K1)

Number of classes:  10


In [None]:
# Build the model using functional API

i = Input(x_train[0].shape)
x = Conv2D(32, (3,3), activation='relu')(i)

max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
   strides=(1, 1), padding='valid') 
max_pool_2d(x) 

x = Conv2D(64,(3,3), activation='relu')(x)

max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
   strides=(1, 1), padding='valid') 
max_pool_2d(x) 

x = Conv2D(128,(3,3), activation='relu')(x)

max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
   strides=(1, 1), padding='valid') 
max_pool_2d(x) 

x = Flatten()(x)                              
x = Dropout(0.2)(x)                          
x = Dense(512, activation='relu')(x)
x = Dropout(0.2)(x) 
x = Dense(K1,activation='softmax')(x)

model = Model(i,x)


In [None]:
# Prepare the training dataset.
batch_size = 128
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.batch(batch_size)

# Prepare the validation dataset.
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.batch(128)

In [None]:
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()

In [None]:
epochs = 3
for epoch in range(epochs):
  print('Start of epoch %d' % (epoch,))

  # Iterate over the batches of the dataset.
  for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
    with tf.GradientTape() as tape:
      logits = model(x_batch_train, training=True)
      loss_value = loss_fn(y_batch_train, logits)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    # Update training metric.
    train_acc_metric(y_batch_train, logits)

    # Log every 200 batches.
    if step % 200 == 0:
        print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
        print('Seen so far: %s samples' % ((step + 1) * 128))

  # Display metrics at the end of each epoch.
  train_acc = train_acc_metric.result()
  print('Training acc over epoch: %s' % (float(train_acc),))
  # Reset training metrics at the end of each epoch
  train_acc_metric.reset_states()

  # Run a validation loop at the end of each epoch.
  for x_batch_val, y_batch_val in val_dataset:
    val_logits = model(x_batch_val)
    # Update val metrics
    val_acc_metric(y_batch_val, val_logits)
  val_acc = val_acc_metric.result()
  val_acc_metric.reset_states()
  print('Validation acc: %s' % (float(val_acc),))

Start of epoch 0
Training loss (for one batch) at step 0: 2.302354335784912
Seen so far: 128 samples
Training loss (for one batch) at step 200: 1.9827063083648682
Seen so far: 25728 samples
Training loss (for one batch) at step 400: 1.9456875324249268
Seen so far: 51328 samples
Training acc over epoch: 0.4683666527271271
Validation acc: 0.5569999814033508
Start of epoch 1
Training loss (for one batch) at step 0: 1.867891550064087
Seen so far: 128 samples
Training loss (for one batch) at step 200: 1.9012510776519775
Seen so far: 25728 samples
Training loss (for one batch) at step 400: 1.8570404052734375
Seen so far: 51328 samples
Training acc over epoch: 0.5922166705131531
Validation acc: 0.6176999807357788
Start of epoch 2
Training loss (for one batch) at step 0: 1.8053439855575562
Seen so far: 128 samples
Training loss (for one batch) at step 200: 1.8745534420013428
Seen so far: 25728 samples
Training loss (for one batch) at step 400: 1.8906357288360596
Seen so far: 51328 samples
Trai

## Task 3: CNN for CIFAR-10

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input,Conv2D,Dense,Flatten,Dropout
from tensorflow.keras.models import Model

In [None]:
# Load in the data

cifar10 = tf.keras.datasets.cifar10
(x_train,y_train),(x_test,y_test) = cifar10.load_data()
(x_train,x_test) = x_train/255.0,x_test/255.0
(y_train,y_test) = y_train.flatten(),y_test.flatten()

print("x_train.shape: ", x_train.shape)
print("y_train.shape: ", y_train.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
x_train.shape:  (50000, 32, 32, 3)
y_train.shape:  (50000,)


In [4]:
# Number of classes

K2 = len(set(y_train))
print("Number of classes: ", K2)

Number of classes:  10


In [None]:
# Build the model using functional API

i = Input(x_train[0].shape)
x = Conv2D(32, (3,3), activation='relu')(i)

max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
   strides=(1, 1), padding='valid') 
max_pool_2d(x) 

x = Conv2D(64,(3,3), activation='relu')(x)

max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
   strides=(1, 1), padding='valid') 
max_pool_2d(x) 

x = Conv2D(128,(3,3), activation='relu')(x)

max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
   strides=(1, 1), padding='valid') 
max_pool_2d(x) 

x = Flatten()(x)                              # to convert the images into a feature vector
x = Dropout(0.5)(x)                           # to regularize
x = Dense(1024, activation='relu')(x)
x = Dropout(0.2)(x) 
x = Dense(K2,activation='softmax')(x)

model = Model(i,x)


In [None]:
# Prepare the training dataset.
batch_size = 512
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.batch(batch_size)

# Prepare the validation dataset.
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.batch(128)


In [None]:
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()

In [None]:
epochs = 3
for epoch in range(epochs):
  print('Start of epoch %d' % (epoch,))

  # Iterate over the batches of the dataset.
  for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
    with tf.GradientTape() as tape:
      logits = model(x_batch_train, training=True)
      loss_value = loss_fn(y_batch_train, logits)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    # Update training metric.
    train_acc_metric(y_batch_train, logits)

    # Log every 200 batches.
    if step % 200 == 0:
        print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
        print('Seen so far: %s samples' % ((step + 1) * 128))

  # Display metrics at the end of each epoch.
  train_acc = train_acc_metric.result()
  print('Training acc over epoch: %s' % (float(train_acc),))
  # Reset training metrics at the end of each epoch
  train_acc_metric.reset_states()

  # Run a validation loop at the end of each epoch.
  for x_batch_val, y_batch_val in val_dataset:
    val_logits = model(x_batch_val)
    # Update val metrics
    val_acc_metric(y_batch_val, val_logits)
  val_acc = val_acc_metric.result()
  val_acc_metric.reset_states()
  print('Validation acc: %s' % (float(val_acc),))

Start of epoch 0
Training loss (for one batch) at step 0: 2.3025259971618652
Seen so far: 128 samples
Training acc over epoch: 0.7085666656494141
Validation acc: 0.7771000266075134
Start of epoch 1
Training loss (for one batch) at step 0: 1.6703609228134155
Seen so far: 128 samples
Training acc over epoch: 0.7972166538238525
Validation acc: 0.7989000082015991
Start of epoch 2
Training loss (for one batch) at step 0: 1.6636629104614258
Seen so far: 128 samples
Training acc over epoch: 0.8096166849136353
Validation acc: 0.8095999956130981


## Experiments


### (a) increasing your filter sizes up to the input image for fashion MNIST dataset

In [5]:
# Load in the data

fashion_mnist = tf.keras.datasets.fashion_mnist
(x_train,y_train),(x_test,y_test) = fashion_mnist.load_data()

x_train,x_test = x_train/255.0, x_test/255.0        

print("x_train.shape: ", x_train.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
x_train.shape:  (60000, 28, 28)


In [6]:
# Number of classes

K1 = len(set(y_train))
print("Number of classes: ", K1)

Number of classes:  10


In [7]:
# data is only 2D
# CNN expects 3D input

x_train = np.expand_dims(x_train, axis = -1)
x_test = np.expand_dims(x_test, axis = -1)

print("x_train.shape: ", x_train.shape)


x_train.shape:  (60000, 28, 28, 1)


In [None]:
# Build the model using functional API

i = Input(x_train[0].shape)
x = Conv2D(32, (1,1), activation='relu')(i)

max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
   strides=(1, 1), padding='valid') 
max_pool_2d(x) 

x = Conv2D(64,(1,1), activation='relu')(x)

max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
   strides=(1, 1), padding='valid') 
max_pool_2d(x) 

x = Conv2D(128,(1,1), activation='relu')(x)

max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 
   strides=(1, 1), padding='valid') 
max_pool_2d(x) 

x = Flatten()(x)                              # to convert the images into a feature vector
x = Dropout(0.5)(x)                           # to regularize
x = Dense(1024, activation='relu')(x)
x = Dropout(0.2)(x) 
x = Dense(K1,activation='softmax')(x)

model = Model(i,x)


when I changed the filter size equal to the input size I got the error as below:

ValueError: Negative dimension size caused by subtracting 32 from 28 for '{{node conv2d/Conv2D}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](input_1, conv2d/Conv2D/ReadVariableOp)' with input shapes: [?,28,28,1], [32,32,1,32].

In [None]:
# Prepare the training dataset.
batch_size = 128
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.batch(batch_size)

# Prepare the validation dataset.
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.batch(128)

In [None]:
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()


In [12]:
epochs = 3
for epoch in range(epochs):
  print('Start of epoch %d' % (epoch,))

  # Iterate over the batches of the dataset.
  for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
    with tf.GradientTape() as tape:
      logits = model(x_batch_train, training=True)
      loss_value = loss_fn(y_batch_train, logits)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    # Update training metric.
    train_acc_metric(y_batch_train, logits)

    # Log every 200 batches.
    if step % 200 == 0:
        print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
        print('Seen so far: %s samples' % ((step + 1) * 128))

  # Display metrics at the end of each epoch.
  train_acc = train_acc_metric.result()
  print('Training acc over epoch: %s' % (float(train_acc),))
  # Reset training metrics at the end of each epoch
  train_acc_metric.reset_states()

  # Run a validation loop at the end of each epoch.
  for x_batch_val, y_batch_val in val_dataset:
    val_logits = model(x_batch_val)
    # Update val metrics
    val_acc_metric(y_batch_val, val_logits)
  val_acc = val_acc_metric.result()
  val_acc_metric.reset_states()
  print('Validation acc: %s' % (float(val_acc),))

Start of epoch 0
Training loss (for one batch) at step 0: 2.3024518489837646
Seen so far: 128 samples
Training loss (for one batch) at step 200: 1.75399649143219
Seen so far: 25728 samples
Training loss (for one batch) at step 400: 1.7760828733444214
Seen so far: 51328 samples
Training acc over epoch: 0.6803500056266785
Validation acc: 0.7401999831199646
Start of epoch 1
Training loss (for one batch) at step 0: 1.6880104541778564
Seen so far: 128 samples
Training loss (for one batch) at step 200: 1.706146240234375
Seen so far: 25728 samples
Training loss (for one batch) at step 400: 1.7105101346969604
Seen so far: 51328 samples
Training acc over epoch: 0.7587166428565979
Validation acc: 0.8118000030517578
Start of epoch 2
Training loss (for one batch) at step 0: 1.6003992557525635
Seen so far: 128 samples
Training loss (for one batch) at step 200: 1.683305025100708
Seen so far: 25728 samples
Training loss (for one batch) at step 400: 1.6812744140625
Seen so far: 51328 samples
Training 

### (b) replace pooling with strided convolutions for CIFAR-10 dataset




In [13]:
# Load in the data

cifar10 = tf.keras.datasets.cifar10
(x_train,y_train),(x_test,y_test) = cifar10.load_data()

(x_train,x_test) = x_train/255.0, x_test/255.0
(y_train,y_test) = y_train.flatten(), y_test.flatten()
print("x_train.shape: ", x_train.shape)
print("y_train.shape: ", y_train.shape)

x_train.shape:  (50000, 32, 32, 3)
y_train.shape:  (50000,)


In [14]:
# Number of classes

K2 = len(set(y_train))
print("Number of classes:a ", K2)

Number of classes:a  10


In [None]:
# Build the model

i = Input(x_train[0].shape)
x = Conv2D(32, (3,3), strides = 2, activation='relu')(i)           #filter : (3,3)
x = Conv2D(64, (3,3), strides=2, activation='relu')(x)             # I have used a stride of 2 so that the image dimensions get reduced by half after each convolution
x = Conv2D(128, (3,3), strides=2, activation='relu')(x)
x = Flatten()(x)                                                   # Flatten layer to convert the image into a feature vector
x = Dropout(0.2)(x)                                                # for regularization
x = Dense(512, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(K2,activation='softmax')(x)

model = Model(i,x)

In [None]:
# Prepare the training dataset.
batch_size = 128
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.batch(batch_size)

# Prepare the validation dataset.
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.batch(128)

In [None]:
# Prepare the training dataset.
batch_size = 128
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.batch(batch_size)

# Prepare the validation dataset.
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.batch(128)

In [None]:
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()


In [19]:
epochs = 3
for epoch in range(epochs):
  print('Start of epoch %d' % (epoch,))

  # Iterate over the batches of the dataset.
  for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
    with tf.GradientTape() as tape:
      logits = model(x_batch_train, training=True)
      loss_value = loss_fn(y_batch_train, logits)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    # Update training metric.
    train_acc_metric(y_batch_train, logits)

    # Log every 200 batches.
    if step % 200 == 0:
        print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
        print('Seen so far: %s samples' % ((step + 1) * 128))

  # Display metrics at the end of each epoch.
  train_acc = train_acc_metric.result()
  print('Training acc over epoch: %s' % (float(train_acc),))
  # Reset training metrics at the end of each epoch
  train_acc_metric.reset_states()

  # Run a validation loop at the end of each epoch.
  for x_batch_val, y_batch_val in val_dataset:
    val_logits = model(x_batch_val)
    # Update val metrics
    val_acc_metric(y_batch_val, val_logits)
  val_acc = val_acc_metric.result()
  val_acc_metric.reset_states()
  print('Validation acc: %s' % (float(val_acc),))

Start of epoch 0
Training loss (for one batch) at step 0: 2.3028151988983154
Seen so far: 128 samples
Training loss (for one batch) at step 200: 2.0910136699676514
Seen so far: 25728 samples
Training acc over epoch: 0.3396399915218353
Validation acc: 0.4374000132083893
Start of epoch 1
Training loss (for one batch) at step 0: 2.0169994831085205
Seen so far: 128 samples
Training loss (for one batch) at step 200: 2.0345849990844727
Seen so far: 25728 samples
Training acc over epoch: 0.43320000171661377
Validation acc: 0.4636000096797943
Start of epoch 2
Training loss (for one batch) at step 0: 1.9440230131149292
Seen so far: 128 samples
Training loss (for one batch) at step 200: 1.9988384246826172
Seen so far: 25728 samples
Training acc over epoch: 0.47968000173568726
Validation acc: 0.4984000027179718


Using strides = 2 in our model has lowered the accuracy in this model. Since during the training some of the features got lost in the striding process, hence our model doesn't genralize well of the validation dataset. 