CS512 - Computer Vision - Assignment 3 - S3

Submitted by-

Tushar Gwal A20449419

# Part 1: Dataset Loading through Pickle


In [None]:
import pickle
import numpy as np

# Function to unpickle the dataset
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')  #refeered from Cifar
    return dict

# Loading all CIFAR-10 batches
db_1 = unpickle(r"/content/data_batch_1")
db_2 = unpickle(r"/content/data_batch_2")
db_3 = unpickle(r"/content/data_batch_3")
db_4 = unpickle(r"/content/data_batch_4")
db_5 = unpickle(r"/content/data_batch_5")

# Extracting data and labels from batches
def load_batch(data_batch):
    x = data_batch[b'data']
    Y = np.array(data_batch[b'labels'])
    x = x.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)  # Reshape and transpose to (32, 32, 3)
    return x, Y

# Loading each batch
xt_1, Yt_1 = load_batch(db_1)
xt_2, Yt_2 = load_batch(db_2)
xt_3, Yt_3 = load_batch(db_3)
xt_4, Yt_4 = load_batch(db_4)
xt_5, Yt_5 = load_batch(db_5)

# Combining the data into a single training set
x_train = np.concatenate([xt_1, xt_2, xt_3, xt_4, xt_5], axis=0)
Y_train = np.concatenate([Yt_1, Yt_2, Yt_3, Yt_4, Yt_5], axis=0)

# Loading the test batch
tb = unpickle(r"/content/test_batch")
x_test, Y_test = load_batch(tb)

# Preprossing

In [None]:
# Normalize pixel values between 0 and 1
x_train = x_train/255
x_test = x_test/255

# Spliting training data into training and validation sets
from sklearn.model_selection import train_test_split
x_train, x_val, Y_train, Y_val = train_test_split(x_train, Y_train, test_size=0.2, random_state=42)

# Converting labels to categorical (one-hot encoding)
import tensorflow as tf
Y_train =tf.keras.utils.to_categorical(Y_train, 10)
Y_test = tf.keras.utils.to_categorical(Y_test, 10)
Y_val = tf.keras.utils.to_categorical(Y_val, 10)

#Part 2: Building the basic CNN model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, BatchNormalization # Import layers from tensorflow.keras.layers

In [None]:
model = Sequential()

## FIRST SET OF LAYERS

# CONVOLUTIONAL LAYER
model.add(Conv2D(filters=32, kernel_size=(4,4),input_shape=(32, 32, 3), activation='relu',))
# POOLING LAYER
model.add(MaxPool2D(pool_size=(2, 2)))
# Normailzation layer
BatchNormalization()

## SECOND SET OF LAYERS

# CONVOLUTIONAL LAYER
model.add(Conv2D(filters=64, kernel_size=(4,4),input_shape=(32, 32, 3), activation='relu',))
# POOLING LAYER
model.add(MaxPool2D(pool_size=(2, 2)))
# Normailzation layer
BatchNormalization()

## Third SET OF LAYERS

# CONVOLUTIONAL LAYER
model.add(Conv2D(filters=128, kernel_size=(4,4),input_shape=(32, 32, 3), activation='relu',))
# POOLING LAYER
model.add(MaxPool2D(pool_size=(2, 2)))
# Normailzation layer
BatchNormalization()

# FLATTEN IMAGES FROM 28 by 28 to 764 BEFORE FINAL LAYER
model.add(Flatten())

# 256 NEURONS IN DENSE HIDDEN LAYER
model.add(Dense(256, activation='relu'))

# LAST LAYER IS THE CLASSIFIER, THUS 10 POSSIBLE CLASSES
model.add(Dense(10, activation='softmax'))


model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
model.fit(x_train,Y_train,verbose=1,epochs=10,validation_data=(x_val,Y_val))

Epoch 1/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 59ms/step - accuracy: 0.3090 - loss: 1.8707 - val_accuracy: 0.5356 - val_loss: 1.3063
Epoch 2/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 58ms/step - accuracy: 0.5496 - loss: 1.2602 - val_accuracy: 0.6062 - val_loss: 1.1249
Epoch 3/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 59ms/step - accuracy: 0.6334 - loss: 1.0420 - val_accuracy: 0.6376 - val_loss: 1.0269
Epoch 4/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 59ms/step - accuracy: 0.6835 - loss: 0.9050 - val_accuracy: 0.6600 - val_loss: 0.9870
Epoch 5/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 60ms/step - accuracy: 0.7239 - loss: 0.7935 - val_accuracy: 0.6480 - val_loss: 1.0600
Epoch 6/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 60ms/step - accuracy: 0.7528 - loss: 0.7187 - val_accuracy: 0.6773 - val_loss: 0.9783
Epoc

<keras.src.callbacks.history.History at 0x7cd55e1785e0>

In [None]:
t_loss, t_acc = model.evaluate(x_test,Y_test)
print(f"Test Accuracy for basic CNN model: {t_acc * 100:.2f}%")
print(f"Test Loss for basic CNN model: {t_loss:.4f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.6776 - loss: 1.2430
Test Accuracy for basic CNN model: 67.30%
Test Loss for basic CNN model: 1.2543


# Part 3: Replacing the convolution block with inception block

In [11]:
from tensorflow.keras import layers, models

#Lets create an Inception block
in_layer = layers.Input(shape=(32, 32, 3))

# 1x1 Convolution
branch1x1 = layers.Conv2D(32, (1, 1), padding='same', activation='relu')(in_layer)

# 1x1 Convolution followed by 3x3 Convolution
branch3x3 = layers.Conv2D(32, (1, 1), padding='same', activation='relu')(in_layer)
branch3x3 = layers.Conv2D(64, (3, 3), padding='same', activation='relu')(branch3x3)

# 1x1 Convolution followed by 5x5 Convolution
branch5x5 = layers.Conv2D(16, (1, 1), padding='same', activation='relu')(in_layer)
branch5x5 = layers.Conv2D(32, (5, 5), padding='same', activation='relu')(branch5x5)

# 3x3 MaxPooling followed by 1x1 Convolution
branch_pool = layers.MaxPooling2D((3, 3), strides=(1, 1), padding='same')(in_layer)
branch_pool = layers.Conv2D(32, (1, 1), padding='same', activation='relu')(branch_pool)

# Concatenate all branches
x = layers.concatenate([branch1x1, branch3x3, branch5x5, branch_pool])
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Flatten()(x)
output_layer = layers.Dense(10, activation='softmax')(x)

# Creating the Inception model
m2 = models.Model(inputs=in_layer, outputs=output_layer)

# Compiling the model
m2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Training the model
m2.fit(x_train, Y_train, epochs=10, validation_data=(x_val, Y_val))

Epoch 1/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m415s[0m 327ms/step - accuracy: 0.5001 - loss: 1.4272 - val_accuracy: 0.6507 - val_loss: 1.0078
Epoch 2/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m442s[0m 326ms/step - accuracy: 0.6931 - loss: 0.8692 - val_accuracy: 0.6912 - val_loss: 0.8793
Epoch 3/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m422s[0m 310ms/step - accuracy: 0.7588 - loss: 0.7019 - val_accuracy: 0.7278 - val_loss: 0.7874
Epoch 4/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m445s[0m 312ms/step - accuracy: 0.7967 - loss: 0.5895 - val_accuracy: 0.7461 - val_loss: 0.7312
Epoch 5/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m465s[0m 331ms/step - accuracy: 0.8441 - loss: 0.4478 - val_accuracy: 0.7322 - val_loss: 0.8474
Epoch 6/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m443s[0m 332ms/step - accuracy: 0.8748 - loss: 0.3586 - val_accuracy: 0.7327 - val_loss:

<keras.src.callbacks.history.History at 0x7cd5542e7b20>

In [12]:
# Evaluating the Inception model
t_loss, t_acc = m2.evaluate(x_test, Y_test)
print(f"Test Accuracy for Inception Model : {t_acc * 100:.2f}%")
print(f"Test Loss for Inception Model: {t_loss:.4f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 85ms/step - accuracy: 0.7260 - loss: 1.1615
Test Accuracy for Inception Model : 72.36%
Test Loss for Inception Model: 1.1665


# Part 4: Building CNN model with Residual Block

In [None]:
# Create a Residual block
in_layer = layers.Input(shape=(32, 32, 3))
x = layers.Conv2D(32, (3, 3), padding='same', activation='relu')(in_layer)

# First set of layers
shortcut = x
x = layers.Conv2D(32, (3, 3), padding='same', activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(32, (3, 3), padding='same')(x)
x = layers.BatchNormalization()(x)
x = layers.Add()([x, shortcut])
x = layers.Activation('relu')(x)
x = layers.MaxPooling2D((2, 2))(x)

# Second sets of layer
shortcut = x
# Applying 1x1 convolution to match the number of channels
shortcut = layers.Conv2D(64, (1, 1), padding='same')(shortcut) # Adjusting the shortcut to have 64 channels
x = layers.Conv2D(64, (3, 3), padding='same', activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(64, (3, 3), padding='same')(x)
x = layers.BatchNormalization()(x)
x = layers.Add()([x, shortcut])
x = layers.Activation('relu')(x)
x = layers.MaxPooling2D((2, 2))(x)

# Third sets of layer
shortcut = x
# Applying 1x1 convolution to match the number of channels
shortcut = layers.Conv2D(128, (1, 1), padding='same')(shortcut) # Adjusting the shortcut to have 128 channels
x = layers.Conv2D(128, (3, 3), padding='same', activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(128, (3, 3), padding='same')(x)
x = layers.BatchNormalization()(x)
x = layers.Add()([x, shortcut])
x = layers.Activation('relu')(x)
x = layers.Flatten()(x)
output_layer = layers.Dense(10, activation='softmax')(x)

# Creating the Residual model
m3 = models.Model(inputs=in_layer, outputs=output_layer)

# Compiling the model
m3.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Training the model
m3.fit(x_train, Y_train, epochs=10, validation_data=(x_val, Y_val))


Epoch 1/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m487s[0m 386ms/step - accuracy: 0.4659 - loss: 1.6379 - val_accuracy: 0.6789 - val_loss: 0.9421
Epoch 2/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m501s[0m 385ms/step - accuracy: 0.7163 - loss: 0.8206 - val_accuracy: 0.6042 - val_loss: 1.2768
Epoch 3/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m453s[0m 363ms/step - accuracy: 0.7858 - loss: 0.6115 - val_accuracy: 0.7096 - val_loss: 0.8549
Epoch 4/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m536s[0m 390ms/step - accuracy: 0.8406 - loss: 0.4570 - val_accuracy: 0.7113 - val_loss: 0.8880
Epoch 5/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m488s[0m 390ms/step - accuracy: 0.8827 - loss: 0.3369 - val_accuracy: 0.7444 - val_loss: 0.7990
Epoch 6/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m502s[0m 391ms/step - accuracy: 0.9184 - loss: 0.2357 - val_accuracy: 0.7650 - val_loss:

<keras.src.callbacks.history.History at 0x7cd554a4bf10>

In [13]:
# Evaluating the Residual model
t_loss, t_acc = m3.evaluate(x_test, Y_test)
print(f"Test Accuracy for Residual Model : {t_acc * 100:.2f}%")
print(f"Test Loss for Residual Model : {t_loss:.4f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 94ms/step - accuracy: 0.7522 - loss: 1.0520
Test Accuracy for Residual Model : 75.20%
Test Loss for Residual Model : 1.0623


# Part 5: Evaluation and Report Result

1.    **Basic CNN Model**: The basic CNN model has three layers of convolution, max-pooling, and batch normalization, followed by dense layers for classification.

*Test Set Results*
* Test Accuracy: 67.30%
* Test Loss: 1.2543

*Analysis*

* The basic CNN model achieves a test accuracy of 67.30%, which is a decent starting point for the CIFAR-10 dataset, known for its difficulty. The test loss of 1.2543 is relatively high, indicating that the model can be improved. Its simple design may not effectively capture the complex features of the diverse CIFAR-10 images, but it serves as a good baseline for comparison with more advanced models.







2. **Inception Model**: This model uses an Inception block that combines multiple convolutional paths with different kernel sizes to capture features at various scales.

*Test Set Results:*

* Test Accuracy: 72.36%
* Test Loss: 1.1665

Analysis

The Inception model shows better performance than the basic CNN, with a test accuracy of 72.36% and a lower test loss of 1.1665. Its ability to capture features at multiple scales contributes to this improvement. However, since it only has one Inception block, its potential for further enhancement is limited. Adding more Inception blocks or increasing the model’s depth could improve performance even more.


3. **Residual Model**: This model uses residual blocks with skip connections, making it easier to train deeper networks.

*Test Set Results*

* Test Accuracy: 75.20%
* Test Loss: 1.0623

*Analysis*

* The Residual model performs the best of the three, achieving a test accuracy of 75.20% and the lowest test loss of 1.0623. The use of residual connections helps improve gradient flow in deeper networks, contributing to its strong performance. The model can learn more complex features thanks to its deeper structure. However, there’s still room for improvement, such as by adding more residual blocks or tweaking the design.

***Conclusion***

* Basic CNN Model: Serves as a solid starting point with reasonable performance, but its simple architecture limits its ability to capture complex features.
* Inception Model: Shows improved performance by effectively capturing features at multiple scales, though its potential could be further realized by adding more Inception blocks.
* Residual Model: Achieves the best results, leveraging residual connections to enable effective training of a deeper network, although there is still potential for further enhancement through architectural tweaks.