In [1]:
!pip install -q transformers datasets evaluate

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/542.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━[0m [32m307.2/542.0 kB[0m [31m9.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m401.2/401.2 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[?25h

Import Libraries and Load Data

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.datasets import cifar10
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.models import Sequential
from keras.models import clone_model
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score

# Fetch the CIFAR-10 dataset and unpack its train and test splits.
train_split, test_split = cifar10.load_data()
X_train, Y_train = train_split
X_test, Y_test = test_split

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


Data Preprocessing

In [3]:
NUM_CLASSES = 10  # number of target classes passed to to_categorical


def _scale_pixels(images):
    """Return `images` as float32 divided by 255.

    Only integer-typed (raw) pixel arrays are rescaled; arrays that are already
    floating point are returned unchanged, so re-running this cell does not
    divide the data by 255 a second time.
    """
    if np.issubdtype(images.dtype, np.integer):
        return images.astype('float32') / 255.0
    return images


def _one_hot(labels):
    """Return `labels` converted to a binary class matrix with NUM_CLASSES columns.

    A 2-D array that already has NUM_CLASSES columns is treated as encoded and
    returned unchanged, so re-running this cell does not encode it twice.
    """
    if labels.ndim == 2 and labels.shape[1] == NUM_CLASSES:
        return labels
    return to_categorical(labels, NUM_CLASSES)


# Normalize data
X_train = _scale_pixels(X_train)
X_test = _scale_pixels(X_test)

# Convert class vectors to binary class matrices
Y_train = _one_hot(Y_train)
Y_test = _one_hot(Y_test)

Define the Model Architecture

Configuration 1

In [4]:
# Configuration 1: two convolutional stages (32 then 64 filters) feeding a
# 512-unit dense layer and a 10-way softmax output.
model1 = Sequential()

# Stage 1: 32-filter 3x3 convolutions, 2x2 max-pooling, 25% dropout.
model1.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3)))
model1.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
model1.add(MaxPooling2D(pool_size=(2, 2)))
model1.add(Dropout(rate=0.25))

# Stage 2: 64-filter 3x3 convolutions, 2x2 max-pooling, 25% dropout.
model1.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model1.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model1.add(MaxPooling2D(pool_size=(2, 2)))
model1.add(Dropout(rate=0.25))

# Classifier head.
model1.add(Flatten())
model1.add(Dense(units=512, activation='relu'))
model1.add(Dropout(rate=0.5))
model1.add(Dense(units=10, activation='softmax'))


Configuration 2 - Change in Layer Depth and Filters

In [5]:
# Configuration 2: same layer layout as Configuration 1 but with 48/96 filters,
# a 1024-unit dense layer and dropout rates of 0.2 / 0.3 / 0.4.
model2_layers = [
    # Stage 1
    Conv2D(filters=48, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3)),
    Conv2D(filters=48, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.2),
    # Stage 2
    Conv2D(filters=96, kernel_size=(3, 3), activation='relu'),
    Conv2D(filters=96, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.3),
    # Classifier head
    Flatten(),
    Dense(units=1024, activation='relu'),
    Dropout(rate=0.4),
    Dense(units=10, activation='softmax'),
]
model2 = Sequential(model2_layers)


Configuration 3 - Introduction of Batch Normalization

In [6]:
from keras.layers import BatchNormalization

# Configuration 3: Configuration 1's layout with a BatchNormalization layer
# after every convolution.
model3_features = [
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3)),
    BatchNormalization(),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),

    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
]
model3_head = [
    Flatten(),
    Dense(units=512, activation='relu'),
    Dropout(rate=0.5),
    Dense(units=10, activation='softmax'),
]
model3 = Sequential(model3_features + model3_head)



Configuration 4 - Different Activation Function (ELU) and Learning Rate

In [7]:
# Configuration 4: Configuration 1's layout with ELU in place of ReLU on every
# hidden layer; the output layer stays softmax.
MODEL4_ACTIVATION = 'elu'

model4 = Sequential()

model4.add(Conv2D(filters=32, kernel_size=(3, 3), activation=MODEL4_ACTIVATION, input_shape=(32, 32, 3)))
model4.add(Conv2D(filters=32, kernel_size=(3, 3), activation=MODEL4_ACTIVATION))
model4.add(MaxPooling2D(pool_size=(2, 2)))
model4.add(Dropout(rate=0.25))

model4.add(Conv2D(filters=64, kernel_size=(3, 3), activation=MODEL4_ACTIVATION))
model4.add(Conv2D(filters=64, kernel_size=(3, 3), activation=MODEL4_ACTIVATION))
model4.add(MaxPooling2D(pool_size=(2, 2)))
model4.add(Dropout(rate=0.25))

model4.add(Flatten())
model4.add(Dense(units=512, activation=MODEL4_ACTIVATION))
model4.add(Dropout(rate=0.5))
model4.add(Dense(units=10, activation='softmax'))



Configuration 5 - Varying Dropout Rates

In [8]:
# Configuration 5: Configuration 1's layout with dropout rates of
# 0.2 / 0.35 / 0.45 after the two convolutional stages and the dense layer.
model5_layers = [
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3)),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.2),

    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.35),

    Flatten(),
    Dense(units=512, activation='relu'),
    Dropout(rate=0.45),
    Dense(units=10, activation='softmax'),
]
model5 = Sequential(model5_layers)



Configuration 6 - Improving on Configuration 1 (Increased Learning Rate)


In [9]:
# Configuration 6: layer-for-layer the same architecture as Configuration 1;
# built as a separate model object.
model6 = Sequential()

# Stage 1
model6.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3)))
model6.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
model6.add(MaxPooling2D(pool_size=(2, 2)))
model6.add(Dropout(rate=0.25))

# Stage 2
model6.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model6.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model6.add(MaxPooling2D(pool_size=(2, 2)))
model6.add(Dropout(rate=0.25))

# Classifier head
model6.add(Flatten())
model6.add(Dense(units=512, activation='relu'))
model6.add(Dropout(rate=0.5))
model6.add(Dense(units=10, activation='softmax'))

Configuration 7 - Improving on Configuration 6 (Increased Learning Rate, Data Augmentation)

In [10]:

# Configuration 7: 'same'-padded convolutions with 64 then 128 filters,
# dropout of 0.3 / 0.4 / 0.5 and a 1024-unit dense layer.
model7_features = [
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.3),

    Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu'),
    Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.4),
]
model7_head = [
    Flatten(),
    Dense(units=1024, activation='relu'),
    Dropout(rate=0.5),
    Dense(units=10, activation='softmax'),
]
model7 = Sequential(model7_features + model7_head)

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augmentation settings: small rotations, shifts and zooms plus horizontal
# flips; pixels exposed by a transform are filled with the nearest value.
augmentation_options = {
    'rotation_range': 10,
    'width_shift_range': 0.05,
    'height_shift_range': 0.05,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
    'zoom_range': 0.1,
}
datagen = ImageDataGenerator(**augmentation_options)
datagen.fit(X_train)

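Configuration 9 - Improving on Configuration 7 (Batch Normalization, L2 Regularization, Learning Rate Scheduler). Configuration 8 has no section of its own: it reuses Configuration 7's architecture with a learning rate of 0.01 and is set up in the training cell below.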

In [11]:
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.regularizers import l2

# Configuration 9: Configuration 7's layout with BatchNormalization after each
# convolution and an L2 penalty on every convolution kernel.
MODEL9_L2_FACTOR = 0.001

model9 = Sequential()

# Stage 1: 64-filter convolutions.
model9.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
                  kernel_regularizer=l2(MODEL9_L2_FACTOR), input_shape=(32, 32, 3)))
model9.add(BatchNormalization())
model9.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu',
                  kernel_regularizer=l2(MODEL9_L2_FACTOR)))
model9.add(BatchNormalization())
model9.add(MaxPooling2D(pool_size=(2, 2)))
model9.add(Dropout(rate=0.3))

# Stage 2: 128-filter convolutions.
model9.add(Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu',
                  kernel_regularizer=l2(MODEL9_L2_FACTOR)))
model9.add(BatchNormalization())
model9.add(Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu',
                  kernel_regularizer=l2(MODEL9_L2_FACTOR)))
model9.add(BatchNormalization())
model9.add(MaxPooling2D(pool_size=(2, 2)))
model9.add(Dropout(rate=0.4))

# Classifier head.
model9.add(Flatten())
model9.add(Dense(units=1024, activation='relu'))
model9.add(Dropout(rate=0.5))
model9.add(Dense(units=10, activation='softmax'))

# Implementing a Learning Rate Scheduler
from keras.callbacks import LearningRateScheduler
from keras.optimizers import Adam

def scheduler(epoch, lr, hold_epochs=10, decay_rate=0.1):
    """Hold the learning rate constant, then decay it exponentially per epoch.

    Intended to be wrapped in a LearningRateScheduler callback, which calls it
    as ``scheduler(epoch, lr)``; the extra parameters keep their defaults then.

    Args:
        epoch: Epoch index supplied by the callback.
        lr: Current learning rate.
        hold_epochs: Number of initial epochs during which `lr` is returned
            unchanged.
        decay_rate: From `hold_epochs` onwards each call returns
            ``lr * exp(-decay_rate)``.

    Returns:
        The learning rate to use next, as a plain Python float.
    """
    if epoch < hold_epochs:
        return float(lr)
    # np.exp on a Python scalar avoids creating a TensorFlow tensor just to
    # compute a constant factor.
    return float(lr * np.exp(-decay_rate))

# Wrap the schedule function in a Keras callback and create an Adam optimizer
# with a learning rate of 1e-3.
lr_schedule = LearningRateScheduler(schedule=scheduler)
optimizer = Adam(learning_rate=1e-3)

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Image augmentation pipeline (geometric jitter plus horizontal flips).
datagen = ImageDataGenerator(
    # geometric jitter
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.05,
    height_shift_range=0.05,
    # mirroring
    horizontal_flip=True,
    # how pixels outside the transformed image are filled
    fill_mode='nearest',
)
datagen.fit(X_train)

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Configuration 8 reuses Configuration 7's architecture with a higher learning
# rate. A plain `model8 = model7` only aliases the same object, so both list
# entries would train one shared set of weights; clone_model builds an
# independent copy with freshly initialised weights instead.
model8 = clone_model(model7)
models = [model1, model2, model3, model4, model5, model6, model7, model8, model9]

# Adam learning rate for each configuration number.
LEARNING_RATES = {
    1: 0.0001, 2: 0.0001, 3: 0.0001, 5: 0.0001,
    4: 0.001, 6: 0.001, 7: 0.001, 9: 0.001,
    8: 0.01,
}
# Configurations trained on augmented batches from `datagen`. The section
# headings introduce data augmentation with Configuration 7, so earlier
# configurations are fitted on the raw training arrays.
AUGMENTED_CONFIGS = {7, 8, 9}
# Configurations that also use the `lr_schedule` callback.
SCHEDULED_CONFIGS = {9}
BATCH_SIZE = 64
# Set to True to also save a confusion-matrix heatmap for every model.
SAVE_CONFUSION_MATRICES = False

test_accuracies = {}
histories = {}

# The true class indices do not depend on the model, so compute them once.
Y_true = np.argmax(Y_test, axis=1)

# enumerate() gives every list entry its own number; models.index(model) would
# return the first matching position whenever two entries are the same object.
for model_num, model in enumerate(models, start=1):
    learning_rate = LEARNING_RATES[model_num]

    # Create a fresh optimizer with the learning rate of this configuration
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    checkpoint = ModelCheckpoint(f'model{model_num}.keras', save_best_only=True)
    early_stopping = EarlyStopping(monitor='val_loss', verbose=1, patience=10, restore_best_weights=True)
    callbacks = [checkpoint, early_stopping]
    if model_num in SCHEDULED_CONFIGS:
        callbacks.append(lr_schedule)
    epochs = 50 if model_num <= 6 else 100

    # NOTE(review): the test set doubles as validation data here, so early
    # stopping and checkpoint selection see the data the accuracy is reported
    # on — consider holding out part of the training set instead.
    if model_num in AUGMENTED_CONFIGS:
        history = model.fit(
            datagen.flow(X_train, Y_train, batch_size=BATCH_SIZE),
            epochs=epochs,
            validation_data=(X_test, Y_test),
            callbacks=callbacks,
        )
    else:
        history = model.fit(
            X_train, Y_train,
            batch_size=BATCH_SIZE,
            epochs=epochs,
            validation_data=(X_test, Y_test),
            callbacks=callbacks,
        )
    histories[model_num] = history

    # Evaluate model on test data
    scores = model.evaluate(X_test, Y_test, verbose=1)
    print(f'Model {model_num} Test accuracy: {scores[1]*100}%')
    test_accuracies[model_num] = scores[1] * 100

    # Predict the outputs on the test set
    Y_pred = model.predict(X_test)
    Y_pred_classes = np.argmax(Y_pred, axis=1)

    if SAVE_CONFUSION_MATRICES:
        # Generate the confusion matrix and save it as a heatmap image
        cm = confusion_matrix(Y_true, Y_pred_classes)
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.title(f'Confusion Matrix for Model {model_num}')
        plt.savefig(f'confusion_matrix_model{model_num}.png')
        plt.close()


# One column per configuration number, one row holding the test accuracy in percent.
pd.DataFrame(test_accuracies, index=['Test Accuracy']).to_csv('test_accuracies.csv')








Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50

In [None]:
# Configuration 8: Configuration 7's architecture trained with Adam at a
# learning rate of 0.01. clone_model yields an independent copy with freshly
# initialised weights; `model8 = model7` would merely alias model7, so this
# cell would keep training (and overwrite) model7's weights.
model8 = clone_model(model7)
optimizer = Adam(learning_rate=0.01)
model8.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

checkpoint = ModelCheckpoint('model8.keras', save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', verbose=1, patience=10, restore_best_weights=True)
epochs = 50

# NOTE(review): the test set doubles as validation data, so early stopping and
# checkpoint selection see the data the accuracy is reported on.
history = model8.fit(
    datagen.flow(X_train, Y_train, batch_size=64),
    epochs=epochs,
    validation_data=(X_test, Y_test),
    callbacks=[checkpoint, early_stopping],
)

# Evaluate model on test data
scores = model8.evaluate(X_test, Y_test, verbose=1)
print(f'Model 8 Test accuracy: {scores[1]*100}%')

# Predict the outputs on the test set
Y_pred = model8.predict(X_test)
Y_pred_classes = np.argmax(Y_pred, axis=1)
Y_true = np.argmax(Y_test, axis=1)

# Generate the confusion matrix and save it as a heatmap image
cm = confusion_matrix(Y_true, Y_pred_classes)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.title('Confusion Matrix for Model 8')
plt.savefig('confusion_matrix_model8.png')
plt.close()

In [None]:
import os
from google.colab import files

# Download the Model 8 confusion-matrix image through the Colab file helper.
filename = 'confusion_matrix_model{}.png'.format(8)
files.download(filename)

In [None]:
# Print the layer-by-layer summary of every configuration, in order.
for summarized_model in (model1, model2, model3, model4, model5,
                         model6, model7, model8, model9):
    summarized_model.summary()