## Imports

In [None]:
from pandas import DataFrame
import matplotlib.pyplot as plt
import numpy as np
import random

## Utilities

In [None]:
from pickle import load

def unpickle(file):
    """Load and return the object stored in a pickle file.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object. ``encoding='latin1'`` is passed to the
        loader so 8-bit strings pickled by Python 2 can be decoded.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(file, 'rb') as fo:
        # Return directly instead of binding a local named ``dict``,
        # which would shadow the builtin.
        return load(fo, encoding='latin1')

In [None]:
def read_data(filename):
    """Load a pickled file from the ``data`` directory.

    Args:
        filename: Name of the file inside ``data/``.

    Returns:
        A ``(data, df)`` tuple: the unpickled mapping itself, and a
        two-column DataFrame ("Keys", "Values") holding one row per
        item of that mapping.
    """
    path = f"data/{filename}"
    raw = unpickle(path)

    # Materialise the (key, value) pairs so each pair becomes one row.
    rows = list(raw.items())
    table = DataFrame(rows, columns=["Keys", "Values"])
    return raw, table

## Data

### Reading Labels

In [None]:
# Read the dataset metadata file: `labels_data` is the raw unpickled mapping,
# `labels_df` is its key/value table.
labels_data, labels_df = read_data("batches.meta")
# Bare expression so the notebook renders the table as the cell output.
labels_df

In [None]:
# Human-readable class names from the metadata. Elsewhere in this notebook
# numeric labels are used as indices into this list.
labels = labels_data["label_names"]
# One-column table of the class names, for display only.
all_labels_df = DataFrame(labels, columns=["Labels"])
# Bare expression so the notebook renders the table as the cell output.
all_labels_df

### Reading Data

In [None]:
# Load the five training batches and the separate test batch.
data1, df1 = read_data("data_batch_1")
data2, df2 = read_data("data_batch_2")
data3, df3 = read_data("data_batch_3")
data4, df4 = read_data("data_batch_4")
data5, df5 = read_data("data_batch_5")
test_data, test_df = read_data("test_batch")

# Stack the training batches along the sample axis, keeping images and
# labels in the same batch order so they stay aligned.
train_batches = (data1, data2, data3, data4, data5)
dataset = np.concatenate([batch["data"] for batch in train_batches], axis=0)
dataset_labels = np.concatenate([batch["labels"] for batch in train_batches], axis=0)

# The test batch is used as-is.
test_dataset = test_data["data"]
test_dataset_labels = test_data["labels"]

# Sanity-check the sizes of everything that was loaded.
print("Combined Data Shape:", dataset.shape)
print("Combined Labels Length:", len(dataset_labels))
print("Test Data Shape: ", test_dataset.shape)
print("Test Data Labels Length: ", len(test_dataset_labels))

In [None]:
# Every row is a flat vector that is unflattened channel-first into
# (N, 3, 32, 32); the channel axis is then moved last to give the
# (N, height, width, channel) layout used for display and by the model.
train_channel_first = dataset.reshape(len(dataset), 3, 32, 32)
dataset = np.transpose(train_channel_first, (0, 2, 3, 1))
print(dataset.shape)

# Same conversion for the test images.
test_channel_first = test_dataset.reshape(len(test_dataset), 3, 32, 32)
test_dataset = np.transpose(test_channel_first, (0, 2, 3, 1))
print(test_dataset.shape)

In [None]:
def display_dataset_images(temp_data, temp_labels, images_per_label=10):
    """Show one row of randomly chosen sample images for every label.

    Args:
        temp_data: Indexable collection of images; ``temp_data[i]`` is
            handed to ``imshow``.
        temp_labels: Label of each image, aligned with ``temp_data``.
            Each label is used as an index into the module-level
            ``labels`` list to build the row title.
        images_per_label: Number of columns per row, and the maximum
            number of images drawn for a label. A label with fewer
            images shows all of them and leaves the rest of the row
            blank.
    """
    # Group the image indices by label in a single pass instead of
    # rescanning every label once per class.
    indices_by_label = {}
    for image_index, image_label in enumerate(temp_labels):
        indices_by_label.setdefault(image_label, []).append(image_index)

    # Sorted so rows always appear in the same label order.
    for current_label in sorted(indices_by_label):
        current_label_indices = indices_by_label[current_label]

        # random.sample raises if asked for more items than exist, so cap
        # the sample at the number of images available for this label.
        sample_size = min(images_per_label, len(current_label_indices))
        chosen_indices = random.sample(current_label_indices, sample_size)

        # squeeze=False keeps `axes` two-dimensional even when there is a
        # single column, so indexing works for images_per_label == 1.
        fig, axes = plt.subplots(
            1, images_per_label, figsize=(12, 1.5), squeeze=False
        )
        fig.suptitle(f"Label: {labels[current_label]}")

        row_axes = axes[0]
        # Hide the frame of every cell, including ones left without an image.
        for axis in row_axes:
            axis.axis('off')
        for axis, index in zip(row_axes, chosen_indices):
            axis.imshow(temp_data[index])

        plt.show()

In [None]:
# Visual sanity check: draw a row of random training images for each label.
display_dataset_images(dataset, dataset_labels)

## Data Preprocessing

### Normalization

In [None]:
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Scale pixel values by 1/255 (maps to [0, 1] assuming 8-bit pixel data).
# The division already produces new float ndarrays, so no additional
# np.array(...) copy of these large arrays is made afterwards.
X_train_normalized = dataset / 255.0
X_test_normalized = test_dataset / 255.0

# One-hot encode the integer class labels.
y_train = to_categorical(dataset_labels)
y_test = to_categorical(test_dataset_labels)

# Hold out 20% of the training data as a validation set; the fixed seed
# makes the split reproducible.
X_train_normalized, X_validation, y_train, y_validation = train_test_split(
    X_train_normalized, y_train, test_size=0.2, random_state=42
)

# print shapes
print("X_train_normalized shape:", X_train_normalized.shape)
print("X_test_normalized shape:", X_test_normalized.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)
print("X_validation shape:", X_validation.shape)
print("y_validation shape:", y_validation.shape)


## Model Building

### Model Structure

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.regularizers import l2

# CNN classifier: three convolutional stages (32, 64 and 128 filters), each
# convolution followed by batch normalization and each stage closed by 2x2
# max pooling, then a dropout + L2-regularized dense head with 10 outputs.
model = Sequential([
    # Stage 1: two 32-filter convolutions on the 32x32x3 input.
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(32, 32, 3)),
    BatchNormalization(),
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    # Stage 2: two 64-filter convolutions.
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    # Stage 3: a single 128-filter convolution.
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    # Classifier head: flatten, regularize, and project to class scores.
    Flatten(),
    Dropout(0.2),
    Dense(512, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.3),
    Dense(10, activation='softmax', kernel_regularizer=l2(0.001)),
])

model.summary()

# Implement callbacks for early stopping and learning rate reduction
# early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)

### Model Training

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Random geometric augmentation (rotation, shifts, shear, zoom, horizontal
# flip) generated on the fly for each training batch.
datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# datagen.fit(...) is not called: it only computes dataset statistics for
# the featurewise_center / featurewise_std_normalization / zca_whitening
# options, and none of those are enabled above.

# Train the model
model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])

# Feed the training data through the generator so the augmentation
# configured above is actually applied (fitting on the raw arrays would
# leave `datagen` unused). Validation data stays un-augmented.
history = model.fit(
    datagen.flow(X_train_normalized, y_train, batch_size=32),
    validation_data=(X_validation, y_validation),
    epochs=30,
)

### Accuracy Plot

In [None]:

# Plot training and validation accuracy per epoch on the same axes.
accuracy_curves = (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
)
for history_key, curve_label in accuracy_curves:
    plt.plot(history.history[history_key], label=curve_label)

plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)  # grid lines make the two curves easier to compare
plt.show()

### Loss Plot

### Model Prediction & Testing

In [None]:
# Evaluate on the held-out test data. The result is unpacked as the loss
# followed by the single metric the model was compiled with (accuracy).
test_loss, test_acc = model.evaluate(X_test_normalized, y_test)
# Accuracy is returned as a fraction; scale it to a percentage for display.
print(f'Test accuracy: {test_acc * 100}%')