# In [1]:
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras import layers, callbacks, Sequential
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
from google.colab import drive
# Load train_images and train_labels using pickle
def load_data(data_dir='/content/drive/MyDrive/Colab Notebooks/DL/Project1'):
    """Mount Google Drive and load the pickled training images and labels.

    Args:
        data_dir: Directory holding the ``636_project1_train_images`` and
            ``636_project1_train_labels`` pickle files. Defaults to the
            project folder on the mounted Drive.

    Returns:
        A ``(images, labels)`` tuple with whatever objects the two pickle
        files contain.
    """
    def _load_pickle(file_name):
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file; only point data_dir at files from a trusted source.
        with open(f'{data_dir}/{file_name}', 'rb') as f:
            return pickle.load(f)

    # The Drive must be mounted before paths under /content/drive resolve.
    drive.mount('/content/drive')
    images = _load_pickle('636_project1_train_images')
    labels = _load_pickle('636_project1_train_labels')
    return images, labels

# Preprocess the data
def preprocess_data(images, labels):
    """Convert images and labels to NumPy arrays and scale the images.

    Inputs may be tensor-like objects exposing a ``.numpy()`` method or
    anything ``np.asarray`` accepts (including arrays that were already
    converted), so the function no longer fails on plain NumPy input.

    Note that the scaling is applied on every call: passing images that were
    already divided by 255 scales them a second time.

    Args:
        images: Image data with raw pixel values.
        labels: Labels matching ``images``.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays, where ``images`` is
        ``float32`` and divided by 255.0.
    """
    def _to_numpy(data):
        # Tensors expose .numpy(); ndarrays and sequences do not.
        to_numpy = getattr(data, "numpy", None)
        return to_numpy() if callable(to_numpy) else np.asarray(data)

    images = _to_numpy(images)
    labels = _to_numpy(labels)
    images = images.astype('float32') / 255.0
    return images, labels

# Create the convolutional neural network model
def create_model():
    """Build and compile the convolutional classifier.

    The network takes 28x28 single-channel inputs and ends in a 10-way
    softmax. It is compiled with Adam (learning rate 0.0005), sparse
    categorical cross-entropy loss and an accuracy metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Convolutional part: three 3x3 ReLU conv layers (32, 64, 128 filters),
    # with 2x2 max pooling after the first two and dropout (rate 0.4)
    # before the third to reduce overfitting.
    feature_extractor = [
        Conv2D(32, (3, 3), activation="relu", input_shape=(28, 28, 1)),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation="relu"),
        MaxPooling2D((2, 2)),
        Dropout(0.4),
        Conv2D(128, (3, 3), activation="relu"),
    ]
    # Dense part: flatten the feature maps, one hidden layer of 128 ReLU
    # units, then one softmax output unit per class.
    classifier_head = [
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation="softmax"),
    ]
    network = Sequential(feature_extractor + classifier_head)

    adam = tf.keras.optimizers.Adam(learning_rate=0.0005)
    network.compile(
        optimizer=adam,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network

# Train the model using k-fold cross-validation with k = 3. Overfitting shows up in the 3rd fold, hence training stops there.
def train_model(images, labels, k=3, epochs=20, batch_size=128):
    """Train one model over the k shuffled folds and return it.

    A single model is created up front and ``fit`` is called once per fold,
    so training continues from fold to fold. The model is checkpointed to
    ``FinalModel.keras`` during training and the history of every fold is
    plotted.

    NOTE: because the same model instance is reused, from the second fold on
    the validation split contains samples the model was already trained on in
    an earlier fold. The validation curves are therefore optimistic and are
    not an independent cross-validation estimate.

    Args:
        images: Array of input images, indexable by an array of indices.
        labels: Array of labels aligned with ``images``.
        k: Number of folds.
        epochs: Number of epochs to train on each fold.
        batch_size: Mini-batch size passed to ``fit``.

    Returns:
        The trained model (previously nothing was returned, which left
        callers without a model to evaluate).
    """
    kfold_cross = KFold(n_splits=k, shuffle=True)
    model = create_model()
    checkpoint_callback = callbacks.ModelCheckpoint("FinalModel.keras")
    folds = kfold_cross.split(images, labels)
    for fold_number, (train_idx, val_idx) in enumerate(folds, start=1):
        print(f'Fold {fold_number}')
        history = model.fit(
            images[train_idx],
            labels[train_idx],
            callbacks=[checkpoint_callback],
            epochs=epochs,
            validation_data=(images[val_idx], labels[val_idx]),
            batch_size=batch_size,
        )
        plot_history(history)
    return model


# Plot training and validation history to check for overfitting in different folds.
def plot_history(history):
    """Plot training vs. validation accuracy and loss for one ``fit`` run.

    Accuracy is drawn on the current figure and loss on a newly created one;
    both are then displayed.

    Args:
        history: Object whose ``history`` attribute maps ``"accuracy"``,
            ``"val_accuracy"``, ``"loss"`` and ``"val_loss"`` to per-epoch
            values.
    """
    # (training key, training label, validation key, validation label, title)
    panels = (
        ("accuracy", "Training accuracy",
         "val_accuracy", "Validation accuracy",
         "Training and validation accuracy"),
        ("loss", "Training loss",
         "val_loss", "Validation loss",
         "Training and validation loss"),
    )
    for position, (train_key, train_label, val_key, val_label, title) in enumerate(panels):
        if position:
            # Every panel after the first gets its own figure.
            plt.figure()
        plt.plot(history.history[train_key], "bo", label=train_label)
        plt.plot(history.history[val_key], "b", label=val_label)
        plt.title(title)
        plt.legend()
    plt.show()

if __name__ == "__main__":
    # Load and preprocess exactly once: preprocessing converts to NumPy and
    # divides by 255, so running it a second time on its own output is wrong.
    images, labels = load_data()
    images, labels = preprocess_data(images, labels)

    # Hold out everything after the first 58000 samples as a test set
    # *before* any training, so the test samples never reach the model.
    train_images, test_images = images[:58000], images[58000:]
    train_labels, test_labels = labels[:58000], labels[58000:]

    model = train_model(train_images, train_labels)
    if model is None:
        # Fall back to the checkpoint written during training when
        # train_model does not hand the model back.
        model = tf.keras.models.load_model("FinalModel.keras")

    test_loss, test_acc = model.evaluate(test_images, test_labels)
    print(f"Test loss: {test_loss:.4f} - test accuracy: {test_acc:.4f}")


# Notebook output: KeyboardInterrupt: ignored