## CSC 578 HW\#7 Intel Image Classification Competition (Spring 2024)

### Name: Art Yalovenko

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout
import matplotlib.pyplot as plt
import csv
import pandas as pd

from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import BatchNormalization

import os
import cv2

### Load the training data. Split into training 80% and validation 20%.

In [None]:
train_directory = '../input/csc-578-hw-7-spring-2024/train'

# Options common to both subsets. The split fraction and the seed must be the
# same in both calls so the training and validation subsets do not overlap.
_split_options = dict(
    labels='inferred',         # sub-directory names become the target labels
    label_mode='categorical',  # targets are encoded as one-hot vectors
    validation_split=0.2,      # 80% training / 20% validation
    seed=123,
    class_names=None,
    color_mode='rgb',
    batch_size=32,
    image_size=(150, 150),
)

# 80% portion used for fitting.
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_directory, subset="training", **_split_options
)

# Remaining 20% used for validation.
valid_dataset = tf.keras.utils.image_dataset_from_directory(
    train_directory, subset="validation", **_split_options
)

In [None]:
# Preview 30 images from the first training batch in a 6x5 grid,
# each titled with its class name.
class_names = train_dataset.class_names
plt.figure(figsize=(10, 12))
for images, labels in train_dataset.take(1):
    for i in range(30):
        picture = images[i].numpy().astype("uint8")   # cast to integer pixel values for display
        caption = class_names[np.argmax(labels[i])]   # one-hot label -> class name
        ax = plt.subplot(6, 5, i + 1)
        plt.imshow(picture)
        plt.title(caption)
        plt.axis("off")

## Previously Best model (model5)

## Previously we introduced a learning-rate scheduler and L2 regularization. Now increasing the L2 regularization to 0.0025 (0.005 was already tried) and adding a dropout layer

In [None]:
# Configure the ReduceLROnPlateau callback that is passed to fit().
#
#   monitor='val_loss' -> react to a plateau in the validation loss
#   factor=0.5         -> halve the learning rate when val_loss stops improving
#   patience=3         -> wait for 3 consecutive epochs without improvement
#   min_lr=0.00001     -> never reduce the learning rate below 0.00001
#   verbose=1          -> print a message whenever the learning rate is reduced

lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

In [None]:
# model5: a single conv block and a small dense head. Every weight layer
# carries L2 regularization of 0.0025; dropout follows the pooling layer
# and the hidden dense layer.
model5 = keras.Sequential([
    keras.layers.Rescaling(1./255, input_shape=(150, 150, 3)),  # scale inputs by 1/255
    keras.layers.Conv2D(64, (3,3), activation='relu', kernel_regularizer=l2(0.0025)),
    keras.layers.MaxPooling2D(2,2),
    Dropout(0.25),  # dropout after the conv block
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu', kernel_regularizer=l2(0.0025)),
    Dropout(0.25),  # dropout before the output layer
    keras.layers.Dense(6, activation='softmax', kernel_regularizer=l2(0.0025)),  # 6 output classes
])

# Compile with Adam at a learning rate of 0.001
opt = Adam(learning_rate=0.001)
model5.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [None]:
# Fit model5 for 15 epochs, evaluating on the validation split each epoch;
# lr_scheduler is attached as a callback.
history5 = model5.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=15,
    callbacks=[lr_scheduler],
)

In [None]:
# Per-epoch metrics recorded while fitting model5.
metrics = history5.history
acc, val_acc = metrics['accuracy'], metrics['val_accuracy']
loss, val_loss = metrics['loss'], metrics['val_loss']

epochs_range = range(len(acc))

# Two stacked panels: accuracy on top, loss underneath.
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
plt.show()

## Experimenting with batch normalization

In [None]:
from tensorflow.keras.layers import BatchNormalization

In [None]:
# model6: the model5 layout with BatchNormalization inserted between each
# linear layer and its activation. The layers feeding a BatchNormalization
# are created with use_bias=False because BatchNormalization includes a
# bias component of its own.
model6 = keras.Sequential([
    keras.layers.Rescaling(1./255, input_shape=(150, 150, 3)),

    # Convolutional block: Conv -> BatchNorm -> ReLU -> MaxPool -> Dropout
    keras.layers.Conv2D(64, (3,3), use_bias=False),
    BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.MaxPooling2D(2,2),
    Dropout(0.25),

    # Flatten the conv features so they can feed the dense layers
    keras.layers.Flatten(),

    # Dense block: Dense -> BatchNorm -> ReLU -> Dropout
    keras.layers.Dense(128, use_bias=False, kernel_regularizer=l2(0.0025)),
    BatchNormalization(),
    keras.layers.Activation('relu'),
    Dropout(0.25),

    # Output layer: Dense -> BatchNorm -> softmax over the 6 classes
    keras.layers.Dense(6, use_bias=False, kernel_regularizer=l2(0.0025)),
    BatchNormalization(),
    keras.layers.Activation('softmax'),
])

# Compile the model
opt = Adam(learning_rate=0.001)
model6.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [None]:
# Fit model6 for 15 epochs, evaluating on the validation split each epoch;
# lr_scheduler is attached as a callback.
history6 = model6.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=15,
    callbacks=[lr_scheduler],
)

In [None]:
# Per-epoch metrics recorded while fitting model6.
metrics = history6.history
acc, val_acc = metrics['accuracy'], metrics['val_accuracy']
loss, val_loss = metrics['loss'], metrics['val_loss']

epochs_range = range(len(acc))

# Two stacked panels: accuracy on top, loss underneath.
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
plt.show()

**Extremely unstable and overfit** 

## adding dropout layers after each convolutional and dense layer to provide a more robust regularization effect

In [None]:
# model7: two convolutional blocks and a dense head, each arranged as
# linear layer -> BatchNormalization -> ReLU -> Dropout, with dropout rates
# increasing towards the output (0.3, 0.4, 0.5).
#
# NOTE: the Conv2D layers deliberately carry no `activation` argument. The
# ReLU is applied once, by the explicit Activation layer that follows the
# BatchNormalization; passing activation='relu' to Conv2D as well would
# rectify the features before normalization and apply ReLU twice.
model7 = keras.Sequential()
model7.add(keras.layers.Rescaling(1./255, input_shape=(150, 150, 3)))

# Convolutional Block with Batch Normalization and increased Dropout
model7.add(keras.layers.Conv2D(64, (3,3), kernel_regularizer=l2(0.0025), use_bias=False))
model7.add(BatchNormalization())
model7.add(keras.layers.Activation('relu'))
model7.add(keras.layers.MaxPooling2D(2,2))
model7.add(Dropout(0.3))  # Increased dropout rate after MaxPooling

# Additional Convolutional Block for more complex pattern recognition
model7.add(keras.layers.Conv2D(128, (3,3), kernel_regularizer=l2(0.0025), use_bias=False))
model7.add(BatchNormalization())
model7.add(keras.layers.Activation('relu'))
model7.add(keras.layers.MaxPooling2D(2,2))
model7.add(Dropout(0.4))  # Further increased dropout rate after second Convolutional layer

# Flattening the outputs from the convolutional blocks to feed into the dense layers
model7.add(keras.layers.Flatten())

# Dense Block with Batch Normalization and Dropout
model7.add(keras.layers.Dense(128, kernel_regularizer=l2(0.0025), use_bias=False))
model7.add(BatchNormalization())
model7.add(keras.layers.Activation('relu'))
model7.add(Dropout(0.5))  # Increased dropout rate in the dense layer before the output layer

# Output Layer with Batch Normalization, then softmax over the 6 classes
model7.add(keras.layers.Dense(6, use_bias=False, kernel_regularizer=l2(0.0025)))
model7.add(BatchNormalization())
model7.add(keras.layers.Activation('softmax'))

# Compile the model with an optimizer, loss function, and metrics
opt = Adam(learning_rate=0.001)  # Using a standard learning rate
model7.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Fit model7 for 15 epochs, evaluating on the validation split each epoch;
# lr_scheduler is attached as a callback.
history7 = model7.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=15,
    callbacks=[lr_scheduler],
)

**that was fascinating! performance was horrible until the learning rate dropped lower and then it started overfitting??**

In [None]:
# Per-epoch metrics recorded while fitting model7.
metrics = history7.history
acc, val_acc = metrics['accuracy'], metrics['val_accuracy']
loss, val_loss = metrics['loss'], metrics['val_loss']

epochs_range = range(len(acc))

# Two stacked panels: accuracy on top, loss underneath.
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
plt.show()

**While the performance is better across training and validation set, there are signs of overfitting and clear model instability**

## We will dial it back to the better performing and simpler model5 and experiment with dropout layers

### First we increase the Dropout after the Convolutional Layer

In [None]:
# model5_1: same layout as model5, but the dropout after the conv block is
# raised to 0.35 while the dropout before the output layer stays at 0.25.
model5_1 = keras.Sequential([
    keras.layers.Rescaling(1./255, input_shape=(150, 150, 3)),
    keras.layers.Conv2D(64, (3,3), activation='relu', kernel_regularizer=l2(0.0025)),
    keras.layers.MaxPooling2D(2,2),
    Dropout(0.35),  # dropout after the conv block
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu', kernel_regularizer=l2(0.0025)),
    Dropout(0.25),  # dropout before the output layer
    keras.layers.Dense(6, activation='softmax', kernel_regularizer=l2(0.0025)),
])

# Compile with Adam at a learning rate of 0.001
opt = Adam(learning_rate=0.001)
model5_1.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [None]:
# Fit model5_1 for 15 epochs, evaluating on the validation split each epoch;
# lr_scheduler is attached as a callback.
history5_1 = model5_1.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=15,
    callbacks=[lr_scheduler],
)

In [None]:
# Per-epoch metrics recorded while fitting model5_1.
metrics = history5_1.history
acc, val_acc = metrics['accuracy'], metrics['val_accuracy']
loss, val_loss = metrics['loss'], metrics['val_loss']

epochs_range = range(len(acc))

# Two stacked panels: accuracy on top, loss underneath.
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
plt.show()

### Increase only the dropout before the output layer

In [None]:
# model5_2: same layout as model5, but only the dropout before the output
# layer is raised (0.35); the dropout after the conv block stays at 0.25.
model5_2 = keras.Sequential([
    keras.layers.Rescaling(1./255, input_shape=(150, 150, 3)),
    keras.layers.Conv2D(64, (3,3), activation='relu', kernel_regularizer=l2(0.0025)),
    keras.layers.MaxPooling2D(2,2),
    Dropout(0.25),  # dropout after the conv block
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu', kernel_regularizer=l2(0.0025)),
    Dropout(0.35),  # dropout before the output layer
    keras.layers.Dense(6, activation='softmax', kernel_regularizer=l2(0.0025)),
])

# Compile with Adam at a learning rate of 0.001
opt = Adam(learning_rate=0.001)
model5_2.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [None]:
# Fit model5_2 for 15 epochs, evaluating on the validation split each epoch;
# lr_scheduler is attached as a callback.
history5_2 = model5_2.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=15,
    callbacks=[lr_scheduler],
)

In [None]:
# Per-epoch metrics recorded while fitting model5_2.
metrics = history5_2.history
acc, val_acc = metrics['accuracy'], metrics['val_accuracy']
loss, val_loss = metrics['loss'], metrics['val_loss']

epochs_range = range(len(acc))

# Two stacked panels: accuracy on top, loss underneath.
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
plt.show()

### increase both dropout rates

In [None]:
# model5_3: same layout as model5 with both dropout rates raised to 0.35.
model5_3 = keras.Sequential([
    keras.layers.Rescaling(1./255, input_shape=(150, 150, 3)),
    keras.layers.Conv2D(64, (3,3), activation='relu', kernel_regularizer=l2(0.0025)),
    keras.layers.MaxPooling2D(2,2),
    Dropout(0.35),  # dropout after the conv block
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu', kernel_regularizer=l2(0.0025)),
    Dropout(0.35),  # dropout before the output layer
    keras.layers.Dense(6, activation='softmax', kernel_regularizer=l2(0.0025)),
])

# Compile with Adam at a learning rate of 0.001
opt = Adam(learning_rate=0.001)
model5_3.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [None]:
# Fit model5_3 for 15 epochs, evaluating on the validation split each epoch;
# lr_scheduler is attached as a callback.
history5_3 = model5_3.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=15,
    callbacks=[lr_scheduler],
)

In [None]:
# Per-epoch metrics recorded while fitting model5_3.
metrics = history5_3.history
acc, val_acc = metrics['accuracy'], metrics['val_accuracy']
loss, val_loss = metrics['loss'], metrics['val_loss']

epochs_range = range(len(acc))

# Two stacked panels: accuracy on top, loss underneath.
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
plt.show()

#### **Model5_2, with the increased dropout rate before the output layer, seems to be the best**

## **NEW IMPROVED MODEL | No sign of overfitting, stable, and 5% higher accuracy scores across both test and val sets. Much lower loss also**

### Let's experiment with filter sizes and convolutional layers to try to capture a little more complexity, since we seemingly have enough regularization

#### also decreased learning rate to 0.0001 and regularization from 0.0025 to 0.001

In [None]:
# model5_x: three convolutional blocks (32 -> 64 -> 128 filters) followed by
# a dense head. L2 regularization is 0.001 on every weight layer and the
# optimizer uses a learning rate of 0.0001.
model5_x = keras.Sequential([
    keras.layers.Rescaling(1./255, input_shape=(150, 150, 3)),

    # Block 1: 32 filters with a 3x3 kernel
    keras.layers.Conv2D(32, (3,3), activation='relu', kernel_regularizer=l2(0.001)),
    keras.layers.MaxPooling2D(2,2),
    Dropout(0.25),

    # Block 2: 64 filters with a larger 5x5 kernel
    keras.layers.Conv2D(64, (5,5), activation='relu', kernel_regularizer=l2(0.001)),
    keras.layers.MaxPooling2D(2,2),
    Dropout(0.25),

    # Block 3: 128 filters with a 3x3 kernel
    keras.layers.Conv2D(128, (3,3), activation='relu', kernel_regularizer=l2(0.001)),
    keras.layers.MaxPooling2D(2,2),
    Dropout(0.35),

    # Dense head ending in a softmax over the 6 classes
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.35),
    keras.layers.Dense(6, activation='softmax', kernel_regularizer=l2(0.001)),
])

# Compile the model
opt = Adam(learning_rate=0.0001)
model5_x.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [None]:
# Fit model5_x for 15 epochs, evaluating on the validation split each epoch;
# lr_scheduler is attached as a callback.
history5_x = model5_x.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=15,
    callbacks=[lr_scheduler],
)

In [None]:
# Per-epoch metrics recorded while fitting model5_x.
metrics = history5_x.history
acc, val_acc = metrics['accuracy'], metrics['val_accuracy']
loss, val_loss = metrics['loss'], metrics['val_loss']

epochs_range = range(len(acc))

# Two stacked panels: accuracy on top, loss underneath.
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
plt.show()

### Run model on Test Set

In [None]:
# Load the test_pred data (which has no target labels) and predict the class
# probabilities of every image with model5_x.

pred_directory = '../input/csc-578-hw-7-spring-2024/test_pred'
result_dict = {}  # dictionary to store predictions (keyed by file number)

# iterate over files in that directory
for filename in os.listdir(pred_directory):
    f = os.path.join(pred_directory, filename)
    # skip sub-directories and anything else that is not a regular file
    if not os.path.isfile(f):
        continue

    # filename e.g. '103.jpg' -> 103; splitext copes with any extension length
    fnum = int(os.path.splitext(filename)[0])

    img = cv2.imread(f)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        raise OSError(f"Could not read image file: {f}")

    # OpenCV decodes images as BGR, whereas the training data was loaded with
    # color_mode='rgb'; reorder the channels so inference matches training.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Training images were resized to 150x150, so do the same here instead of
    # assuming every test image already has that size.
    if img.shape[:2] != (150, 150):
        img = cv2.resize(img, (150, 150))

    # No manual /255 scaling: model5_x begins with a Rescaling(1./255) layer.
    img = img.reshape(-1, 150, 150, 3)  # add the batch dimension

    # verbose=0 avoids printing one progress bar per image
    pred = model5_x.predict(img, verbose=0)
    result_dict[fnum] = pred[0]  # [0] because there is only one data
print(len(result_dict))

In [None]:
# Order the predictions by file number so the rows come out ascending.
sorted_results = sorted(result_dict.items())

# Column names: the file number followed by one column per class.
header_row = ['fnum', 'buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']

# Create (or overwrite) the submission file: header first, then one row per image.
with open('submission.csv', 'w', newline='') as out_file:
    csv_out = csv.writer(out_file)
    csv_out.writerow(header_row)
    csv_out.writerows([fnum, *probs] for fnum, probs in sorted_results)

print("'submission.csv' created successfully!")