# CNN Training and Inference Guide

**Check Your Dataset Format!!**

Before you begin, refer to the readme to ensure your dataset follows the required format.

## Step 0: Importing Libraries 

In [None]:
import warnings
import os
import json

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from PIL import Image
import cv2

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import tensorflow as tf
import keras

warnings.filterwarnings("ignore")

## Step 1: Dataset Configuration

In [None]:
# Set the paths to your dataset.
# Each *_labels.csv is read with the first column as the image file name
# (relative to the matching images folder) and the second column, if present,
# as the class label.
train_data_path = r"path_to_your_dataset/train_images"
train_labels_path = r"path_to_your_dataset/train_labels.csv"

test_data_path = r"path_to_your_dataset/test_images"
test_labels_path = r"path_to_your_dataset/test_labels.csv"

# Set both to None if you have no validation data (a validation split is then
# taken from the training set); otherwise set the paths
val_data_path = None
val_labels_path = None

# Set size to your current image shape, or the shape you want your images resized to
img_rows, img_cols = 64, 64

# Select 1 for grayscale and 3 for RGB images
channels = 3

# Folder name where the trained model and training results will be saved
run_results_folder = "cnn_run_results"

## Step 2: Data Processing and Loading Functions


In [None]:
# Load one single image (helper)
def load_one(path, data_path, size, channels):
    """Read one image from disk, resize it and scale its pixel values.

    Args:
        path: Image file name, relative to ``data_path``.
        data_path: Folder that contains the image.
        size: Target shape as ``(rows, cols)``.
        channels: ``1`` loads the image as grayscale; any other value loads
            it as a colour image and converts it from BGR to RGB.

    Returns:
        A float32 array holding the resized image with values divided by 255.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (missing file or
            unsupported/corrupt image data).
    """
    full_path = os.path.join(data_path, path)
    read_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR
    img = cv2.imread(full_path, read_flag)

    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface later as an opaque OpenCV assertion error.
    if img is None:
        raise FileNotFoundError(
            f"Could not read image '{full_path}': file is missing or is not a valid image."
        )

    if channels != 1:
        # OpenCV loads colour images as BGR; the rest of the notebook expects RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # cv2.resize expects (width, height), i.e. (cols, rows)
    img = cv2.resize(img, (size[1], size[0]))
    return np.array(img, dtype="float32") / 255.0

# Load dataset (helper)
def process_dataset(data_path, labels_path=None, size=(28, 28), channels=1):
    """Load every image listed in a labels CSV into one array.

    Args:
        data_path: Folder that contains the image files.
        labels_path: CSV file (path or file-like object) whose first column
            holds the image file names and whose optional second column holds
            the labels. Required, despite the ``None`` default.
        size: Target ``(rows, cols)`` shape passed to ``load_one``.
        channels: Channel selector passed to ``load_one``.

    Returns:
        A tuple ``(images, labels)``. ``images`` is an array stacking the
        loaded images in CSV order; ``labels`` is the second CSV column as an
        array, or ``None`` when the CSV has only one column.

    Raises:
        ValueError: If ``labels_path`` is ``None``.
    """
    if labels_path is None:
        raise ValueError(
            "labels_path is required: it must point to the CSV listing the image file names."
        )

    # Load the labels into a DataFrame
    df = pd.read_csv(labels_path)

    # First column: image file names (cast to str so numeric ids still work as paths)
    files = df.iloc[:, 0].astype(str).tolist()

    # Load each image in the dataset and put them in one array
    images = np.array([load_one(f, data_path, size, channels) for f in files])

    # Second column (when present): labels
    labels = df.iloc[:, 1].values if df.shape[1] > 1 else None
    return images, labels

print("Processing datasets...")

# Load datasets
X_train, Y_train = process_dataset(train_data_path, train_labels_path, (img_rows, img_cols), channels)
X_test, Y_test = process_dataset(test_data_path, test_labels_path, (img_rows, img_cols), channels)
if val_data_path:
    X_val, Y_val = process_dataset(val_data_path, val_labels_path, (img_rows, img_cols), channels)
else:
    # No validation set supplied: hold out 25% of the training data
    # (fixed random_state so the split is the same on every run)
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.25, random_state=42)


# Get the class labels (taken from the training labels only)
unique_labels = np.unique(Y_train)
class_labels = [str(label) for label in sorted(unique_labels)]
print(f"Automatically detected {len(class_labels)} classes: {class_labels}")

# Convert labels to integer indices (position of the label in class_labels).
# NOTE: a validation/test label that never appears in the training labels
# raises KeyError here.
label_to_idx = {str(lbl): idx for idx, lbl in enumerate(class_labels)}
Y_train = np.array([label_to_idx[str(l)] for l in Y_train])
Y_val = np.array([label_to_idx[str(l)] for l in Y_val])
if Y_test is not None:
    Y_test = np.array([label_to_idx[str(l)] for l in Y_test])


# Print out the shapes
print(f"X_train shape: {X_train.shape}")
print(f"Y_train shape: {Y_train.shape}")
print(f"X_val shape:   {X_val.shape}")
print(f"Y_val shape:   {Y_val.shape}")
print(f"X_test shape:  {X_test.shape}")
# Y_test is None when the test CSV has no label column, so guard the attribute access
if Y_test is not None:
    print(f"Y_test shape:  {Y_test.shape}")
else:
    print("Y_test shape:  None (test labels not provided)")

## Step 3: Visualizing Training Samples


In [None]:
# Plot up to 6 random samples from the training set
if X_train is not None and len(X_train) > 0:
    # np.random.choice(..., replace=False) raises if asked for more samples
    # than exist, so cap the request at the dataset size
    n_samples = min(6, len(X_train))
    plt.figure(figsize=(12, 8))
    indices = np.random.choice(len(X_train), n_samples, replace=False)
    for i, idx in enumerate(indices):
        plt.subplot(2, 3, i + 1)
        img = X_train[idx]
        plt.imshow(img, cmap='gray' if channels == 1 else None)
        # Y_train holds integer class indices at this point; map back to the label text
        plt.title(f"Label: {class_labels[int(Y_train[idx])]}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()

## Step 4: Label Encoding (One-Hot Encoding)


In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer class indices of the training and validation
# labels; each encoded vector has one entry per detected class.
num_classes = len(class_labels)
Y_train, Y_val = (
    to_categorical(labels, num_classes=num_classes) for labels in (Y_train, Y_val)
)

## Step 5: Defining the CNN Architecture

**Customize Your CNN Architecture:**

You can modify the architecture by adding, removing, or changing layers:

1. **Convolutional Layers (`Conv2D`)**: 
   - Add more `Conv2D` layers to increase model depth
   - Adjust `filters` to control the number of feature maps
   - Change `kernel_size` to adjust the receptive field
   - Modify `padding` ('same' or 'valid') to control output size
   - Change `activation` function (e.g., 'relu', 'tanh', 'sigmoid')

2. **Pooling Layers (`MaxPool2D` or `AvgPool2D`)**:
   - Add pooling layers after convolutional layers to reduce spatial dimensions
   - Adjust `pool_size` to control downsampling
   - Modify `strides` to control the step size

3. **Dropout Layers**:
   - Add `Dropout` layers to prevent overfitting
   - Adjust the dropout rate - higher values mean more regularization

4. **Dense (Fully Connected) Layers**:
   - Add more `Dense` layers before the final output layer
   - Adjust the number of units (e.g., 128, 256, 512) to control model capacity
   - The final `Dense` layer must have `len(class_labels)` units with `softmax` activation for classification



In [None]:
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dropout, Flatten, Dense

# The network is declared as one ordered list of layers: three
# convolution -> max-pooling -> dropout stages followed by a dense classifier.
model = keras.Sequential([
    # Conv stage 1: 8 filters, 5x5 kernel; also declares the input shape
    Conv2D(filters=8, kernel_size=(5, 5), padding='Same', activation='relu',
           input_shape=(img_rows, img_cols, channels)),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Conv stage 2: 16 filters, 3x3 kernel
    Conv2D(filters=16, kernel_size=(3, 3), padding='Same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.25),

    # Conv stage 3: 32 filters, 3x3 kernel
    Conv2D(filters=32, kernel_size=(3, 3), padding='Same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.25),

    # Fully connected classifier: one softmax output unit per class
    Flatten(),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(len(class_labels), activation="softmax"),
])

## Step 6: Configuring the Optimizer

**Choose Your Optimizer and Learning Rate:**

You can select from different optimizer functions and adjust the learning rate:

1. **Adam Optimizer**:
   ```python
   optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
   ```
   - Good default choice, adapts learning rate per parameter

2. **RMSprop Optimizer**:
   ```python
   optimizer = RMSprop(learning_rate=0.001, rho=0.9)
   ```
   - Good for recurrent networks and non-stationary objectives

3. **SGD (Stochastic Gradient Descent)**:
   ```python
   optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
   ```
   - Classic optimizer, can work well with proper learning rate scheduling

**Note:**
- If training is unstable (loss explodes), reduce learning rate (e.g., 0.0001)
- If training is too slow, increase learning rate (e.g., 0.01)


In [None]:
from tensorflow.keras.optimizers import Adam

# Adam settings kept together in one place so they are easy to tweak:
# the learning rate plus the two moment decay rates.
adam_settings = {"learning_rate": 0.001, "beta_1": 0.9, "beta_2": 0.999}
optimizer = Adam(**adam_settings)

## Step 7: Compiling the Model

**Choose Your Loss Function:**

Select an appropriate loss function based on your problem type:

1. **`categorical_crossentropy`**:
   - Use when you have multiple classes and one-hot encoded labels

2. **`sparse_categorical_crossentropy`**:
   - Use when you have multiple classes but integer labels

3. **`binary_crossentropy`**:
   - Use for binary classification problems (2 classes)

**Note:** For this demo, since we're using one-hot encoded labels, `categorical_crossentropy` is the default choice. If you change your label encoding method, make sure to update the loss function accordingly.


In [None]:
# Labels are one-hot encoded, hence categorical cross-entropy; accuracy is
# tracked as the reported metric.
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

## Step 8: Setting Training Hyperparameters

**Configure Training Parameters:**

1. **Epochs**: 
   - Number of complete passes through the training dataset
   - More epochs = longer training time but potentially better accuracy

2. **Batch Size**:
   - Number of samples processed before the model is updated
   - **Larger batch size**:
     - Faster training per epoch
     - More stable gradients
     - Requires more memory
   - **Smaller batch size**:
     - More frequent updates
     - Can help escape local minima
     - Less memory required

**Note:**
- Increasing the number of epochs doesn't always give better results; training
for too many epochs can cause overfitting


In [None]:
# Number of complete passes through the training dataset
epochs = 20
# Number of samples processed before the model weights are updated
batch_size = 250

## Step 9: Training the Model


In [None]:
# Train the model, evaluating on the validation data after each epoch
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

# Persist the run: the model itself plus two JSON artefacts
# (per-epoch metrics and the class label list) in the results folder
os.makedirs(run_results_folder, exist_ok=True)
model.save(os.path.join(run_results_folder, "model.keras"))

json_artifacts = (
    ("history.json", history.history),
    ("class_labels.json", class_labels),
)
for file_name, payload in json_artifacts:
    with open(os.path.join(run_results_folder, file_name), "w") as f:
        json.dump(payload, f, indent=2)

print(f"Model and results saved to '{run_results_folder}' (model.keras, history.json, class_labels.json).")

## Step 10: Visualizing Training Loss and Accuracy


In [None]:
# Load the training history saved to the results folder
history_path = os.path.join(run_results_folder, "history.json")
with open(history_path) as f:
    hist_dict = json.load(f)


class _History:
    """Minimal stand-in for the object returned by ``model.fit``; only carries ``.history``."""


history = _History()
history.history = hist_dict

# Plot the loss and accuracy curves for training and validation.
# Each metric gets its own panel; a curve is drawn only if its key was recorded.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, metric in zip(axes, ("loss", "accuracy")):
    curves = (
        (metric, "r", f"training {metric}"),
        (f"val_{metric}", "b", f"validation {metric}"),
    )
    for key, color, label in curves:
        if key in history.history:
            ax.plot(history.history[key], color=color, label=label)
    ax.set_title(f"Training and Validation {metric.capitalize()}")
    ax.set_xlabel("Number of Epochs")
    ax.set_ylabel(metric.capitalize())
    ax.legend()
plt.tight_layout()
plt.show()

## Step 11: Model Evaluation using Confusion Matrix


In [None]:
# Load model and labels from the results folder
model_path = os.path.join(run_results_folder, "model.keras")
model = keras.models.load_model(model_path)
labels_path = os.path.join(run_results_folder, "class_labels.json")
with open(labels_path) as f:
    class_labels = json.load(f)

# Predict class probabilities for the validation dataset
Y_pred = model.predict(X_val)
# Convert predicted probabilities to class indices
Y_pred_classes = np.argmax(Y_pred, axis=1)
# Convert the one-hot validation labels back to class indices
Y_true = np.argmax(Y_val, axis=1)
# Compute the confusion matrix; passing every class index explicitly keeps it
# n_classes x n_classes even if a class is absent from the validation data
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes, labels=np.arange(len(class_labels)))
# Plot the confusion matrix (cells are integer counts, hence fmt="d")
fig, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(confusion_mtx, annot=True, linewidths=0.01, cmap="Greens", linecolor="gray", fmt="d",
            xticklabels=class_labels, yticklabels=class_labels, ax=ax)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

## Step 12: Final Predictions and Visualization on Test Set


In [None]:
# Predict and visualize up to 6 random samples from the test set.
# np.random.choice(..., replace=False) raises if asked for more samples than
# exist, so cap the request at the dataset size.
n_samples = min(6, len(X_test))

if n_samples == 0:
    print("No test images available to visualize.")
else:
    plt.figure(figsize=(12, 8))

    # Select random images from the test data
    indices = np.random.choice(len(X_test), n_samples, replace=False)
    selected_test = X_test[indices]

    # Make the model predict the images and take the most probable class
    predictions = model.predict(selected_test)
    pred_classes = np.argmax(predictions, axis=1)

    # Get true labels (integer class indices) for the selected images if available
    true_classes = Y_test[indices] if Y_test is not None else None

    # Plot
    for i, idx in enumerate(indices):
        plt.subplot(2, 3, i + 1)
        img = X_test[idx]
        plt.imshow(img, cmap='gray' if channels == 1 else None)

        # Show both predicted and true labels if available;
        # green title = correct prediction, red = wrong
        if true_classes is not None:
            true_label = class_labels[true_classes[i]]
            pred_label = class_labels[pred_classes[i]]
            color = 'green' if true_classes[i] == pred_classes[i] else 'red'
            plt.title(f"Predicted: {pred_label}\nTrue: {true_label}", color=color)
        else:
            plt.title(f"Predicted: {class_labels[pred_classes[i]]}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()


# Citations

Based on "Convolutional Neural Network (CNN) Tutorial" by [kanncaa1](https://www.kaggle.com/kanncaa1), licensed under the Apache License 2.0. Modifications have been made.
