# Lab5 Deep Learning
In this lab there are three parts including a complete implementation pipeline for using deep neural network on multi-class classification task.

For each part, you should read through it and include it to your report.

In [1]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt

In [2]:
## download dataset
if not os.path.exists('Celebrity_Faces_Dataset'):
    remote_path = 'https://github.com/Michigan-State-University-CSE-440/logistic-regression/releases/download/v1.0/Celebrity_Faces_Dataset.zip'
    local_path = 'Celebrity_Faces_Dataset.zip'
    if not os.path.exists(local_path):
        os.system(f'wget {remote_path}')
    os.system(f'unzip {local_path}')

## Part 1:  Define Framework

In [3]:
class SimpleCNN(nn.Module):
    def __init__(self, input_channels=1, num_classes=10, image_size=(28, 28)):
        """
        A polished two-layer convolutional neural network with normalization.

        :param input_channels: Number of channels in the input images.
        :param num_classes: Number of output classes.
        :param image_size: Tuple (height, width) of the input images.
        """
        super(SimpleCNN, self).__init__()

        self.layers = nn.Sequential(
            # First convolutional block
            nn.Conv2d(input_channels, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),  # Normalization layer
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Second convolutional block
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),  # Normalization layer
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # After two pooling operations, the height and width are reduced by a factor of 4.
        conv_output_height = image_size[0] // 4
        conv_output_width = image_size[1] // 4

        self.classifier = nn.Linear(64 * conv_output_height * conv_output_width, num_classes)

    def forward(self, x):
        x = self.layers(x)
        # Flatten the tensor for the fully connected layer.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [4]:
# CNN classifier that mimics the API of the from-scratch logistic regression class
class CNNClassifier:
    def __init__(self, learning_rate=0.001, num_epochs=10, batch_size=64, seed=42,
                 input_channels=1, image_size=(28, 28), num_classes=10):
        """
        :param learning_rate: Learning rate for the optimizer.
        :param num_epochs: Number of epochs to train.
        :param batch_size: Batch size for mini-batch gradient descent.
        :param seed: Random seed for reproducibility.
        :param input_channels: Number of channels in the input images.
        :param image_size: Tuple of (height, width) of the input images.
        :param num_classes: Number of output classes.
        """
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.seed = seed
        self.input_channels = input_channels
        self.image_size = image_size
        self.num_classes = num_classes

        torch.manual_seed(seed)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f'Using device: {self.device}')
        self.model = SimpleCNN(input_channels, num_classes, image_size).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.criterion = nn.CrossEntropyLoss()

    def train_epoch(self, X, y):
        """
        Train the CNN classifier.
        :param X: Input images as a NumPy array of shape (N, C, H, W).
        :param y: Labels as a NumPy array of shape (N,).
        """
        # Convert the NumPy arrays to PyTorch tensors.
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
        y_tensor = torch.tensor(y, dtype=torch.long).to(self.device)

        # Create a dataset and dataloader for mini-batch training.
        dataset = TensorDataset(X_tensor, y_tensor)
        dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
        epoch_loss = 0.0

        self.model.train()
        for i, (batch_X, batch_y) in enumerate(dataloader):
            batch_X = batch_X.transpose(3, 1)
            self.optimizer.zero_grad()
            outputs = self.model(batch_X)
            loss = self.criterion(outputs, batch_y)
            loss.backward()
            self.optimizer.step()
            epoch_loss += loss.item()
        avg_loss = epoch_loss / len(dataloader)
        return avg_loss

    def validate(self, X, y):
        """
        Predict class labels for the given input images.
        :param X: Input images as a NumPy array of shape (N, C, H, W).
        :return: NumPy array of predicted labels.
        """

        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
        y_tensor = torch.tensor(y, dtype=torch.long).to(self.device)

        dataset = TensorDataset(X_tensor, y_tensor)
        dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=False)

        batch_preds = []
        batch_gt = []
        loss_tot = 0.0

        self.model.eval()
        with torch.no_grad():
            for i, (batch_X, batch_y) in enumerate(dataloader):
                batch_X = batch_X.transpose(3, 1)
                outputs = self.model(batch_X)
                loss = self.criterion(outputs, batch_y)
                pred = torch.argmax(outputs, dim=1)
                batch_preds.append(pred)
                batch_gt.append(batch_y)
                loss_tot += loss.item()
        avg_loss = loss_tot / len(dataloader)
        y_pred = torch.cat(batch_preds, dim=0).cpu().numpy()
        y_gt = torch.cat(batch_gt, dim=0).cpu().numpy()
        accuracy = np.mean(y_pred == y_gt)
        return accuracy, avg_loss



## Part 2: Data Loading

In [5]:
# ---------------------------------------------
# 2) Data Loading for the Celebrity Faces
# ---------------------------------------------
def load_celebrity_faces(root_dir, image_size=(64, 64)):
    """
    Loads images from subfolders of `root_dir` (one folder per celebrity).
    Returns:
        X_gray: Flattened grayscale images for training, shape (num_samples, H*W).
        y:      Label array, shape (num_samples,).
        label_map: {label_idx: 'celebrity_name'}.
        X_color:  Color images (resized but not converted to grayscale),
                  shape (num_samples, H, W, 3) in RGB order for display.
    """
    X_input = []
    X_color = []
    y = []

    # subfolders = celebrity names
    classes = sorted([
        d for d in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, d))
    ])

    label_map = {idx: celeb for idx, celeb in enumerate(classes)}
    name_to_label = {celeb: idx for idx, celeb in enumerate(classes)}

    for celeb_name in classes:
        celeb_label = name_to_label[celeb_name]
        celeb_folder = os.path.join(root_dir, celeb_name)

        for i, filename in enumerate(os.listdir(celeb_folder)):
            if i >= 100:  # limit to 100 images per class
                break
            filepath = os.path.join(celeb_folder, filename)
            img_bgr = cv2.imread(filepath)
            if img_bgr is None:
                continue

            # Resize color image for display
            img_bgr_vis = cv2.resize(img_bgr, (256,256))
            # Convert BGR (OpenCV) to RGB for matplotlib
            img_rgb_vis = cv2.cvtColor(img_bgr_vis, cv2.COLOR_BGR2RGB)
            X_color.append(img_rgb_vis)

            # Also create a grayscale copy for training
            img_bgr = cv2.resize(img_bgr, image_size)
            X_input.append(img_bgr)

            y.append(celeb_label)

    X_color = np.array(X_color, dtype=np.uint8)  # (num_samples, H, W, 3)
    X_input = np.array(X_input, dtype=np.float32)  # (num_samples, H', W', 3)
    y = np.array(y, dtype=np.int32)

    return X_input, y, label_map, X_color


In [6]:
# ---------------------------------------------------
# 3) Label-Wise k-Fold Splitting (Manual Stratification)
# ---------------------------------------------------
def labelwise_kfold_split(X, y, k=5, shuffle=True, random_state=None):
    """
    Manually perform a label-wise k-fold split.
    Returns list of (train_indices, test_indices) for each fold.
    Ensures each label's samples appear in all folds.
    """
    assert len(X) == len(y), "X and y must have same length."

    label_indices_map = {}
    unique_labels = np.unique(y)

    # gather indices by label
    for label in unique_labels:
        label_indices = np.where(y == label)[0]
        label_indices_map[label] = label_indices

    # shuffle if needed
    rng = np.random.default_rng(random_state)
    if shuffle:
        for label in unique_labels:
            rng.shuffle(label_indices_map[label])

    # partition each label's indices into k folds
    label_folds = {}
    for label in unique_labels:
        indices = label_indices_map[label]
        num_samples_label = len(indices)

        fold_sizes = [num_samples_label // k] * k
        for i in range(num_samples_label % k):
            fold_sizes[i] += 1

        start = 0
        label_folds[label] = []
        for fold_size in fold_sizes:
            end = start + fold_size
            fold_subset = indices[start:end]
            label_folds[label].append(fold_subset)
            start = end

    # combine folds across labels
    folds = []
    for i in range(k):
        test_indices_list = []
        train_indices_list = []
        for label in unique_labels:
            # i-th subset for label -> test
            test_indices_list.append(label_folds[label][i])
            # other subsets -> train
            for j in range(k):
                if j != i:
                    train_indices_list.append(label_folds[label][j])

        test_indices = np.concatenate(test_indices_list)
        train_indices = np.concatenate(train_indices_list)
        folds.append((train_indices, test_indices))

    return folds

In [7]:
root_dir = "Celebrity_Faces_Dataset"
pca_components=200
learning_rate=0.0001
k=5
num_epochs=10
batch_size=32
seed=440

## Part 3: Training & Evaluating Network

In [8]:

# ----------------------------------------
# 4) Main: k-Fold + (Optional) PCA + Viz
# ----------------------------------------

# 1. Load data
X_input, y, label_map, X_color = load_celebrity_faces(root_dir, image_size=(64, 64))
print(f"Loaded {len(X_input)} images for {len(label_map)} celebrities.\n")

# 2. Normalize grayscale features to [0..1] for training
X_input = X_input / 255.0

# 3. Perform manual label-wise k-fold splitting
folds = labelwise_kfold_split(X_input, y, k=k)

accuracies = []

fold_num = 1
accuracy_across_folds = []
for train_indices, test_indices in folds:
    print(f"=== Fold {fold_num}/{k} ===")
    fold_num += 1

    X_train = X_input[train_indices]
    y_train = y[train_indices]
    X_test = X_input[test_indices]
    y_test = y[test_indices]

    print(f"Train: {len(X_train)}, Test: {len(X_test)}")

    # 4. Train from-scratch logistic regression
    model = CNNClassifier(
        learning_rate,
        num_epochs,
        batch_size,
        seed,
        input_channels=3,
        image_size=(64, 64),
        num_classes=len(label_map)
    )

    for epoch in range(num_epochs):
        ave_loss = model.train_epoch(X_train, y_train)
        accuracy, ave_loss = model.validate(X_test, y_test)
        if (epoch+1) % 2 == 0:
            print(f"Epoch {epoch+1:03d}: Train Loss = {ave_loss:.6f}, Test Loss = {ave_loss:.6f}, Test Accuracy = {accuracy*100:.2f}%")

    accuracy_across_folds.append(accuracy)

ave_accuracy = sum(accuracy_across_folds) / len(accuracy_across_folds)
print(f"Average accuracy across {k}-folds: {ave_accuracy*100:.2f}%")

Loaded 1700 images for 17 celebrities.

=== Fold 1/5 ===
Train: 1360, Test: 340
Using device: cpu
Epoch 002: Train Loss = 2.427085, Test Loss = 2.427085, Test Accuracy = 21.47%
Epoch 004: Train Loss = 2.376633, Test Loss = 2.376633, Test Accuracy = 26.47%
Epoch 006: Train Loss = 2.386444, Test Loss = 2.386444, Test Accuracy = 27.94%
Epoch 008: Train Loss = 2.353116, Test Loss = 2.353116, Test Accuracy = 26.76%
Epoch 010: Train Loss = 2.332457, Test Loss = 2.332457, Test Accuracy = 33.24%
=== Fold 2/5 ===
Train: 1360, Test: 340
Using device: cpu
Epoch 002: Train Loss = 2.389485, Test Loss = 2.389485, Test Accuracy = 24.41%
Epoch 004: Train Loss = 2.387722, Test Loss = 2.387722, Test Accuracy = 23.53%
Epoch 006: Train Loss = 2.421104, Test Loss = 2.421104, Test Accuracy = 28.24%
Epoch 008: Train Loss = 2.405987, Test Loss = 2.405987, Test Accuracy = 27.94%
Epoch 010: Train Loss = 2.448110, Test Loss = 2.448110, Test Accuracy = 29.41%
=== Fold 3/5 ===
Train: 1360, Test: 340
Using device: 