# Exercise 1: Linear Image Classifier

In this exercise you will implement a linear image classifier while getting familiar with `numpy` and the benefits of vectorized operations in Python.

## Environment

- Platform: Google Colab, CPU runtime (no GPU).
- Set a global random seed for reproducibility (e.g., np.random.seed(42)).

## Setting up the project folder

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import sys
import os

# Folder on the mounted Google Drive that is added to the import path
# (presumably where linear_models.py lives - adjust if your copy is elsewhere).
project_folder = '/content/drive/MyDrive/Deep-Learning-HW1/hw1'
if not os.path.isdir(project_folder):
    # A wrong path would otherwise only show up later as a confusing import error.
    print(f"Warning: {project_folder} does not exist; check the path before importing project modules.")
# Re-running this cell must not pile up duplicate entries in sys.path.
if project_folder not in sys.path:
    sys.path.append(project_folder)
print(f"Added {project_folder} to system path.")

## Imports

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pickle
import urllib.request
import tarfile
import zipfile
from random import randrange
from functools import partial
import itertools
import time
import linear_models
import importlib
importlib.reload(linear_models)
from typing import Dict, Tuple, Iterable, Optional, Any

# Set random seed for reproducibility
# NOTE: this seeds only NumPy's global generator. `randrange`, imported above from the
# standard-library `random` module, draws from a separate generator that is not seeded here.
np.random.seed(42)

# specify the way plots behave in jupyter notebook
%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 3.0)  # default figure size for every plot below
plt.rcParams['image.interpolation'] = 'nearest'  # show image pixels without smoothing
plt.rcParams['image.cmap'] = 'gray'  # default colormap for images

## Data Download and Processing

In [None]:
def download_and_extract(url, download_dir):
    """Download an archive into ``download_dir`` and unpack it there.

    The archive name is the last path component of ``url``. If a file with
    that name already exists in ``download_dir`` nothing is downloaded or
    extracted; the archive is assumed to have been unpacked by an earlier
    call.

    Args:
        url: Location of the archive. Files ending in ``.zip``, ``.tar.gz``
            or ``.tgz`` are unpacked; any other file is only downloaded.
        download_dir: Directory that receives the archive and its contents.
            It is created if it does not exist.
    """
    filename = url.split('/')[-1]
    file_path = os.path.join(download_dir, filename)

    if os.path.exists(file_path):
        print("Dataset already downloaded and unpacked.")
        return

    os.makedirs(download_dir, exist_ok=True)
    print("Downloading, This might take several minutes.")
    file_path, _ = urllib.request.urlretrieve(url=url, filename=file_path)
    print("Download finished. Extracting files.")

    # Context managers close the archive handle even when extraction fails.
    # NOTE(review): extractall() trusts the member paths stored in the archive;
    # only use this with archives from a source you trust.
    if file_path.endswith(".zip"):
        with zipfile.ZipFile(file=file_path, mode="r") as archive:
            archive.extractall(download_dir)
    elif file_path.endswith((".tar.gz", ".tgz")):
        with tarfile.open(name=file_path, mode="r:gz") as archive:
            archive.extractall(download_dir)
    print("Done. Dataset is ready!")

def load_CIFAR_batch(filename):
    """Load one pickled CIFAR batch file.

    Args:
        filename: Path to a pickle holding a dict with a ``'data'`` entry
            (one flattened 3x32x32, channel-first image per row) and a
            ``'labels'`` entry (one label per row).

    Returns:
        Tuple ``(X, Y)``. ``X`` is a float array of shape ``(N, 32, 32, 3)``
        (channels last) and ``Y`` is an array holding the ``N`` labels.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only open batch
    # files that come from a trusted source.
    with open(filename, 'rb') as f:
        datadict = pickle.load(f, encoding='latin1')

    # -1 lets NumPy infer the number of images instead of assuming exactly
    # 10000 per file, so partial or differently sized batches load as well.
    X = np.asarray(datadict['data']).reshape(-1, 3, 32, 32)
    X = X.transpose(0, 2, 3, 1).astype("float")
    Y = np.array(datadict['labels'])
    return X, Y

def load(ROOT):
    """Read training batches 1-5 and the test batch found under ``ROOT``.

    Returns:
        Tuple ``(Xtr, Ytr, Xte, Yte)``: images and labels of the five
        training batch files concatenated in order, followed by the images
        and labels of the test batch.
    """
    batch_paths = [os.path.join(ROOT, 'data_batch_%d' % (b,)) for b in range(1, 6)]
    batches = [load_CIFAR_batch(path) for path in batch_paths]
    # Split the list of (X, Y) pairs into one tuple of images and one of labels.
    image_parts, label_parts = zip(*batches)
    Xtr = np.concatenate(image_parts)
    Ytr = np.concatenate(label_parts)
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte

In [None]:
# CIFAR-10 (python version) archive and the local directory it is downloaded and unpacked into.
URL = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
PATH = 'datasets/cifar10/'
# Does nothing beyond printing a message when the archive file already exists in PATH.
download_and_extract(URL, PATH)

In [None]:
# Directory holding the batch files read by load().
# presumably the folder created by unpacking the archive - confirm the name after extraction
CIFAR10_PATH = os.path.join(PATH, 'cifar-10-batches-py')
# Training images/labels and test images/labels, in the order returned by load().
X_train, y_train, X_test, y_test = load(CIFAR10_PATH)

### ❀ Q1: Exploratory Data Analysis (EDA) ❀

**(5 points)**

In [None]:
# (1) Print the shapes of the training and test sets
for title, array in (
    ("Training set shape:", X_train),
    ("Training labels shape:", y_train),
    ("Test set shape:", X_test),
    ("Test labels shape:", y_test),
):
    print(title, array.shape)
print()

# (2) Display the number of classes and their names
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]
num_classes = len(class_names)
print(f"Number of classes: {num_classes}")
print(f"Class names: {class_names}")
print()

# (3) Show the class distribution in the training set
# np.unique returns the distinct label ids together with how often each occurs.
unique, counts = np.unique(y_train, return_counts=True)
print("Class distribution in training set:")
for cls_idx, count in zip(unique, counts):
    print(f"  Class {cls_idx} ({class_names[cls_idx]}): {count} samples")
print(f"Total training samples: {len(y_train)}")

## Data Preprocessing Part 1

In [None]:
# Filter the dataset to include only the target classes
# (ids 2, 3, 4 are 'bird', 'cat', 'deer' in the class list printed in the EDA cell).
TARGET_CLASSES = [2, 3, 4]
# Display names, indexed by the relabeled ids 0..2 assigned below.
classes = ['bird', 'cat', 'deer']

# Boolean masks selecting the samples whose label is one of the target classes.
train_mask = np.isin(y_train, TARGET_CLASSES)
test_mask = np.isin(y_test, TARGET_CLASSES)

X_train = X_train[train_mask]
y_train = y_train[train_mask]
X_test = X_test[test_mask]
y_test = y_test[test_mask]

# Relabel to {0,1,2}
# label_map sends each original id to its position in TARGET_CLASSES.
label_map = {orig: i for i, orig in enumerate(TARGET_CLASSES)}
y_train = np.vectorize(label_map.get)(y_train)
y_test = np.vectorize(label_map.get)(y_test)

# Define sizes
num_training = 10000
num_validation = 1000
num_testing = 1000

# Create subsets
# Training subset: the first num_training filtered training samples.
mask = range(num_training)
X_train = X_train[mask]
y_train = y_train[mask]

# Validation subset: the first num_validation samples of the filtered TEST data.
# This must run before X_test is overwritten below.
mask = range(num_validation)
X_val = X_test[mask]
y_val = y_test[mask]

# Test subset: the next num_testing samples, so it does not overlap the validation subset.
mask = range(num_validation, num_validation + num_testing)
X_test = X_test[mask]
y_test = y_test[mask]

# Make sure all image arrays are float64 before the arithmetic in later cells
# (load_CIFAR_batch already casts to float, so this acts as a safeguard).
X_train = X_train.astype(np.float64)
X_val = X_val.astype(np.float64)
X_test = X_test.astype(np.float64)

print("Shapes ->",
      "X_train", X_train.shape, "y_train", y_train.shape,
      "X_val", X_val.shape, "y_val", y_val.shape,
      "X_test", X_test.shape, "y_test", y_test.shape)

In [None]:
def get_batch(X, y, n=1000):
    """Draw ``n`` random samples (with replacement) from ``X`` and ``y``.

    Returns the selected rows of ``X`` and the matching entries of ``y``.
    """
    picks = np.random.randint(0, X.shape[0], size=n)
    return X[picks], y[picks]

def make_random_grid(x, y, n=4, convert_to_image=True, random_flag=True):
    """Tile images side by side for ``plt.imshow`` and print their class names.

    Args:
        x: Array of images. With ``convert_to_image=True`` each row is a
            flattened, mean-subtracted image with a trailing bias entry (it
            is passed through ``vec_2_img`` and the module-level
            ``mean_image`` is added back). Otherwise each item is used as is.
        y: Integer labels for ``x``; each indexes the module-level ``classes``.
        n: Number of images drawn when ``random_flag`` is True.
        convert_to_image: Whether to undo flattening and mean subtraction.
        random_flag: If True, draw ``n`` indices at random (with
            replacement); otherwise use every item of ``x`` in order.

    Returns:
        Integer array with the selected images concatenated horizontally.
    """
    if random_flag:
        rand_items = np.random.randint(0, x.shape[0], size=n)
    else:
        rand_items = np.arange(0, x.shape[0])
    images = x[rand_items]
    labels = y[rand_items]

    if convert_to_image:
        # mean_image may carry a leading singleton axis (it is computed with
        # keepdims=True); adding it unreshaped would give 4-D tiles and a grid
        # that imshow cannot display.
        mean_hwc = np.reshape(mean_image, (32, 32, 3))
        tiles = [np.asarray(vec_2_img(i) + mean_hwc, dtype=np.int64) for i in images]
    else:
        tiles = [np.asarray(i, dtype=np.int64) for i in images]
    grid = np.hstack(tiles)

    # One caption per displayed tile, so captions and images always line up
    # (also when random_flag is False and the grid shows all of x).
    print('\t'.join('%9s' % classes[label] for label in labels))
    return grid

def vec_2_img(x):
    """Reshape a flattened image vector into 32x32x3, dropping its last entry.

    The last entry is the bias feature appended during preprocessing.
    """
    pixels = x[:-1]
    return np.reshape(pixels, (32, 32, 3))

In [None]:
# Sample 100 test images, then display 4 of them (chosen at random inside make_random_grid).
X_batch, y_batch = get_batch(X_test, y_test, 100)
# convert_to_image=False: the images have not been flattened or mean-subtracted at this point.
plt.imshow(make_random_grid(X_batch, y_batch, n=4, convert_to_image=False))
plt.axis("off")
plt.show()

## Data Preprocessing Part 2

In [None]:
def _flatten_with_bias(images):
    """Flatten each image to one row and append a constant-1 bias feature."""
    rows = images.reshape(images.shape[0], -1)
    return np.hstack([rows, np.ones((rows.shape[0], 1))])


# 1) mean image, computed feature-wise on the TRAIN split only
mean_image = np.mean(X_train, axis=0, keepdims=True)

# 1) subtract it from every split, then 2) flatten HxWxC -> D and
# 3) add the bias term (last feature = 1)
X_train = _flatten_with_bias(X_train - mean_image)
X_val = _flatten_with_bias(X_val - mean_image)
X_test = _flatten_with_bias(X_test - mean_image)

print(f"Shape of Training Set: {X_train.shape}")
print(f"Shape of Validation Set: {X_val.shape}")
print(f"Shape of Test Set: {X_test.shape}")

# Number of classes follows from the largest label; labels must already be 0..C-1.
num_classes = int(np.max(y_train)) + 1
assert set(np.unique(y_train)) <= set(range(num_classes)), "y must be in 0..C-1"

### ❀ Q2: Understanding the Image Preprocessing ❀

**(5 points)**

**1. Mean Subtraction**

We subtract the mean training image to center every feature (pixel) around zero. This helps training because gradient descent behaves better on zero-centered data: the updates are more balanced and convergence is faster. It also removes what all images have in common (for example, the overall brightness level of the dataset), so the model can focus on the features that actually distinguish the images. Note that it does not remove lighting differences between individual images.

**2. Flattening**

Linear models need 1D vectors as input, not 2D/3D arrays. So we flatten each 32x32x3 image into a single vector of 3072 numbers. This way we can do the matrix multiplication $XW$ where each pixel becomes its own feature. Basically the model treats every pixel position as an independent input.

**3. Bias Trick**

Instead of keeping track of the weights $W$ and the bias $b$ separately, we append a column of 1s to the data. This folds both into a single matrix multiplication, $XW$, in which the last row of $W$ acts as the bias. This makes the code simpler and cleaner, since everything is one matrix operation.

## Linear Perceptron

In [None]:
# Build a perceptron and predict on the test subset without calling train() first.
# These predictions are used by the preview cell below.
classifier = linear_models.LinearPerceptron(X_train, y_train)
y_pred = classifier.predict(X_test)

In [None]:
# Show a few random test images together with the perceptron's predictions
# (green title = correct, red title = wrong).
num_show = 8
idxs = np.random.choice(len(X_test), size=num_show, replace=False)
fig, axes = plt.subplots(1, num_show, figsize=(16, 3))

# Mean image in HxWxC layout; the [:3072] slice drops a bias entry if one is present.
mean_flat = np.asarray(mean_image).reshape(-1)
mean_img_3d = mean_flat[:3072].reshape(32, 32, 3)

for ax, i in zip(axes, idxs):
    # Drop the bias feature, restore the image layout and undo the mean subtraction.
    vec = np.asarray(X_test[i]).reshape(-1)[:3072]
    img = vec.reshape(32, 32, 3) + mean_img_3d
    # Rescale to [0, 1] for imshow. A constant image has max == min, which
    # would divide by zero, so it is shown as all zeros instead.
    vmin, vmax = img.min(), img.max()
    if vmax > vmin:
        img = (img - vmin) / (vmax - vmin)
    else:
        img = np.zeros_like(img)

    true_lbl = classes[y_test[i]]
    pred_lbl = classes[y_pred[i]]
    ax.imshow(img)
    ax.axis("off")
    ax.set_title(f"Pred: {pred_lbl}\nTrue: {true_lbl}",
                 color="green" if y_pred[i] == y_test[i] else "red")

plt.tight_layout()
plt.show()

In [None]:
# Baseline: training-set accuracy of the classifier created above, on which train() has not been called.
print("Model accuracy (before training):", classifier.calc_accuracy(X_train, y_train))

### ❀ Q3 ❀

**(5 points)**

The accuracy is low because we haven't trained the model yet - the weights are just random numbers right now. With 3 classes, random guessing would give about 33% accuracy, which is basically what we're seeing. The model has no idea what features to look for to distinguish birds from cats from deer. Once we train it with gradient descent, it'll learn which pixel patterns correspond to each class and the accuracy will go up.

## Perceptron Loss

In [None]:
# Small random weights: one row per input feature (including the bias column)
# and one column per class. The sizes are taken from the data rather than
# hard-coded as 3073 x 3, so the cell stays valid if the classes or features change.
W = np.random.randn(X_val.shape[1], num_classes) * 1e-4

In [None]:
%%time
# Loss and gradient from the "naive" perceptron implementation on the validation split
# (the cell magic above reports how long it takes).
loss_naive, grad_naive = linear_models.perceptron_loss_naive(W, X_val, y_val)
print("Loss:", loss_naive)

In [None]:
%%time
# NOTE(review): this cell sits in the "Perceptron Loss" section right after the naive
# perceptron timing, yet it calls softmax_cross_entropy. Confirm whether a vectorized
# perceptron loss from linear_models was intended here for the comparison.
loss_vectorized, _ = linear_models.softmax_cross_entropy(W, X_val, y_val)
print('loss: %f' % (loss_vectorized))

In [None]:
%%time
# Create a fresh perceptron and train it on the training split.
# train() returns the loss history that the next cell plots.
perceptron = linear_models.LinearPerceptron(X_train, y_train)
loss_history = perceptron.train(X_train, y_train, learning_rate=1e-7,
                                num_iters=1500, verbose=True)

In [None]:
# Plot the loss values returned by the perceptron's train() call, one point per entry.
plt.plot(loss_history, color='c')
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

In [None]:
# Accuracy of the trained perceptron on the training split and on the test subset.
print("Training accuracy:", perceptron.calc_accuracy(X_train, y_train))
print("Testing accuracy:", perceptron.calc_accuracy(X_test, y_test))

## Hyperparameter Optimization for Perceptron

In [None]:
# Candidate step sizes and mini-batch sizes for the search.
learning_rates = [1e-3, 1e-2, 1e-1]
batch_sizes = [32, 64, 128, 256]

# Settings shared by every run of the search.
search_options = dict(num_iters=500, model_kwargs=None, verbose=True)
results, best_perc, best_val = linear_models.tune_perceptron(
    linear_models.LinearPerceptron,
    X_train, y_train, X_val, y_val,
    learning_rates, batch_sizes,
    **search_options,
)

# results maps (learning rate, batch size) -> (train accuracy, validation accuracy).
for lr, bs in sorted(results):
    tr, va = results[(lr, bs)]
    print(f"lr={lr: .1e} bs={bs:4d}  train_acc={tr:.4f}  val_acc={va:.4f}")

print(f"best validation accuracy: {best_val:.4f}")
# The test subset is only scored once, with the model returned as best.
test_acc = best_perc.calc_accuracy(X_test, y_test)
print(f"final test accuracy: {test_acc:.4f}")

## Logistic Regression

In [None]:
# Build a logistic-regression model and predict on the test subset without calling train() first.
# This overwrites y_pred from the perceptron section; the preview cell below uses the new values.
logistic = linear_models.LogisticRegression(X_train, y_train)
y_pred = logistic.predict(X_test)

In [None]:
# Preview random test images with the logistic model's predictions
# (green title = correct, red title = wrong).
num_show = 8
idxs = np.random.choice(len(X_test), size=num_show, replace=False)
fig, axes = plt.subplots(1, num_show, figsize=(16, 3))

# Mean image in HxWxC layout, used to undo the mean subtraction.
mean_flat = np.asarray(mean_image).reshape(-1)
mean_img_3d = mean_flat[:3072].reshape(32, 32, 3)

for ax, i in zip(axes, idxs):
    # Drop the bias feature and restore the original pixel values.
    pixels = np.asarray(X_test[i]).reshape(-1)[:3072]
    img = pixels.reshape(32, 32, 3) + mean_img_3d

    # Min-max scale to [0, 1]; a constant image is shown as all zeros.
    vmin, vmax = img.min(), img.max()
    img_disp = (img - vmin) / (vmax - vmin) if vmax > vmin else np.zeros_like(img)

    true_lbl = classes[y_test[i]]
    pred_lbl = classes[y_pred[i]]
    title_color = "green" if y_pred[i] == y_test[i] else "red"

    ax.imshow(img_disp)
    ax.axis("off")
    ax.set_title(f"Pred: {pred_lbl}\nTrue: {true_lbl}", color=title_color)

plt.tight_layout()
plt.show()

In [None]:
# Baseline: training-set accuracy of the logistic model created above, on which train() has not been called.
print("Model accuracy (before training):", logistic.calc_accuracy(X_train, y_train))

## Softmax Cross-Entropy Loss

In [None]:
# Small random weights: one row per input feature (including the bias column)
# and one column per class. The sizes are taken from the data rather than
# hard-coded as 3073 x 3, so the cell stays valid if the classes or features change.
W = np.random.randn(X_val.shape[1], num_classes) * 1e-4

In [None]:
%%time
# Softmax cross-entropy loss and its gradient at W, evaluated on the validation split.
loss_val, grad_val = linear_models.softmax_cross_entropy(W, X_val, y_val)
print(f"loss: {loss_val:.6f}")
# The gradient is expected to have the same shape as W - TODO confirm against linear_models.
print("grad shape:", grad_val.shape)

In [None]:
def grad_check(f, x, analytic_grad, num_checks=10, h=1e-5):
    """Spot-check an analytic gradient against centered finite differences.

    For ``num_checks`` randomly chosen entries of ``x`` the numerical
    derivative ``(f(x + h) - f(x - h)) / (2h)`` is compared with the matching
    entry of ``analytic_grad``. Both values and their relative error are
    printed.

    Args:
        f: Callable that takes ``x`` and returns a scalar.
        x: Array at which the gradient is checked. It is perturbed in place
            during the check and restored afterwards.
        analytic_grad: Array with the same indexing as ``x`` holding the
            gradient to verify.
        num_checks: Number of random entries to test.
        h: Finite-difference step.

    Returns:
        None. The results are reported via ``print``.
    """
    for _ in range(num_checks):
        ix = tuple(randrange(m) for m in x.shape)
        oldval = x[ix]
        try:
            x[ix] = oldval + h
            fxph = f(x)
            x[ix] = oldval - h
            fxmh = f(x)
        finally:
            # x is modified in place; put the original value back even if f raises.
            x[ix] = oldval
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        denom = abs(grad_numerical) + abs(grad_analytic)
        if denom == 0:
            # Both gradients are exactly zero: they agree, and 0/0 would give NaN.
            rel_error = 0.0
        else:
            rel_error = abs(grad_numerical - grad_analytic) / denom
        print('numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error))

In [None]:
# Analytic loss and gradient of the softmax cross-entropy at W on the validation split.
loss, grad = linear_models.softmax_cross_entropy(W, X_val, y_val)


def f(w):
    """Return only the scalar loss, as grad_check expects from its function argument."""
    return linear_models.softmax_cross_entropy(w, X_val, y_val)[0]


# grad_check reports through print and has no return value, so its result is
# not bound to a name (the old `grad_numerical` only ever held None).
grad_check(f, W, grad)

In [None]:
%%time
# Create a fresh logistic-regression model and train it on the training split.
# train() returns the loss history that the next cell plots.
logistic = linear_models.LogisticRegression(X_train, y_train)
loss_history = logistic.train(X_train, y_train,
                         learning_rate=1e-7,
                         num_iters=1500,
                         verbose=True)

In [None]:
# Plot the loss values returned by the logistic model's train() call, one point per entry.
plt.plot(loss_history, color='pink')
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

In [None]:
# Accuracy of the trained logistic model on the training split and on the test subset.
print("Training accuracy:", logistic.calc_accuracy(X_train, y_train))
print("Testing accuracy:", logistic.calc_accuracy(X_test, y_test))

## Hyperparameter Optimization for Logistic Regression

In [None]:
# Candidate step sizes and mini-batch sizes for the search.
learning_rates = [1e-3, 1e-2, 1e-1]
batch_sizes = [32, 64, 128, 256]

# The search helper takes the model class as its first argument, so it is reused here
# for LogisticRegression. The outcome is stored under logistic-specific names:
# reusing results / best_perc / best_val / test_acc would overwrite the perceptron
# search outcome and mislabel the best model.
logistic_results, best_logistic, best_logistic_val = linear_models.tune_perceptron(
    linear_models.LogisticRegression,
    X_train, y_train, X_val, y_val,
    learning_rates, batch_sizes,
    num_iters=500,
    model_kwargs=None,
    verbose=True,
)

# One line per (learning rate, batch size) pair with its train and validation accuracy.
for (lr, bs), (tr, va) in sorted(logistic_results.items()):
    print(f"lr={lr: .1e} bs={bs:4d}  train_acc={tr:.4f}  val_acc={va:.4f}")

print(f"best validation accuracy: {best_logistic_val:.4f}")
# The test subset is only scored once, with the model returned as best.
logistic_test_acc = best_logistic.calc_accuracy(X_test, y_test)
print(f"final test accuracy: {logistic_test_acc:.4f}")

# The End!