## Importing Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.utils import resample
import matplotlib.pyplot as plt

## Loading Data

In [None]:
# Load the sampled CIFAR-10 table from the working directory. According to the
# recorded output of this cell it has 187 rows and 3073 columns (a `label`
# column plus pixel_0..pixel_3071).
# NOTE(review): in the preview the trailing pixel columns display as floats
# while the leading ones are ints — this may indicate missing values in some
# rows; confirm before training on them.
df = pd.read_csv("cifar10_sampled.csv")
print(df.shape)
# Last expression of the cell: rich preview of the first five rows.
df.head()

(187, 3073)


Unnamed: 0,label,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,...,pixel_3062,pixel_3063,pixel_3064,pixel_3065,pixel_3066,pixel_3067,pixel_3068,pixel_3069,pixel_3070,pixel_3071
0,1,124,151,223,127,158,230,140,171,235,...,194.0,94.0,138.0,195.0,102.0,147.0,205.0,99.0,144.0,202.0
1,9,218,197,220,221,197,221,222,199,223,...,50.0,37.0,45.0,64.0,38.0,46.0,66.0,38.0,46.0,66.0
2,2,141,184,123,108,171,98,130,176,113,...,65.0,65.0,196.0,69.0,71.0,177.0,81.0,34.0,116.0,52.0
3,8,208,220,229,184,201,216,178,194,209,...,97.0,33.0,65.0,89.0,36.0,69.0,90.0,57.0,94.0,112.0
4,4,8,22,12,42,52,39,44,75,50,...,43.0,129.0,150.0,47.0,155.0,170.0,66.0,204.0,225.0,95.0


In [None]:
def load_data():
    """Build class-balanced train/validation/test splits from the global ``df``.

    Every class 0..9 contributes the same number of rows: the size of the
    rarest class. Those rows are shuffled without replacement (fixed
    ``random_state=42``) and cut 60% / 25% / remainder into train / validation /
    test. Pixel values are divided by 255 and a constant column of ones is
    appended so the bias can live inside the weight matrix.

    Returns:
        Tuple ``(x_train, x_val, x_test, y_train, y_val, y_test)`` of NumPy
        arrays; each ``x_*`` has one more column than ``df`` has non-label
        columns.
    """
    num_classes = 10
    class_sizes = [int((df['label'] == label).sum()) for label in range(num_classes)]
    min_samples = min(class_sizes)

    # Per-class budget for each split; the test split absorbs rounding leftovers.
    n_train = int(min_samples * 0.6)
    n_val = int(min_samples * 0.25)
    n_test = min_samples - n_train - n_val
    n_total = n_train + n_val + n_test
    val_end = n_train + n_val

    print(f"Using {n_total} samples per class (Train: {n_train}, Val: {n_val}, Test: {n_test})")

    split_names = ("train", "val", "test")
    features = {name: [] for name in split_names}
    targets = {name: [] for name in split_names}

    for label in range(num_classes):
        rows = df[df['label'] == label]

        # Shuffle and keep exactly the per-class budget.
        rows = resample(rows, replace=False, n_samples=n_total, random_state=42)

        pieces = {
            "train": rows[:n_train],
            "val": rows[n_train:val_end],
            "test": rows[val_end:],
        }
        for name in split_names:
            piece = pieces[name]
            features[name].append(piece.drop(columns='label').values)
            targets[name].append(piece['label'].values)

    def _design_matrix(chunks):
        # Stack the per-class chunks, scale to [0, 1], append the bias column.
        scaled = np.vstack(chunks) / 255.0
        return np.hstack([scaled, np.ones((scaled.shape[0], 1))])

    x_train = _design_matrix(features["train"])
    x_val = _design_matrix(features["val"])
    x_test = _design_matrix(features["test"])

    y_train = np.hstack(targets["train"])
    y_val = np.hstack(targets["val"])
    y_test = np.hstack(targets["test"])

    print("Training shape:", x_train.shape)
    print("Validation shape:", x_val.shape)
    print("Testing shape:", x_test.shape)

    return x_train, x_val, x_test, y_train, y_val, y_test


## Calculating Loss

In [None]:
# Multiclass hinge (SVM) loss of one sample
def Loss_i(W, x_i, y_i, delta):
    """Return the multiclass hinge loss of a single sample.

    Args:
        W: weight matrix with one row per class.
        x_i: feature vector of the sample.
        y_i: index of the sample's true class.
        delta: margin by which the true-class score must beat every other score.

    Returns:
        Sum over all wrong classes of ``max(0, score_j - score_true + delta)``;
        the integer 0 when no class violates the margin.
    """
    scores = W @ x_i
    target_score = scores[y_i]
    hinge_total = 0
    for cls, cls_score in enumerate(scores):
        violation = cls_score - target_score + delta
        # The true class never contributes, even though its "violation" equals delta.
        if cls != y_i and violation > 0:
            hinge_total += violation
    return hinge_total

In [None]:
# Gradient for one example
def compute_dW(score_i, x_i, y_i, delta):
    """Return the hinge-loss gradient with respect to W for a single sample.

    The gradient has one row per class and one column per feature. Its shape is
    taken from the inputs, so any number of classes/features is supported.

    Args:
        score_i: 1-D array of class scores for the sample (``W @ x_i``).
        x_i: 1-D feature vector of the sample.
        y_i: index of the sample's true class.
        delta: margin used by the hinge loss.

    Returns:
        Float array of shape ``(len(score_i), len(x_i))``. Every wrong class
        whose margin is violated gets ``+x_i`` in its row, and the true-class
        row gets ``-x_i`` once per violating class.
    """
    score_i = np.asarray(score_i)
    x_i = np.asarray(x_i)
    num_classes = score_i.shape[0]
    correct_class_score = score_i[y_i]

    # Sized from the inputs rather than a fixed (10, 3073).
    dW = np.zeros((num_classes, x_i.shape[0]))
    for j in range(num_classes):
        if j == y_i:
            continue
        margin = score_i[j] - correct_class_score + delta
        if margin > 0:
            dW[j, :] += x_i
            dW[y_i, :] -= x_i
    return dW


In [None]:
# Mean hinge loss over a set of samples
def loss(W, X, y, delta=10.0):
    """Return the average of ``Loss_i`` over all samples.

    Args:
        W: weight matrix passed through to ``Loss_i``.
        X: indexable collection of feature vectors; ``X[i]`` pairs with ``y[i]``.
        y: sequence of true class indices; its length sets the sample count.
        delta: hinge margin, 10.0 by default.

    Returns:
        Sum of the per-sample losses divided by ``len(y)``.
    """
    sample_count = len(y)
    summed = sum(Loss_i(W, X[idx], y[idx], delta) for idx in range(sample_count))
    return summed / sample_count


## Calculating Accuracy

In [None]:
def accuraccy(W, x_test, y_test):
    """Return the fraction of samples whose highest-scoring class equals the label.

    Args:
        W: weight matrix with one row per class.
        x_test: 2-D array with one sample per row.
        y_test: true class index for each row of ``x_test``.

    Returns:
        Mean of the element-wise comparison between predictions and ``y_test``.
    """
    # W @ x_test.T puts one column of class scores per sample, hence axis=0.
    hits = np.argmax(W @ x_test.T, axis=0) == y_test
    return np.mean(hits)


## Training Model

In [None]:
def train(x_train, y_train, learning_rate, epochs, delta=10.0, seed=None):
    """Fit a linear multiclass SVM with full-batch gradient descent.

    Each epoch accumulates the per-sample hinge loss (``Loss_i``) and gradient
    (``compute_dW``) over the whole training set, then takes one step along the
    averaged gradient and prints the averaged loss.

    Args:
        x_train: 2-D array, one sample per row (bias column already appended).
        y_train: true class index for each row of ``x_train``.
        learning_rate: step size applied to the averaged gradient.
        epochs: number of full passes over the training set.
        delta: hinge margin; defaults to 10.0, the value previously hard-coded.
        seed: optional seed for the weight initialisation. ``None`` (default)
            keeps drawing from NumPy's global RNG via ``np.random.randn``.

    Returns:
        The trained weight matrix of shape ``(10, x_train.shape[1])``.
    """
    x_train = np.asarray(x_train)
    num_classes = 10
    # Size W from the data so it always matches the feature vectors it multiplies.
    num_features = x_train.shape[1]

    if seed is None:
        W = np.random.randn(num_classes, num_features) * 0.001
    else:
        # Dedicated generator: reproducible without touching global RNG state.
        rng = np.random.default_rng(seed)
        W = rng.standard_normal((num_classes, num_features)) * 0.001

    for epoch in range(epochs):
        grad_sum = np.zeros_like(W)
        total_loss = 0

        for i in range(len(y_train)):
            x_image = x_train[i]
            y_label = y_train[i]
            scores = W @ x_image
            total_loss += Loss_i(W, x_image, y_label, delta)
            grad_sum += compute_dW(scores, x_image, y_label, delta)

        # One step per epoch on the gradient averaged over all samples.
        W -= learning_rate * grad_sum / len(y_train)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(y_train)}")

    return W


## Evaluation of Model

In [None]:
x_train, x_val, x_test, y_train, y_val, y_test = load_data()

# Seed NumPy's global RNG so the random weight initialisation inside train()
# (np.random.randn) — and therefore every metric printed below — is the same on
# each Restart & Run All.
np.random.seed(42)

# Hyperparameters for full-batch gradient descent.
learning_rate = 0.025
epochs = 250
w = train(x_train, y_train, learning_rate, epochs)

# Compute losses (loss() uses its default margin, delta=10.0)
training_loss = loss(w, x_train, y_train)
validation_loss = loss(w, x_val, y_val)
testing_loss = loss(w, x_test, y_test)

# Compute accuracies
training_acc = accuraccy(w, x_train, y_train)
validation_acc = accuraccy(w, x_val, y_val)
testing_acc = accuraccy(w, x_test, y_test)

# Print results
print(f"\nTraining Loss: {training_loss:.4f}, Accuracy: {training_acc * 100:.2f}%")
print(f"Validation Loss: {validation_loss:.4f}, Accuracy: {validation_acc * 100:.2f}%")
print(f"Testing Loss: {testing_loss:.4f}, Accuracy: {testing_acc * 100:.2f}%")


Using 12 samples per class (Train: 7, Val: 3, Test: 2)
Training shape: (70, 3073)
Validation shape: (30, 3073)
Testing shape: (20, 3073)
Epoch 1/250, Loss: 89.96290005077684
Epoch 2/250, Loss: 80.96878004990624
Epoch 3/250, Loss: 76.12064591774445
Epoch 4/250, Loss: 69.45059711684236
Epoch 5/250, Loss: 64.27718533431805
Epoch 6/250, Loss: 59.572685159467675
Epoch 7/250, Loss: 55.06498596291524
Epoch 8/250, Loss: 51.571734970445036
Epoch 9/250, Loss: 48.98344437802798
Epoch 10/250, Loss: 46.731161359236346
Epoch 11/250, Loss: 46.12111029776543
Epoch 12/250, Loss: 46.91379793007343
Epoch 13/250, Loss: 44.33982299262832
Epoch 14/250, Loss: 45.96769328458648
Epoch 15/250, Loss: 42.11352658140855
Epoch 16/250, Loss: 44.212258924374254
Epoch 17/250, Loss: 40.15333979320379
Epoch 18/250, Loss: 42.32361352474082
Epoch 19/250, Loss: 37.857060954672605
Epoch 20/250, Loss: 38.325538805612375
Epoch 21/250, Loss: 34.982926536020784
Epoch 22/250, Loss: 36.13757324354823
Epoch 23/250, Loss: 32.057522