# FROM SCRATCH
_custom numpy autograd_

This notebook is intended for the submission to test the code.

The results were achieved running train_wandb.py and train_wandb_2nd_dataset.py those can however train for many hours

Dataset are reduced for faster run speed 
___

In [7]:
# download data
from importlib.resources import path
import numpy as np
import engine
import nn
import optim
import transforms


import torch # ONLY USED FOR DATA DOWNLOAD
from torchvision import datasets, transforms
import numpy as np
import os

def download_and_save_channel_last(save_path='fashion_mnist_channel_last.npz', data_dir='./data'):

    transform = transforms.ToTensor()

    train_set = datasets.FashionMNIST(root=data_dir, train=True, download=True, transform=transform)
    test_set = datasets.FashionMNIST(root=data_dir, train=False, download=True, transform=transform)


    x_train = train_set.data.numpy()
    y_train = train_set.targets.numpy()
    
    x_test = test_set.data.numpy()
    y_test = test_set.targets.numpy()

    x_train = np.expand_dims(x_train, axis=-1)
    x_test = np.expand_dims(x_test, axis=-1)

    print(f"Train images shape: {x_train.shape} (N, H, W, C)")
    print(f"Test images shape:  {x_test.shape} (N, H, W, C)")
    # save to csv
    save_path_train = "fashion-mnist_train.csv"
    save_path_test = "fashion-mnist_test.csv"
    np.savetxt(save_path_train, np.column_stack((y_train, x_train.reshape(x_train.shape[0], -1))), delimiter=",", header="label," + ",".join([f"pixel{i}" for i in range(x_train.shape[1]*x_train.shape[2]*x_train.shape[3])]), comments='')
    np.savetxt(save_path_test, np.column_stack((y_test, x_test.reshape(x_test.shape[0], -1))), delimiter=",", header="label," + ",".join([f"pixel{i}" for i in range(x_test.shape[1]*x_test.shape[2]*x_test.shape[3])]), comments='')
    
    
    
    print("Done!")
download_and_save_channel_last()

Train images shape: (60000, 28, 28, 1) (N, H, W, C)
Test images shape:  (10000, 28, 28, 1) (N, H, W, C)
Done!


In [None]:
class MockWandB:
    def __init__(self, config):
        self.config = type('Config', (), config)
    def log(self, data):
        pass
    def finish(self):
        pass


def load_data():
    try:
        train = np.loadtxt("fashion-mnist_train.csv", delimiter=",", skiprows=1)
        test = np.loadtxt("fashion-mnist_test.csv", delimiter=",", skiprows=1)
        
    except OSError:
        print("Warning: Absolute path failed. Trying local 'datasets' folder...")
        try:
            train = np.loadtxt("fashion-mnist_train.csv", delimiter=",", skiprows=1)
            test = np.loadtxt("fashion-mnist_test.csv", delimiter=",", skiprows=1)
        except OSError:
            print("Error: Dataset not found. download manually.")
            exit()

    # Split X and Y
    y_train_raw = train[:, 0].astype(int)
    x_train = train[:, 1:]
    
    y_test_raw = test[:, 0].astype(int)
    x_test = test[:, 1:]
    # Normalize X (0-255 -> 0-1)
    x_train /= 255.0
    x_test /= 255.0
    x_train = x_train.astype(np.float32)
    x_test = x_test.astype(np.float32)
    # z-score normalization
    mean = np.mean(x_train, axis=0, keepdims=True)
    std = np.std(x_train, axis=0, keepdims=True)
    x_train = (x_train - mean) / std
    x_test = (x_test - mean) / std
    # One-Hot Encode Y
    classes = 10
    y_train = np.eye(classes)[y_train_raw]
    y_test = np.eye(classes)[y_test_raw]

    return x_train, y_train, x_test, y_test

def evaluate(model, x_test, y_test):
    model.eval()
    with engine.no_grad():
        inputs = engine.Tensor(x_test)
        targets = engine.Tensor(y_test)
        preds = model(inputs)
        pred_labels = np.argmax(preds.data, axis=1)
        true_labels = np.argmax(targets.data, axis=1)
        acc = np.mean(pred_labels == true_labels)
    return acc


In [14]:
def main():
    config = {
        "learning_rate": 0.001,
        "epochs": 15,
        "batch_size": 16,
        "architecture": "MLP-Reduced",
        "dataset": "CIFAR10",
        "optimizer": "AdamW",          
        "momentum": 0.9,                
        "weight_decay": 1e-4,           
        "augmentation": "RandomRotation(+/- 15)"
    }
    
    wandb = MockWandB(config)
    
    lr = wandb.config.learning_rate
    batch_size = wandb.config.batch_size
    epochs = wandb.config.epochs
    weight_decay = wandb.config.weight_decay
    optim_name = wandb.config.optimizer
    momentum = wandb.config.momentum
    
    x_train, y_train, x_test, y_test = load_data()
    if x_train is None: return

    num_samples = x_train.shape[0]

    model = nn.Sequential(
        nn.Linear(784, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, 10)
    )

    criterion = nn.CrossEntropyLoss()

    if optim_name == "AdamW":
        optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif optim_name == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    


    for epoch in range(epochs):
        model.train() 
        epoch_loss = 0
        if epoch % 5 == 0:
            optimizer.lr -= 0.00005  
        
        indices = np.arange(num_samples)
        np.random.shuffle(indices)
        x_train = x_train[indices]
        y_train = y_train[indices]
        
        # reduce size for faster testing
        num_samples_red = num_samples // 30
        x_train_reduced = x_train[:num_samples_red]
        y_train_reduced = y_train[:num_samples_red]
        for start in range(0, num_samples_red, batch_size):
            end = start + batch_size
            x_batch_np = x_train_reduced[start:end]
            y_batch = engine.Tensor(y_train_reduced[start:end])
    
            x_batch = engine.Tensor(x_batch_np)

            preds = model(x_batch)
            loss = criterion(preds, y_batch)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += float(loss.data)

        avg_train_loss = epoch_loss / (num_samples // batch_size)
        test_acc = evaluate(model, x_test, y_test)
        
        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {avg_train_loss:.4f} | Test Acc: {test_acc*100:.2f}%")
        
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": avg_train_loss,
            "test_accuracy": test_acc
        })

    wandb.finish()

main()


Epoch 1/15 | Train Loss: 0.0304 | Test Acc: 77.62%
Epoch 2/15 | Train Loss: 0.0226 | Test Acc: 78.39%
Epoch 3/15 | Train Loss: 0.0187 | Test Acc: 81.09%
Epoch 4/15 | Train Loss: 0.0180 | Test Acc: 81.74%
Epoch 5/15 | Train Loss: 0.0178 | Test Acc: 82.36%
Epoch 6/15 | Train Loss: 0.0175 | Test Acc: 82.77%
Epoch 7/15 | Train Loss: 0.0172 | Test Acc: 82.30%
Epoch 8/15 | Train Loss: 0.0161 | Test Acc: 83.32%
Epoch 9/15 | Train Loss: 0.0164 | Test Acc: 83.36%
Epoch 10/15 | Train Loss: 0.0172 | Test Acc: 84.43%
Epoch 11/15 | Train Loss: 0.0154 | Test Acc: 83.91%
Epoch 12/15 | Train Loss: 0.0163 | Test Acc: 83.96%
Epoch 13/15 | Train Loss: 0.0157 | Test Acc: 84.30%
Epoch 14/15 | Train Loss: 0.0145 | Test Acc: 83.80%


KeyboardInterrupt: 