# Lab 3. Parametric approach
by Domrachev Ivan, B20-Ro-01

In [103]:
import numpy as np
from matplotlib import pyplot as plt

import torch
import torchvision
import torchvision.transforms as transforms

# Part 1. Loading the dataset

In [104]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True)

Files already downloaded and verified


Since the dataset is loaded from the torchflow, let's convert it to numpy array:

In [105]:
cifar10_data_np = trainset.data
n_samples, w, h, n_colors = cifar10_data_np.shape

cifar10_data_np = (
    cifar10_data_np.reshape(n_samples, -1) / 256
).astype(np.float64)

cifar10_labels_np = np.array(trainset.targets)
n_classes = len(trainset.classes)

cifar10_data_np.shape, cifar10_labels_np.shape

((50000, 3072), (50000,))

Now, let's split it onto train and validation datasets:

In [106]:
n_train = int(0.8 * n_samples)
n_val = n_samples - n_train

cifar10_train_data = cifar10_data_np[:n_train]
cifar10_val_data = cifar10_data_np[n_train:]

cifar10_train_labels = cifar10_labels_np[:n_train]
cifar10_val_labels = cifar10_labels_np[n_train:]

The next cell contains all the required functions. Some of them are modified to work not only elementwise, but also for the arrays of multiple samples.

In [None]:
def labels_encoding(labels: np.ndarray) -> np.ndarray:
    '''One Hot Encoding of the labels'''

    n_classes = len(np.unique(labels))
    encoded = np.zeros((labels.shape[0], n_classes))

    for i in range(labels.shape[0]):
        encoded[i, labels] = 1
    
    return encoded

def f (x: np.ndarray, W: np.ndarray) -> np.ndarray:
    '''Parametric predictor, y_hat = W @ x'''
    
    return (W @ x.T).T


def cross_entropy_loss(y_hat: np.ndarray, y: np.ndarray) -> np.ndarray:
    '''Cross Entropy loss function'''
    
    n = len(y)
    l = 0
    for i in range(n):
        l = (l - np.dot(y[i], np.log(y_hat[i]))) 
    return l / n


def softmax(y: np.ndarray) -> np.ndarray:
    '''Softmax function'''
    return (np.exp(y).T / np.exp(y).sum(axis=1)).T

The final part is to find the optimal matrix W, and validate its accuracy:

In [None]:
best_W, min_loss = None, float('inf')

In [112]:
cifar10_labels_encoded = labels_encoding(cifar10_train_labels)

n = 10**3
for _ in range(n):
    W = np.random.rand(n_classes, w*h*n_colors) / 100
    y_hat = softmax(f(cifar10_train_data, W))
    loss = cross_entropy_loss(y_hat, cifar10_labels_encoded)
    if loss < min_loss:
        min_loss = loss
        best_W = W

In [113]:
preds = softmax(f(cifar10_val_data, best_W))
correct_preds = (np.argmax(preds, axis=1) == cifar10_val_labels).sum()

print(f'Accuracy is {correct_preds}/{n_val}, {correct_preds/n_val}%')

Accuracy is 1108/10000, 0.1108%
