# Import

In [111]:
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import numpy as np
from torch import nn
import torch

# Hyperparameters

In [3]:
# Number of samples per mini-batch; passed to both DataLoaders in the dataset cell.
batch_size = 64

# Prepare dataset

In [4]:
def _load_fashion_mnist(train: bool):
    """Build the FashionMNIST split selected by `train` (root "data", download=True, ToTensor)."""
    return datasets.FashionMNIST(
        root="data",
        train=train,
        download=True,
        transform=ToTensor(),
    )


# Both splits share the storage root and tensor conversion; only the `train` flag differs.
training_data = _load_fashion_mnist(train=True)
test_data = _load_fashion_mnist(train=False)

# No `shuffle` argument is passed, so the loaders keep DataLoader's default ordering.
train_dataloader = DataLoader(dataset=training_data, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size)

# Network Utils

In [105]:
class Linear():
    """Fully connected layer computing ``y = x @ W.T + b`` on NumPy arrays.

    Attributes:
        input_dim: number of input features.
        output_dim: number of output features.
        k: ``sqrt(1 / input_dim)``. Not used by the default initialisation;
            pass the same value as ``init_bound`` to get the
            U(-sqrt(1/in), sqrt(1/in)) range that ``torch.nn.Linear``
            documents as its default.
        weights: float32 array of shape ``(output_dim, input_dim)``.
        bias: float32 array of shape ``(output_dim,)``.
    """

    def __init__(self, input_dim: int, output_dim: int, seed: int = 257, init_bound: float = 1.0):
        """Draw weights and bias from U(-init_bound, init_bound).

        Args:
            input_dim: number of input features.
            output_dim: number of output features.
            seed: seed for the private NumPy generator. The default (257) and
                the draw order (weights first, then bias) mirror
                ``init_weights`` so the NumPy and PyTorch networks in this
                notebook start from identical parameters.
            init_bound: half-width of the uniform range; defaults to 1.0.
        """
        gen = np.random.default_rng(seed)
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.k = np.sqrt(1.0 / input_dim)
        # Order matters: weights are drawn before the bias from the same stream.
        self.weights = gen.uniform(
            -init_bound, init_bound, size=(output_dim, input_dim)
        ).astype(np.float32)
        self.bias = gen.uniform(-init_bound, init_bound, size=output_dim).astype(np.float32)

    def __call__(self, x: np.ndarray) -> np.ndarray:
        """Apply the affine map to ``x`` whose last axis has ``input_dim`` entries."""
        return x @ self.weights.T + self.bias
    
def ReLU(x: np.ndarray) -> np.ndarray:
    """Element-wise rectified linear unit.

    Args:
        x: array of any shape.

    Returns:
        A new array of the same shape where every negative entry is replaced
        by 0 and all other entries are unchanged.
    """
    # np.maximum is available on every NumPy release; the `min=` keyword of
    # np.clip only exists from NumPy 2.1 on and raises TypeError before that.
    return np.maximum(x, 0)

def CrossEntropyLoss(x: np.ndarray, y: np.ndarray):
    """Mean cross-entropy of logits `x` against integer class indices `y`.

    `x` is indexed as (sample, class); `y` supplies one column index per row.
    The log-softmax is computed with the log-sum-exp trick: each row is shifted
    by its maximum before exponentiating so large logits do not overflow.
    """
    row_ids = np.arange(x.shape[0])

    # Per-row maximum as a column vector so it broadcasts against x.
    row_max = np.reshape(x.max(axis=1), (-1, 1))
    shifted_sum = np.exp(x - row_max).sum(axis=1)
    log_norm = row_max + np.reshape(np.log(shifted_sum), (-1, 1))

    log_probs = x - log_norm
    # Pick the log-probability of the labelled class in every row.
    per_sample = -log_probs[row_ids, y]
    return per_sample.mean()

In [121]:
# Demo of the integer-array indexing used inside CrossEntropyLoss: pairing the
# row indices [0, 1] with the column indices in `t` selects one element per row.
a = np.array([[1, 2, 3], [4, 5, 6]])
t = np.array([0, 2])
print(a, "\n")
print(t, "\n")
print(a[np.arange(a.shape[0]), t])  # picks a[0, 0] and a[1, 2]

[[1 2 3]
 [4 5 6]] 

[0 2] 

[1 6]


# Model

In [None]:
class Network():
    """Three-layer perceptron built from the NumPy `Linear` layer and `ReLU`."""

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int):
        """Create the layers input_dim -> hidden_dim -> hidden_dim -> output_dim."""
        widths = (input_dim, hidden_dim, hidden_dim, output_dim)
        # Consecutive width pairs give each layer's (in, out) sizes, built in order.
        self.layer1, self.layer2, self.layer3 = [
            Linear(n_in, n_out) for n_in, n_out in zip(widths, widths[1:])
        ]

    def __call__(self, input: np.ndarray) -> np.ndarray:
        """Flatten each sample of `input` and return the final layer's raw outputs."""
        n_samples = input.shape[0]
        hidden = input.reshape(n_samples, -1)
        # ReLU follows the first two layers only; the last layer is left linear.
        for layer in (self.layer1, self.layer2):
            hidden = ReLU(layer(hidden))
        return self.layer3(hidden)
    
class NeuralNetwork(nn.Module):
    """PyTorch counterpart of `Network`: 784 -> 512 -> 512 -> 10 with ReLU in between."""

    def __init__(self):
        """Assemble the flatten + linear/ReLU stack and re-initialise its linear layers."""
        super().__init__()
        n_inputs, n_hidden, n_outputs = 28 * 28, 512, 10

        self.flatten = nn.Flatten()
        stages = [
            nn.Linear(n_inputs, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_outputs),
        ]
        self.linear_relu_stack = nn.Sequential(*stages)
        # init_weights only acts on nn.Linear instances, so the ReLU modules
        # visited by apply() are left as they are.
        self.linear_relu_stack.apply(init_weights)

    def forward(self, x):
        """Return the logits for a batch `x` after flattening it."""
        return self.linear_relu_stack(self.flatten(x))

def init_weights(m):
    """Re-initialise an ``nn.Linear`` with seeded U(-1, 1) values.

    Intended for ``nn.Module.apply``: modules that are not ``nn.Linear`` are
    left untouched. Each call creates a fresh generator seeded with 257 and
    draws the weight matrix before the bias, matching the NumPy ``Linear``
    class so both networks start from the same parameters.

    Args:
        m: any ``nn.Module``. A linear layer built with ``bias=False`` only has
            its weight overwritten.
    """
    if not isinstance(m, nn.Linear):
        return

    gen = np.random.default_rng(257)
    weight = gen.uniform(-1, 1, size=(m.out_features, m.in_features))
    # Copy in place under no_grad instead of rebinding `.data`, so the
    # parameters keep their identity, dtype and device.
    with torch.no_grad():
        m.weight.copy_(torch.tensor(weight, dtype=torch.float32))
        if m.bias is not None:
            bias = gen.uniform(-1, 1, size=m.out_features)
            m.bias.copy_(torch.tensor(bias, dtype=torch.float32))


In [120]:
# Build the NumPy and PyTorch networks; both are initialised from the same
# seeded draws, so they should produce the same loss on the same batch.
mynet = Network(28*28, 512, 10)
torchnet = NeuralNetwork()
loss_fn = nn.CrossEntropyLoss()
# Explicit class axis for (batch, classes) logits; omitting `dim` is deprecated
# and makes PyTorch warn and guess the axis when the module is called.
softmax = nn.Softmax(dim=1)

train_features, train_labels = next(iter(train_dataloader))
torch_loss = loss_fn(torchnet(train_features), train_labels)
print(torch_loss)
t = mynet(train_features.numpy())
numpy_loss = CrossEntropyLoss(t, train_labels.numpy())
print(numpy_loss)

# Fail loudly if the two implementations drift apart instead of relying on
# eyeballing the printed numbers; the tolerance allows for float32 round-off.
np.testing.assert_allclose(numpy_loss, torch_loss.item(), rtol=1e-3)

tensor(121.8471, grad_fn=<NllLossBackward0>)
121.84706
