# Import

In [None]:
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import numpy as np
from torch import nn

# Hyperparameters

In [None]:
batch_size = 64

# Prepare dataset

In [None]:
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

# Network Utils

In [None]:
class Linear():
    def __init__(self,input_dim: int, output_dim: int, bias: np.ndarray = None):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.weights = np.random.rand(self.output_dim, self.input_dim).astype(np.float32)
        self.weights.fill(0.25)
        self.bias = bias
    
    def __call__(self, x: np.ndarray) -> np.ndarray:
        y = x @ self.weights.T
        if self.bias is not None:
            y += self.bias
        return y
    
def ReLU(x: np.ndarray) -> np.ndarray:
    return np.clip(x, min=0)

def CrossEntropyLoss(x: np.ndarray, y: np.ndarray):
    c = x.max(axis=1).reshape(-1, 1)
    lse = c + np.log(np.exp(x - c).sum(axis=1)).reshape(-1, 1)
    log_softmax = x - lse
    loss = -log_softmax[np.arange(log_softmax.shape[0]), y]
    return loss.mean()

In [121]:
a = np.array([[1, 2, 3], [4, 5, 6]])
t = np.array([0, 2])
print(a, "\n")
print(t, "\n")
print(a[np.arange(a.shape[0]), t])

[[1 2 3]
 [4 5 6]] 

[0 2] 

[1 6]


# Model

In [123]:
class Network():
    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int, bias: np.ndarray = None):
        self.layer1 = Linear(input_dim, hidden_dim, bias)
        self.layer2 = Linear(hidden_dim, output_dim, bias)
    
    def __call__(self, input: np.ndarray) -> np.ndarray:
        x = input.reshape(input.shape[0], -1)
        x = ReLU(self.layer1(x))
        x = self.layer2(x)

        return x
    
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )
        self.linear_relu_stack.apply(init_weights)

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

def init_weights(m):
    if isinstance(m, nn.Linear):
        m.weight.data.fill_(0.25)
        m.bias.data.fill_(0)


In [147]:
mynet = Network(28*28, 512, 10)
torchnet = NeuralNetwork()
loss_fn = nn.CrossEntropyLoss(reduction="none")
softmax = nn.Softmax()

train_features, train_labels = next(iter(train_dataloader))
print(loss_fn(torchnet(train_features), train_labels))
t = mynet(train_features.numpy())
print(CrossEntropyLoss(t, train_labels.numpy()))

tensor([2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026,
        2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026,
        2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026,
        2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026,
        2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026,
        2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026,
        2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026, 2.3026,
        2.3026], grad_fn=<NllLossBackward0>)
2.302784
