In [None]:
import torch
from torch import nn
from torchvision import datasets
from torchvision import transforms as Tf
from torch.utils.data import Dataset
from torch.utils.data import DataLoader


In [None]:
import sys
import os
import numpy as np

In [None]:
# Put the parent directory on the import path (presumably where the local
# `embedml` package lives — confirm). The membership check keeps this cell
# idempotent: re-running it no longer piles up duplicate sys.path entries.
if os.path.abspath('../') not in sys.path:
    sys.path.append(os.path.abspath('../'))

In [None]:
from embedml.tensor import Tensor
from embedml.nn import Linear
from embedml.nn import Module
from embedml.nn import Softmax, LogSoftmax

In [None]:
# Load the MNIST training split from ./data, downloading it first if needed.
ds = datasets.MNIST(root='data', train=True, download=True)

In [None]:
class dataset(Dataset):
    """In-memory dataset of flattened, per-sample standardised 28x28 images.

    Args:
        data: Image tensor that can be reshaped to ``(N, 28 * 28)``.
        targets: Labels, indexed in step with ``data``.

    Every sample is shifted to zero mean and scaled to unit standard
    deviation over its own pixels. The standard deviation is clamped away
    from zero, so a constant image normalises to all zeros instead of NaN.
    """

    # Lower bound applied to the per-sample standard deviation (the divisor).
    _MIN_STD = 1e-8

    def __init__(self, data, targets):
        super().__init__()
        # Sample count is taken before flattening.
        self.len = len(data)
        flat = data.reshape((-1, 28 * 28)).float()
        mean = flat.mean(dim=-1, keepdim=True)
        # Clamping leaves every non-degenerate std untouched and only guards
        # against a 0/0 division for images whose pixels are all equal.
        std = torch.clamp(flat.std(dim=-1, keepdim=True), min=self._MIN_STD)
        self.data = (flat - mean) / std
        self.targets = targets

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at ``idx``."""
        return self.data[idx], self.targets[idx]

    def __len__(self):
        """Return the number of samples passed to the constructor."""
        return self.len

In [None]:
# The first 50,000 samples are used for training; the remainder is held out.
train_ds = (ds.data[:50000], ds.targets[:50000])
test_ds = (ds.data[50000:], ds.targets[50000:])

# Wrap each (images, labels) pair in the normalising `dataset` class.
# NOTE: the name `eval` shadows the Python builtin of the same name.
train = dataset(train_ds[0], train_ds[1])
eval = dataset(test_ds[0], test_ds[1])

# Training batches of 64 (a trailing partial batch is dropped);
# evaluation batches of 16.
t_dl = DataLoader(dataset=train, batch_size=64, drop_last=True)
e_dl = DataLoader(dataset=eval, batch_size=16)


In [None]:
class simple(Module):
    """Two stacked linear layers (784 -> 32 -> 10) ending in a log-softmax.

    No activation is applied between the two linear layers; ``LogSoftmax``
    is constructed with ``dim=1``.
    """

    def __init__(self):
        super().__init__()
        self.l1 = Linear(28 * 28, 32)
        self.l2 = Linear(32, 10)
        self.ac = LogSoftmax(dim=1)

    def forward(self, data):
        """Run ``data`` through both linear layers and the log-softmax."""
        hidden = self.l1(data)
        scores = self.l2(hidden)
        return self.ac(scores)

In [None]:
def one_hot(label, num_classes):
    """Encode integer class labels as a dense one-hot matrix.

    Args:
        label: Array-like of integer class ids, one id per row (shape
            ``(n,)`` or ``(n, 1)``); anything ``np.asarray`` accepts.
        num_classes: Width of the encoding; valid ids are
            ``0 .. num_classes - 1``.

    Returns:
        ``np.ndarray`` of shape ``(n, num_classes)`` (float64) holding a
        single 1 per row, in the column given by that row's label.

    Raises:
        ValueError: If there is not exactly one label per row, or if a
            label lies outside ``[0, num_classes)``.
    """
    labels = np.asarray(label)
    n_rows = labels.shape[0]
    flat = labels.reshape(-1)
    if flat.shape[0] != n_rows:
        raise ValueError(
            f"expected one label per row, got shape {labels.shape}"
        )

    y = np.zeros((n_rows, num_classes))
    if n_rows == 0:
        # Nothing to mark; also avoids min()/max() on an empty array.
        return y

    # An out-of-range id must not be indexed: with flat offsets it would
    # silently set a cell in a neighbouring row instead of failing.
    if flat.min() < 0 or flat.max() >= num_classes:
        raise ValueError(
            f"labels must lie in [0, {num_classes}), "
            f"got range [{flat.min()}, {flat.max()}]"
        )

    y[np.arange(n_rows), flat] = 1
    return y
    

In [None]:
class optim:
    """Plain gradient-descent optimiser over a collection of parameters.

    Args:
        params: Collection of parameters; it is iterated on every call to
            ``step`` and ``zero_grad``. Each element must expose ``grad``
            and support ``-=``.
        lr: Learning rate; stored wrapped as ``Tensor(np.array(lr))``.
    """

    def __init__(self, params, lr):
        learning_rate = np.array(lr)
        self.params = params
        self.lr = Tensor(learning_rate)

    def step(self):
        """Subtract ``grad * lr`` from every parameter."""
        for weight in self.params:
            delta = weight.grad * self.lr
            weight -= delta

    def zero_grad(self):
        """Reset every gradient by multiplying it by zero."""
        for weight in self.params:
            stale = weight.grad
            weight.grad = stale * 0

In [None]:
# Build the `simple` network that the cells below train and evaluate.
m = simple()

In [None]:
# Collect the model's parameters and hand them to the gradient-descent
# optimiser with a learning rate of 1e-3.
p = m.get_parameters()
opt = optim(params=p, lr=0.001)

In [None]:
# Scale every parameter's values down by a factor of 1000 before training.
# Not idempotent: each re-run of this cell shrinks the values again.
for i, param in enumerate(p):
    param.data = param.data * 0.001


In [None]:
# Held-out evaluation set. The inputs come from `eval.data`, so they are
# flattened and standardised by `dataset` exactly like the training batches.
# Feeding the raw pixels from `test_ds[0]` would evaluate the model on inputs
# scaled differently from the ones it was trained on.
target = np.array(eval.targets)
t = Tensor(eval.data, requires_grad=False)

In [None]:
# Sanity check: display the shape of the model output for the evaluation inputs.
m(t).shape

In [None]:
# History of (training loss, held-out accuracy) pairs, one entry per batch.
loss_h = []

# One pass over the training loader, one optimiser step per batch.
for x, l in t_dl:
    x = Tensor(x, requires_grad=False)
    # Model output; the network ends in LogSoftmax, so these are log-probabilities.
    y = m(x)
    T = Tensor(one_hot(l, 10), requires_grad=False)
    # The one-hot mask keeps only each sample's true-class entry; summing and
    # negating gives the negative log-likelihood summed over the batch.
    loss = (y * T).sum() * -1
    loss.backward()   
    opt.step()
    opt.zero_grad()
    
    # Accuracy in percent on the whole held-out set `t` after this step.
    # NOTE(review): this re-runs the model on all of `t` for every batch,
    # which likely dominates the runtime — consider evaluating less often.
    label = m(t).data.argmax(axis=-1)
    # NOTE(review): `target.shape` is a tuple, so (assuming `label` is a NumPy
    # array) the division broadcasts and `acc` is a length-1 array, not a scalar.
    # `target.shape[0]` was probably intended — confirm before changing, since
    # the plotting cell indexes the history with a trailing axis.
    acc = (label == target).sum() * 100 / target.shape
    loss_h.append((loss.data[0], acc))
# Report the accuracy measured after the final batch.
print(f"{acc=}")

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Stack the per-batch (loss, accuracy) history into one array for plotting.
# NOTE: this rebinds `p`, which earlier cells used for the model parameters.
p = np.asarray(loss_h)

In [None]:
# Plot the recorded history against the batch index: after dropping the
# trailing axis, column 0 is the loss and column 1 the accuracy.
plt.plot(p[:,:,0])