In [1]:
from tinygrad.densetensor import DenseTensor
from tinygrad.sparsetensor import SparseTensor
import numpy as np
import tinygrad.optim as optim
from extra.utils import get_parameters
from test.test_mnist import fetch_mnist
from tqdm import trange

%load_ext autoreload
%autoreload 2

DEVICE:GPU


In [14]:
# Problem sizes: dim1 is reused below as the batch size (BS = dim1),
# dim2 is the width of one input row, dim3 the number of output classes.
dim1, dim2, dim3 = 32, 784, 10

# Fixed seed so both models start from the same weights.
np.random.seed(9)

# Shared initial weight matrix of shape (dim2, dim3): float32 samples scaled by 1/1000.
seedmat = np.random.randn(dim2, dim3).astype(np.float32) / 1000

DEVICE:GPU


In [15]:
# Load the train/test splits via the test-suite helper, then display the
# training input shape as the cell output.
X_train, Y_train, X_test, Y_test = fetch_mnist()
X_train.shape

(60000, 784)

In [16]:
# Random probe batch: dim1 rows of dim2 inputs and dim1 rows of dim3 targets.
# NOTE: the names x and y are rebound inside the training loops further down.
x_init = np.random.randn(dim1,dim2).astype(np.float32)
x = DenseTensor(x_init)
# randn returns float64; cast to float32 so y matches the dtype used for
# x_init and seedmat.
y = DenseTensor(np.random.randn(dim1,dim3).astype(np.float32))



In [17]:
class MLP:
  """Single linear layer followed by log-softmax, with a DenseTensor weight.

  The weight ``W`` is initialised from the notebook-level ``seedmat``.
  """

  def __init__(self):
    self.W = DenseTensor(seedmat)

  def parameters(self):
    """Return this model's parameters as collected by ``get_parameters``."""
    return get_parameters(self)

  def forward(self, x):
    """Compute ``x.dot(W)`` and apply ``logsoftmax`` to the result."""
    logits = x.dot(self.W)
    return logits.logsoftmax()

In [18]:
class MLP2:
  """Single linear layer followed by log-softmax, with a SparseTensor weight.

  The weight ``W`` is initialised from the notebook-level ``seedmat``.
  """

  def __init__(self):
    self.W = SparseTensor(seedmat)

  def parameters(self):
    """Return this model's parameters as collected by ``get_parameters``."""
    return get_parameters(self)

  def forward(self, x):
    """Compute ``x.dot(W)`` and apply ``logsoftmax`` to the result."""
    logits = x.dot(self.W)
    return logits.logsoftmax()

In [19]:
def loss_fn(y, y_pred):
    """Return the element-wise square root of the squared difference.

    No reduction is applied: the result has one entry per element of
    ``y - y_pred``.
    """
    squared_diff = (y - y_pred) ** 2
    return squared_diff ** .5

In [20]:
def sparse_categorical_crossentropy(out, Y):
  """Negative log-likelihood of integer labels ``Y`` under log-probabilities ``out``.

  out: tensor whose last axis has one entry per class (it must provide
       ``.shape``, ``.mul`` and the result must provide ``.mean``).
  Y:   numpy array of integer class indices.

  Builds a mask that is ``-num_classes`` at each row's true class and 0
  elsewhere, multiplies it into ``out`` and takes the mean over all entries.
  """
  n_classes = out.shape[-1]
  labels = Y.flatten()
  n_rows = labels.shape[0]
  mask = np.zeros((n_rows, n_classes), np.float32)
  # Scaling by num_classes makes the mean over every entry equal the per-row
  # average (torch's NLL loss returns one value per row).
  mask[np.arange(n_rows), labels] = -1.0*n_classes
  mask = mask.reshape(list(Y.shape)+[n_classes])
  return out.mul(DenseTensor(mask)).mean()

### Dense

In [21]:
# Instantiate the DenseTensor-backed model.
model = MLP()

In [22]:
# Plain SGD over the dense model's parameters.
optimizer = optim.SGD(model.parameters(), lr=.0001)

In [23]:
# Sanity check: display the shapes of the training inputs and labels.
X_train.shape, Y_train.shape

((60000, 784), (60000,))

In [29]:
# Preview one batch wrapped as a DenseTensor. Slices with dim1 (the batch
# size) directly so the cell does not depend on `i`/`BS`, which are only
# defined by the training loop further down.
DenseTensor(X_train[:dim1])

<DenseTensor <GPUBuffer with shape (32, 784)> with grad None>

In [30]:
# Rebind loss_fn to the cross-entropy loss; this replaces the earlier
# `def loss_fn` for every cell that runs after this one.
loss_fn = sparse_categorical_crossentropy

In [31]:
# Train and evaluate the dense model.
BS = dim1        # batch size
epochs = 1
LOG_EVERY = 32   # number of batches between progress reports

# NOTE(review): batches are fed unscaled here, whereas the sparse run further
# down scales its training batches by 1/255 -- confirm which preprocessing is
# intended before comparing the two runs.
for epoch in range(epochs):
    # ---- training pass ----
    losses = []
    accs = []
    for i in trange(X_train.shape[0] // BS):
        x = DenseTensor(X_train[i*BS:(i+1)*BS])
        y = Y_train[i*BS:(i+1)*BS]
        optimizer.zero_grad()
        res = model.forward(x)
        loss = loss_fn(res, y)
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest output per row.
        cat = np.argmax(res.cpu().data, axis=-1)
        accuracy = (cat == y).mean()

        losses.append(loss.cpu().data)
        accs.append(accuracy)
        if i % LOG_EVERY == 0:
            # Report the running means, then start a fresh window.
            print("LOSS:%.2f  ACC:%.2f" % (np.array(losses).mean(), np.array(accs).mean()))
            accs, losses = [], []

    # ---- evaluation pass (no optimizer step) ----
    losses = []
    accs = []
    for i in trange(X_test.shape[0] // BS):
        x = DenseTensor(X_test[i*BS:(i+1)*BS])
        y = Y_test[i*BS:(i+1)*BS]
        res = model.forward(x)
        loss = loss_fn(res, y)

        cat = np.argmax(res.cpu().data, axis=-1)
        accuracy = (cat == y).mean()

        losses.append(loss.cpu().data)
        accs.append(accuracy)
        if i % LOG_EVERY == 0:
            print("TEST_LOSS:%.2f  TEST_ACC:%.2f" % (np.array(losses).mean(), np.array(accs).mean()))
            accs, losses = [], []

  0%|                                                                                                                                                               | 0/1875 [00:00<?, ?it/s]


IndexError: list index out of range

In [None]:
# List the dense model's parameters as collected by get_parameters.
model.parameters()

In [None]:
# Inspect the values of the most recent dense forward output `res`.
res.cpu().data

In [None]:
# Inspect the value of the most recent dense loss.
loss.cpu().data

In [None]:
# Inspect the gradient stored on the dense weight W.
model.W.grad.cpu().data

In [None]:
 # Inspect the current values of the dense weight W.
 model.W.cpu().data

### Sparse

In [None]:
# Instantiate the SparseTensor-backed model.
model2 = MLP2()

In [None]:
# Plain SGD over the sparse model's parameters.
optimizer2 = optim.SGD(model2.parameters(), lr=.0001)

In [None]:
# Train and evaluate the sparse model.
BS = dim1        # batch size
epochs = 10
LOG_EVERY = 32   # number of batches between progress reports

for epoch in range(epochs):
    # ---- training pass ----
    losses = []
    accs = []
    for i in trange(X_train.shape[0] // BS):
        x = DenseTensor(X_train[i*BS:(i+1)*BS])/255
        y = Y_train[i*BS:(i+1)*BS]
        optimizer2.zero_grad()
        res2 = model2.forward(x)
        loss2 = loss_fn(res2, y)
        loss2.backward()
        optimizer2.step()

        # Predicted class = index of the largest output per row.
        cat = np.argmax(res2.cpu().data, axis=-1)
        accuracy = (cat == y).mean()

        losses.append(loss2.cpu().data)
        accs.append(accuracy)
        if i % LOG_EVERY == 0:
            # Report the running means, then start a fresh window.
            print("LOSS:%.2f  ACC:%.2f" % (np.array(losses).mean(), np.array(accs).mean()))
            accs, losses = [], []

    # ---- evaluation pass (no optimizer step) ----
    losses = []
    accs = []
    for i in trange(X_test.shape[0] // BS):
        # Apply the same 1/255 scaling as the training batches so the model is
        # evaluated on inputs in the range it was trained on.
        x = DenseTensor(X_test[i*BS:(i+1)*BS])/255
        y = Y_test[i*BS:(i+1)*BS]
        res2 = model2.forward(x)
        loss2 = loss_fn(res2, y)

        cat = np.argmax(res2.cpu().data, axis=-1)
        accuracy = (cat == y).mean()

        losses.append(loss2.cpu().data)
        accs.append(accuracy)
        if i % LOG_EVERY == 0:
            print("TEST_LOSS:%.2f  TEST_ACC:%.2f" % (np.array(losses).mean(), np.array(accs).mean()))
            accs, losses = [], []

In [None]:
# Element-wise equality of the latest sparse and dense forward outputs.
# NOTE(review): this is exact float equality; consider np.isclose if small
# numerical differences between the two backends are acceptable.
res2.cpu().data==res.cpu().data

In [None]:
# Inspect the values of the most recent sparse forward output `res2`.
res2.cpu().data

In [None]:
# Exact equality check between the latest dense and sparse loss values.
loss.cpu().data==loss2.cpu().data

In [None]:
# Inspect the value of the most recent sparse loss.
loss2.cpu().data

In [None]:
# Element-wise equality of the gradients stored on the dense and sparse outputs.
res.grad.cpu().data==res2.grad.cpu().data

In [None]:
# Inspect the gradient stored on the sparse forward output.
res2.grad.cpu().data

In [None]:
# Display the weight-gradient objects of both models side by side.
model.W.grad, model2.W.grad

In [None]:
# Element-wise equality of the dense and sparse weight gradients.
model.W.grad.cpu().data==model2.W.grad.cpu().data

In [None]:
# Inspect the dense weight gradient values.
model.W.grad.cpu().data

In [None]:
# Inspect the sparse weight gradient values.
model2.W.grad.cpu().data

In [None]:
# Display the dense weight-gradient object itself (its repr, not the values).
model.W.grad

In [None]:
# Inspect the current values of the sparse weight W.
model2.W.cpu().data