In [1]:
from tinygrad.densetensor import DenseTensor
from tinygrad.sparsetensor import SparseTensor
import numpy as np
import tinygrad.optim as optim
from extra.utils import get_parameters
from test.test_mnist import fetch_mnist
from tqdm import trange

%load_ext autoreload
%autoreload 2

DEVICE:GPU


In [2]:
dim1 = 32
dim2 = 784
dim3 = 10

np.random.seed(9)

seedmat = np.random.randn(dim2,dim3).astype(np.float32) / 1000

In [3]:
X_train, Y_train, X_test, Y_test = fetch_mnist()
X_train.shape

(60000, 784)

In [4]:
x_init = np.random.randn(dim1,dim2).astype(np.float32)
x = DenseTensor(x_init)
y = DenseTensor(np.random.randn(dim1,dim3))



In [5]:
class MLP:
  def __init__(self):
    self.W = DenseTensor(seedmat)

  def parameters(self):
    return get_parameters(self)

  def forward(self, x):
    out = x.dot(self.W)
    out = out.logsoftmax()
    #out = out.mul(self.m).add(self.m).sum()
    return out

In [6]:
class MLP2:
  def __init__(self):
    self.W = SparseTensor(seedmat)

  def parameters(self):
    return get_parameters(self)

  def forward(self, x):
    out = x.dot(self.W)
    out = out.logsoftmax()
    #out = out.mul(self.m).add(self.m).sum()
    return out

In [7]:
def loss_fn(y, y_pred):
    return ((y-y_pred)**2)**.5

In [8]:
def sparse_categorical_crossentropy(out, Y):
  num_classes = out.shape[-1]
  YY = Y.flatten()
  y = np.zeros((YY.shape[0], num_classes), np.float32)
  # correct loss for NLL, torch NLL loss returns one per row
  y[range(y.shape[0]),YY] = -1.0*num_classes
  y = y.reshape(list(Y.shape)+[num_classes])
  y = DenseTensor(y)
  return out.mul(y).mean()

### Dense

In [9]:
model = MLP()

In [10]:
optimizer = optim.SGD(model.parameters(), lr=.0001)

In [11]:
X_train.shape, Y_train.shape

((60000, 784), (60000,))

In [12]:
loss_fn = sparse_categorical_crossentropy

In [13]:
BS = dim1
epochs = 1

for epoch in range(epochs):
    losses = []
    accs = []
    for i in trange(int(X_train.shape[0]/BS)):
        x = DenseTensor(X_train[i*BS:(i+1)*BS])
        y = Y_train[i*BS:(i+1)*BS]
        optimizer.zero_grad()
        res = model.forward(x)
        loss = loss_fn(res, y)
        #print(res.cpu().data[:4])
        #print(y[:4])
        loss.backward()
        optimizer.step()
        
        cat = np.argmax(res.cpu().data, axis=-1)
        accuracy = (cat == y).mean()
        
        losses.append(loss.cpu().data)
        accs.append(accuracy)
        if i % 1 == 0:
            print("LOSS:%.2f  ACC:%.2f" % (np.array(losses).mean(), np.array(accs).mean()))
            accs, losses = [], []  
            
    losses = []
    accs = []
    for i in trange(int(X_test.shape[0]/BS)):
        x = DenseTensor(X_test[i*BS:(i+1)*BS])
        y = Y_test[i*BS:(i+1)*BS]
        res = model.forward(x)
        loss = loss_fn(res, y)
        
        cat = np.argmax(res.cpu().data, axis=-1)
        accuracy = (cat == y).mean()
        
        losses.append(loss.cpu().data)
        accs.append(accuracy)
        if i % 32 == 0:
            print("TEST_LOSS:%.2f  TEST_ACC:%.2f" % (np.array(losses).mean(), np.array(accs).mean()))
            accs, losses = [], []

  1%|▊                                                                                                                                                     | 10/1875 [00:00<00:19, 96.38it/s]

LOSS:4.38  ACC:0.03
LOSS:39.29  ACC:0.19
LOSS:31.43  ACC:0.19
LOSS:28.64  ACC:0.69
LOSS:37.58  ACC:0.28
LOSS:49.48  ACC:0.44
LOSS:8.22  ACC:0.59
LOSS:13.19  ACC:0.41
LOSS:12.42  ACC:0.47
LOSS:7.54  ACC:0.75
LOSS:6.04  ACC:0.72


  1%|█▋                                                                                                                                                   | 22/1875 [00:00<00:17, 105.74it/s]

LOSS:7.32  ACC:0.59
LOSS:8.17  ACC:0.62
LOSS:10.00  ACC:0.31
LOSS:14.23  ACC:0.62
LOSS:5.23  ACC:0.59
LOSS:13.33  ACC:0.56
LOSS:14.40  ACC:0.41
LOSS:14.08  ACC:0.53
LOSS:23.96  ACC:0.56
LOSS:5.58  ACC:0.72
LOSS:4.85  ACC:0.78
LOSS:7.03  ACC:0.75


  2%|██▌                                                                                                                                                  | 33/1875 [00:00<00:17, 107.18it/s]

LOSS:2.41  ACC:0.81
LOSS:3.54  ACC:0.69
LOSS:3.34  ACC:0.75
LOSS:6.65  ACC:0.75
LOSS:3.66  ACC:0.69
LOSS:8.05  ACC:0.72
LOSS:3.70  ACC:0.75
LOSS:3.78  ACC:0.72
LOSS:5.13  ACC:0.78
LOSS:8.47  ACC:0.69
LOSS:6.55  ACC:0.75


  2%|███▌                                                                                                                                                 | 45/1875 [00:00<00:16, 110.29it/s]

LOSS:5.76  ACC:0.75
LOSS:7.51  ACC:0.59
LOSS:2.66  ACC:0.81
LOSS:4.88  ACC:0.81
LOSS:8.46  ACC:0.72
LOSS:30.91  ACC:0.16
LOSS:49.37  ACC:0.25
LOSS:39.95  ACC:0.66
LOSS:41.52  ACC:0.50
LOSS:30.43  ACC:0.53
LOSS:21.05  ACC:0.44
LOSS:8.39  ACC:0.62


  3%|████▌                                                                                                                                                | 57/1875 [00:00<00:16, 110.33it/s]

LOSS:4.88  ACC:0.72
LOSS:6.48  ACC:0.72
LOSS:4.06  ACC:0.88
LOSS:4.09  ACC:0.75
LOSS:4.78  ACC:0.88
LOSS:1.34  ACC:0.91
LOSS:3.58  ACC:0.88
LOSS:0.05  ACC:0.97
LOSS:2.88  ACC:0.91
LOSS:1.05  ACC:0.94
LOSS:4.36  ACC:0.84


  3%|████▉                                                                                                                                                | 62/1875 [00:00<00:16, 107.26it/s]


LOSS:3.54  ACC:0.84
LOSS:2.17  ACC:0.84
LOSS:3.00  ACC:0.75
LOSS:3.33  ACC:0.88
LOSS:4.25  ACC:0.84


KeyboardInterrupt: 

In [None]:
model.parameters()

In [None]:
res.cpu().data

In [None]:
loss.cpu().data

In [None]:
model.W.grad.cpu().data

In [None]:
 model.W.cpu().data

### Second

In [None]:
model2 = MLP2()

In [None]:
optimizer2 = optim.SGD(model2.parameters(), lr=.0001)

In [None]:
BS = dim1
epochs = 10

for epoch in range(epochs):
    losses = []
    accs = []
    for i in trange(int(X_train.shape[0]/BS)):
        x = DenseTensor(X_train[i*BS:(i+1)*BS])/255
        y = Y_train[i*BS:(i+1)*BS]
        optimizer2.zero_grad()
        res2 = model2.forward(x)
        loss2 = loss_fn(res2, y)
        #print(res2.cpu().data[:4])
        #print(y[:4])
        loss2.backward()
        optimizer2.step()
        
        cat = np.argmax(res2.cpu().data, axis=-1)
        accuracy = (cat == y).mean()
        
        losses.append(loss2.cpu().data)
        accs.append(accuracy)
        if i % 1 == 0:
            print("LOSS:%.2f  ACC:%.2f" % (np.array(losses).mean(), np.array(accs).mean()))
            accs, losses = [], []  
            
    losses = []
    accs = []
    for i in trange(int(X_test.shape[0]/BS)):
        x = DenseTensor(X_test[i*BS:(i+1)*BS])
        y = Y_test[i*BS:(i+1)*BS]
        res2 = model2.forward(x)
        loss2 = loss_fn(res2, y)
        
        cat = np.argmax(res2.cpu().data, axis=-1)
        accuracy = (cat == y).mean()
        
        losses.append(loss2.cpu().data)
        accs.append(accuracy)
        if i % 32 == 0:
            print("TEST_LOSS:%.2f  TEST_ACC:%.2f" % (np.array(losses).mean(), np.array(accs).mean()))
            accs, losses = [], []

In [None]:
res2.cpu().data==res.cpu().data

In [None]:
res2.cpu().data

In [None]:
loss.cpu().data==loss2.cpu().data

In [None]:
loss2.cpu().data

In [None]:
res.grad.cpu().data==res2.grad.cpu().data

In [None]:
res2.grad.cpu().data

In [None]:
model.W.grad, model2.W.grad

In [None]:
model.W.grad.cpu().data==model2.W.grad.cpu().data

In [None]:
model.W.grad.cpu().data

In [None]:
model2.W.grad.cpu().data

In [None]:
model.W.grad

In [None]:
model2.W.cpu().data