In [1]:
from tinygrad.densetensor import DenseTensor
from tinygrad.sparsetensor import SparseTensor
import numpy as np
import tinygrad.optim as optim
from extra.utils import get_parameters
from test.test_mnist import fetch_mnist
from tqdm import trange

%load_ext autoreload
%autoreload 2

DEVICE:GPU


In [2]:
dim1 = 32
dim2 = 784
dim3 = 10

np.random.seed(9)

seedmat = np.random.randn(dim2,dim3).astype(np.float32) / 1000

In [3]:
X_train, Y_train, X_test, Y_test = fetch_mnist()
X_train.shape

(60000, 784)

In [4]:
x_init = np.random.randn(dim1,dim2).astype(np.float32)
x = DenseTensor(x_init)
y = DenseTensor(np.random.randn(dim1,dim3))



In [5]:
class MLP:
  def __init__(self):
    self.W = DenseTensor(seedmat)

  def parameters(self):
    return get_parameters(self)

  def forward(self, x):
    out = x.dot(self.W)
    out = out.logsoftmax()
    #out = out.mul(self.m).add(self.m).sum()
    return out

In [6]:
class MLP2:
  def __init__(self):
    self.W = SparseTensor(seedmat)

  def parameters(self):
    return get_parameters(self)

  def forward(self, x):
    out = x.dot(self.W)
    out = out.logsoftmax()
    #out = out.mul(self.m).add(self.m).sum()
    return out

In [7]:
def loss_fn(y, y_pred):
    return ((y-y_pred)**2)**.5

In [8]:
def sparse_categorical_crossentropy(out, Y):
  num_classes = out.shape[-1]
  YY = Y.flatten()
  y = np.zeros((YY.shape[0], num_classes), np.float32)
  # correct loss for NLL, torch NLL loss returns one per row
  y[range(y.shape[0]),YY] = -1.0*num_classes
  y = y.reshape(list(Y.shape)+[num_classes])
  y = DenseTensor(y)
  return out.mul(y).mean()

### Dense

In [9]:
model = MLP()

In [10]:
optimizer = optim.SGD(model.parameters(), lr=.0001)

In [11]:
X_train.shape, Y_train.shape

((60000, 784), (60000,))

In [12]:
loss_fn = sparse_categorical_crossentropy

In [13]:
BS = dim1
epochs = 1

for epoch in range(epochs):
    losses = []
    accs = []
    for i in trange(int(X_train.shape[0]/BS)):
        x = DenseTensor(X_train[i*BS:(i+1)*BS])/255
        y = Y_train[i*BS:(i+1)*BS]
        optimizer.zero_grad()
        res = model.forward(x)
        loss = loss_fn(res, y)
        #print(res.cpu().data[:4])
        #print(y[:4])
        loss.backward()
        optimizer.step()
        
        cat = np.argmax(res.cpu().data, axis=-1)
        accuracy = (cat == y).mean()
        
        losses.append(loss.cpu().data)
        accs.append(accuracy)
        if i % 128 == 0:
            print("LOSS:%.2f  ACC:%.2f" % (np.array(losses).mean(), np.array(accs).mean()))
            accs, losses = [], []  
            
    losses = []
    accs = []
    for i in trange(int(X_test.shape[0]/BS)):
        x = DenseTensor(X_test[i*BS:(i+1)*BS])
        y = Y_test[i*BS:(i+1)*BS]
        res = model.forward(x)
        loss = loss_fn(res, y)
        
        cat = np.argmax(res.cpu().data, axis=-1)
        accuracy = (cat == y).mean()
        
        losses.append(loss.cpu().data)
        accs.append(accuracy)
        if i % 32 == 0:
            print("TEST_LOSS:%.2f  TEST_ACC:%.2f" % (np.array(losses).mean(), np.array(accs).mean()))
            accs, losses = [], []

  0%|                                                                                                                                                               | 0/1875 [00:00<?, ?it/s]

LOSS:2.30  ACC:0.03


  7%|██████████▍                                                                                                                                          | 132/1875 [00:04<01:03, 27.62it/s]

LOSS:2.30  ACC:0.22


 14%|████████████████████▋                                                                                                                                | 261/1875 [00:09<01:00, 26.80it/s]

LOSS:2.28  ACC:0.44


 21%|██████████████████████████████▉                                                                                                                      | 390/1875 [00:14<00:53, 27.67it/s]

LOSS:2.27  ACC:0.54


 28%|█████████████████████████████████████████                                                                                                            | 516/1875 [00:18<00:48, 27.81it/s]

LOSS:2.26  ACC:0.55


 34%|███████████████████████████████████████████████████▎                                                                                                 | 645/1875 [00:23<00:44, 27.54it/s]

LOSS:2.24  ACC:0.61


 41%|█████████████████████████████████████████████████████████████▌                                                                                       | 774/1875 [00:28<00:42, 25.81it/s]

LOSS:2.23  ACC:0.61


 48%|███████████████████████████████████████████████████████████████████████▌                                                                             | 900/1875 [00:32<00:35, 27.32it/s]

LOSS:2.22  ACC:0.67


 55%|█████████████████████████████████████████████████████████████████████████████████▏                                                                  | 1029/1875 [00:37<00:31, 27.17it/s]

LOSS:2.21  ACC:0.64


 62%|███████████████████████████████████████████████████████████████████████████████████████████▍                                                        | 1158/1875 [00:42<00:26, 27.11it/s]

LOSS:2.19  ACC:0.68


 68%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎                                              | 1284/1875 [00:47<00:21, 27.11it/s]

LOSS:2.18  ACC:0.69


 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                    | 1413/1875 [00:51<00:17, 27.03it/s]

LOSS:2.17  ACC:0.68


 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                          | 1542/1875 [00:56<00:12, 27.36it/s]

LOSS:2.15  ACC:0.69


 89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                | 1668/1875 [01:01<00:07, 27.13it/s]

LOSS:2.15  ACC:0.69


 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊      | 1797/1875 [01:05<00:02, 27.25it/s]

LOSS:2.13  ACC:0.72


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:08<00:00, 27.24it/s]
 21%|███████████████████████████████▋                                                                                                                      | 66/312 [00:00<00:00, 321.84it/s]

TEST_LOSS:5.12  TEST_ACC:0.81
TEST_LOSS:6.61  TEST_ACC:0.70
TEST_LOSS:7.92  TEST_ACC:0.67


 42%|██████████████████████████████████████████████████████████████▌                                                                                      | 131/312 [00:00<00:00, 307.10it/s]

TEST_LOSS:7.82  TEST_ACC:0.70
TEST_LOSS:7.05  TEST_ACC:0.68
TEST_LOSS:6.60  TEST_ACC:0.69


 77%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                  | 240/312 [00:00<00:00, 344.25it/s]

TEST_LOSS:6.91  TEST_ACC:0.74
TEST_LOSS:5.18  TEST_ACC:0.76
TEST_LOSS:6.47  TEST_ACC:0.72


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 312/312 [00:00<00:00, 336.48it/s]

TEST_LOSS:5.24  TEST_ACC:0.74





In [14]:
model.parameters()

[<DenseTensor <GPUBuffer with shape (784, 10)> with grad <GPUBuffer with shape (784, 10)>>,
 <DenseTensor <GPUBuffer with shape (784, 10)> with grad None>]

In [15]:
res.cpu().data

array([[ 0.00000000e+00, -2.17443726e+02, -1.52456787e+02,
        -1.28634811e+02, -1.71982361e+02, -1.16445923e+02,
        -1.26471375e+02, -1.80743011e+02, -1.27065163e+02,
        -1.67365753e+02],
       [ 0.00000000e+00, -1.40194092e+02, -6.11183586e+01,
        -8.57498093e+01, -8.44008789e+01, -8.70940552e+01,
        -4.97926788e+01, -8.89710388e+01, -8.11008606e+01,
        -8.14695587e+01],
       [ 0.00000000e+00, -1.63500305e+02, -4.63094406e+01,
        -6.45426788e+01, -9.67461472e+01, -1.07235107e+02,
        -6.92429581e+01, -9.34611130e+01, -5.45257263e+01,
        -9.61897430e+01],
       [-3.58169518e+01, -2.49962120e+01, -6.99073029e+00,
        -2.58131237e+01, -4.54339752e+01, -4.56404152e+01,
        -5.12322464e+01, -6.73234024e+01, -9.19342041e-04,
        -4.79191322e+01],
       [-5.39500122e+01, -9.43679810e-02, -5.76103973e+00,
        -2.92863426e+01, -5.53424835e+01, -5.79841576e+01,
        -4.88875542e+01, -6.00888329e+01, -2.44296265e+00,
        -5.

In [16]:
loss.cpu().data

array([9.882622], dtype=float32)

In [17]:
res.grad.cpu().data

AttributeError: 'NoneType' object has no attribute 'cpu'

In [None]:
model.W.grad.cpu().data

### Second

In [None]:
model2 = MLP2()

In [None]:
optimizer2 = optim.SGD(model2.parameters(), lr=.0001)

In [None]:
BS = dim1
epochs = 10

for epoch in range(epochs):
    losses = []
    accs = []
    for i in trange(int(X_train.shape[0]/BS)):
        x = DenseTensor(X_train[i*BS:(i+1)*BS])/255
        y = Y_train[i*BS:(i+1)*BS]
        optimizer2.zero_grad()
        res2 = model2.forward(x)
        loss2 = loss_fn(res2, y)
        #print(res2.cpu().data[:4])
        #print(y[:4])
        loss2.backward()
        optimizer2.step()
        
        cat = np.argmax(res2.cpu().data, axis=-1)
        accuracy = (cat == y).mean()
        
        losses.append(loss2.cpu().data)
        accs.append(accuracy)
        if i % 128 == 0:
            print("LOSS:%.2f  ACC:%.2f" % (np.array(losses).mean(), np.array(accs).mean()))
            accs, losses = [], []  
            
    losses = []
    accs = []
    for i in trange(int(X_test.shape[0]/BS)):
        x = DenseTensor(X_test[i*BS:(i+1)*BS])
        y = Y_test[i*BS:(i+1)*BS]
        res2 = model2.forward(x)
        loss2 = loss_fn(res2, y)
        
        cat = np.argmax(res2.cpu().data, axis=-1)
        accuracy = (cat == y).mean()
        
        losses.append(loss2.cpu().data)
        accs.append(accuracy)
        if i % 32 == 0:
            print("TEST_LOSS:%.2f  TEST_ACC:%.2f" % (np.array(losses).mean(), np.array(accs).mean()))
            accs, losses = [], []

In [None]:
res2.cpu().data==res.cpu().data

In [None]:
res2.cpu().data

In [None]:
loss.cpu().data==loss2.cpu().data

In [None]:
loss2.cpu().data

In [None]:
res.grad.cpu().data==res2.grad.cpu().data

In [None]:
res2.grad.cpu().data

In [None]:
model.W.grad, model2.W.grad

In [None]:
model.W.grad.cpu().data==model2.W.grad.cpu().data

In [None]:
model.W.grad.cpu().data

In [None]:
model2.W.grad.cpu().data

In [None]:
model.W.grad