In [1]:
from tinygrad.densetensor import DenseTensor
from tinygrad.sparsetensor import SparseTensor
import numpy as np
import tinygrad.optim as optim
from extra.utils import get_parameters


%load_ext autoreload
%autoreload 2

DEVICE:GPU


In [2]:
dim1 = 2
dim2 = 4
dim3 = 3

np.random.seed(9)

seedmat = np.random.randn(dim2,dim3).astype(np.float32)

In [3]:
x_init = np.random.randn(dim1,dim2).astype(np.float32)
x = DenseTensor(x_init)
y = DenseTensor(np.random.randn(dim1,dim3))



In [4]:
class MLP:
  def __init__(self):
    self.W = DenseTensor(seedmat)

  def parameters(self):
    return get_parameters(self)

  def forward(self, x):
    out = x.dot(self.W)
    out = out.softmax()
    #out = out.mul(self.m).add(self.m).sum()
    return out

In [5]:
class MLP2:
  def __init__(self):
    self.W = SparseTensor(seedmat)

  def parameters(self):
    return get_parameters(self)

  def forward(self, x):
    out = x.dot(self.W)
    out = out.softmax()
    #out = out.mul(self.m).add(self.m).sum()
    return out

In [6]:
def loss_fn(y, y_pred):
    return ((y-y_pred)**2)**.5

### Dense

In [7]:
model = MLP()

In [8]:
iters = 1
optimizer = optim.SGD(model.parameters(), lr=.0001)

In [9]:
for i in range(iters):
    optimizer.zero_grad()
    res = model.forward(x)
    loss = loss_fn(res, y)
    loss.backward()
    loss.backward()
    optimizer.step()

GRADt: -3.8146973e-06
GRADt: -1.1444092e-05


In [10]:
model.parameters()

[<DenseTensor <GPUBuffer with shape (4, 3)> with grad <GPUBuffer with shape (4, 3)>>,
 <DenseTensor <GPUBuffer with shape (4, 3)> with grad None>]

In [11]:
res.cpu().data

array([[0.04204695, 0.33576733, 0.62218565],
       [0.09866745, 0.40295017, 0.49838245]], dtype=float32)

In [12]:
loss.cpu().data

array([[0.6332241 , 0.30380926, 0.29596025],
       [0.28184202, 0.11341732, 0.85362184]], dtype=float32)

In [13]:
res.grad.cpu().data

array([[-3.9999998,  4.       , -4.       ],
       [-3.9999998, -4.       ,  3.9999998]], dtype=float32)

In [14]:
model.W.grad.cpu().data

array([[-1.2670451 , -3.7941687 ,  5.0612135 ],
       [ 0.5773096 ,  5.649423  , -6.2267327 ],
       [-0.22847073,  7.5477858 , -7.3193154 ],
       [-1.0991318 , -2.484571  ,  3.5837026 ]], dtype=float32)

### Second

In [15]:
model2 = MLP2()

In [16]:
optimizer2 = optim.SGD(model2.parameters(), lr=.0001)

In [17]:
for i in range(iters):
    optimizer2.zero_grad()
    res2 = model2.forward(x)
    loss2 = loss_fn(res2, y)
    loss2.backward()
    optimizer2.step()

GRADt: 0.0
GRADt: 0.0


In [18]:
res2.cpu().data==res.cpu().data

array([[False, False, False],
       [False, False, False]])

In [19]:
res2.cpu().data

array([[0.04205809, 0.33634186, 0.6216001 ],
       [0.0986051 , 0.40228686, 0.499108  ]], dtype=float32)

In [20]:
loss.cpu().data==loss2.cpu().data

array([[False, False, False],
       [False, False, False]])

In [21]:
loss2.cpu().data

array([[0.63321304, 0.30438375, 0.29654577],
       [0.28190437, 0.11408064, 0.8543474 ]], dtype=float32)

In [22]:
res.grad.cpu().data==res2.grad.cpu().data

array([[False, False, False],
       [False, False, False]])

In [23]:
res2.grad.cpu().data

array([[-0.99999994,  1.0000001 , -0.9999999 ],
       [-1.        , -0.99999994,  0.99999994]], dtype=float32)

In [24]:
model.W.grad, model2.W.grad

(<DenseTensor <GPUBuffer with shape (4, 3)> with grad None>,
 <DenseTensor <GPUBuffer with shape (4, 3)> with grad None>)

In [25]:
model.W.grad.cpu().data==model2.W.grad.cpu().data

array([[False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False]])

In [26]:
model.W.grad.cpu().data

array([[-1.2670451 , -3.7941687 ,  5.0612135 ],
       [ 0.5773096 ,  5.649423  , -6.2267327 ],
       [-0.22847073,  7.5477858 , -7.3193154 ],
       [-1.0991318 , -2.484571  ,  3.5837026 ]], dtype=float32)

In [27]:
model2.W.grad.cpu().data

array([[-0.1602433 , -0.4870642 ,  0.6473075 ],
       [ 0.06864072,  0.67755413, -0.7461949 ],
       [-0.03807362,  0.8688285 , -0.8307549 ],
       [-0.13990688, -0.32875866,  0.46866554]], dtype=float32)

In [28]:
model.W.grad

<DenseTensor <GPUBuffer with shape (4, 3)> with grad None>