In [1]:
from tinygrad.densetensor import DenseTensor, cl_queue
from tinygrad.sparsetensor import SparseTensor
import numpy as np
import tinygrad.optim as optim
from extra.utils import get_parameters


%load_ext autoreload
%autoreload 2

DEVICE:GPU


In [2]:
# Problem sizes: x is (dim1, dim2), the weights are (dim2, dim3), y is (dim1, dim3).
dim1, dim2, dim3 = 4, 6, 3

# Seed NumPy's global RNG so the random inputs drawn below are reproducible.
np.random.seed(9)

In [3]:
# Random dense input and initial weights, drawn from NumPy's global RNG
# (seeded in the previous cell) and cast to float32.
x_init = np.random.randn(dim1,dim2).astype(np.float32)
w_init = np.random.randn(dim2,dim3).astype(np.float32)
x = DenseTensor(x_init)
# NOTE(review): unlike x_init / w_init, the target array is not cast to
# float32 before being wrapped — confirm DenseTensor handles float64 input
# the way this experiment expects.
y = DenseTensor(np.random.randn(dim1,dim3))



# NOTE(review): W_TEST is reassigned from w_init in the next cell, so the
# random sparse matrix built here appears to be used only for this cell's display.
W_TEST = SparseTensor.uniform(dim2,dim3, randsparsity=0.6).to_numpy()
W_TEST

In [4]:
# Round-trip the dense w_init through SparseTensor and back to a NumPy array.
# This replaces the W_TEST assigned in the previous cell; both MLP and MLP2
# read this global when they build their weights.
W_TEST = SparseTensor(w_init).to_numpy()
W_TEST

array([[ 0.5163675 , -0.35523945,  0.208777  ],
       [ 0.32841107, -0.49822477, -2.09177685],
       [-0.08258774,  2.45518255, -2.67211008],
       [-0.91327929, -0.22731435,  0.26931539],
       [ 1.13046122,  1.0423975 ,  1.30381048],
       [ 1.38940072, -0.6564526 , -0.0562573 ]])

In [5]:
class MLP:
  """Single dense layer followed by softmax: x.dot(W).softmax()."""

  def __init__(self, init_dense):
    """Build the dense weight tensor.

    Args:
      init_dense: initial weight values (the notebook passes a NumPy array);
        wrapped as-is in a DenseTensor.
    """
    # Use the caller-supplied weights instead of the notebook-global W_TEST,
    # so the argument is no longer silently ignored and the model does not
    # depend on hidden kernel state.
    self.W = DenseTensor(init_dense)

  def parameters(self):
    """Return this model's parameters as collected by get_parameters."""
    return get_parameters(self)

  def forward(self, x):
    """Return x.dot(W) passed through softmax()."""
    out = x.dot(self.W)
    out = out.softmax()
    return out

In [6]:
class MLP2:
  """Sparse counterpart of MLP: x.dot(W).softmax() with a SparseTensor weight."""

  def __init__(self):
    # Weights are built from the notebook-level W_TEST array.
    self.W = SparseTensor(W_TEST)

  def parameters(self):
    """Collect this model's parameters via get_parameters."""
    return get_parameters(self)

  def forward(self, x):
    """Multiply x by the sparse weights, then apply softmax."""
    logits = x.dot(self.W)
    return logits.softmax()

In [7]:
def loss_fn(y, y_pred):
    """Element-wise square root of the squared difference of y and y_pred.

    No reduction is applied; the result has the shape of ``y - y_pred``.
    """
    diff = y - y_pred
    squared = diff ** 2
    return squared ** .5

In [8]:
model2 = MLP2()

In [9]:
model2.W.get_nnzs()

array([3, 3, 3, 3, 3, 3], dtype=uint32)

In [10]:
dense_init = model2.W.to_numpy()
dense_init

array([[ 0.5163675 , -0.35523945,  0.208777  ],
       [ 0.32841107, -0.49822477, -2.09177685],
       [-0.08258774,  2.45518255, -2.67211008],
       [-0.91327929, -0.22731435,  0.26931539],
       [ 1.13046122,  1.0423975 ,  1.30381048],
       [ 1.38940072, -0.6564526 , -0.0562573 ]])

In [11]:
model = MLP(dense_init)

### Dense

In [12]:
# Hyperparameters; `iters` and `LR` are reused by the sparse run further down.
iters = 1
LR = 0.0001
# SGD over the dense model's parameters.
optimizer = optim.SGD(model.parameters(), lr=LR)

In [13]:
# Train the dense model for `iters` steps. `res` and `loss` stay bound after
# the loop so the cells below can inspect the last forward result and loss.
for i in range(iters):
    res = model.forward(x)
    optimizer.zero_grad()
    # loss_fn is declared as (y, y_pred) but is called with (res, y); the
    # expression it computes is symmetric in its arguments, so the value is
    # unaffected. The loss is element-wise (not reduced to a scalar) when
    # backward() is called on it.
    loss = loss_fn(res, y)
    loss.backward()
    optimizer.step()

grad: -0.07484675 0.10546382 -0.024239197 0.032281365 1.1175871e-08


In [14]:
model.parameters()

[<DenseTensor <GPUBuffer with shape (6, 3)> with grad <GPUBuffer with shape (6, 3)>>,
 <DenseTensor <GPUBuffer with shape (6, 3)> with grad None>]

In [15]:
res.cpu().data

array([[1.4654074e-02, 3.0029491e-03, 9.8234296e-01],
       [6.8406165e-01, 3.6055233e-02, 2.7988321e-01],
       [3.0643116e-03, 9.9689972e-01, 3.5974052e-05],
       [5.1771337e-01, 4.7943056e-01, 2.8560869e-03]], dtype=float32)

In [16]:
loss.cpu().data

array([[0.5145567 , 0.43341643, 1.3581558 ],
       [1.6071233 , 1.881195  , 0.43018603],
       [0.641794  , 0.17212938, 1.2108748 ],
       [1.0211188 , 1.181346  , 1.9771273 ]], dtype=float32)

In [17]:
res.grad.cpu().data

array([[ 1.        , -1.        ,  0.99999994],
       [ 0.9999999 , -1.0000001 ,  1.        ],
       [ 0.99999994,  1.        ,  1.        ],
       [ 0.99999994,  1.        ,  0.9999999 ]], dtype=float32)

In [18]:
model.W.grad.cpu().data

array([[-0.07484675,  0.10546382, -0.03061706],
       [-0.0242392 ,  0.03585451, -0.01161531],
       [-0.01197051,  0.02341268, -0.01144217],
       [-0.03196306,  0.04511629, -0.01315323],
       [ 0.03133391, -0.04193553,  0.01060162],
       [ 0.0857941 , -0.11807546,  0.03228137]], dtype=float32)

### Sparse (second model)

In [19]:
w_init

array([[ 0.5163675 , -0.35523945,  0.208777  ],
       [ 0.32841107, -0.49822477, -2.0917768 ],
       [-0.08258774,  2.4551826 , -2.67211   ],
       [-0.9132793 , -0.22731435,  0.2693154 ],
       [ 1.1304612 ,  1.0423975 ,  1.3038105 ],
       [ 1.3894007 , -0.6564526 , -0.0562573 ]], dtype=float32)

In [20]:
# Re-create the sparse model so this run starts from weights freshly built from W_TEST.
model2 = MLP2()

In [21]:
# SGD over the sparse model's parameters, with the same learning rate as the dense run.
optimizer2 = optim.SGD(model2.parameters(), lr=LR)

In [22]:
# Same training step as the dense run, applied to the sparse model.
# NOTE(review): the recorded output of this cell ends with
# "NameError: name 'asdf' is not defined", yet `asdf` does not appear anywhere
# in this notebook's visible code — presumably a leftover debug stop in the
# imported library code; confirm and remove it so the cells below can run.
for i in range(iters):
    res2 = model2.forward(x)
    optimizer2.zero_grad()
    loss2 = loss_fn(res2, y)
    loss2.backward()
    optimizer2.step()


 ADD VAL:0.00,0.00 - (0,0) - (0,0,0)
 ADD VAL:-1.52,0.05 - (0,0) - (0,0,1)
 ADD VAL:0.30,0.00 - (0,0) - (0,0,2)
 ADD VAL:-0.14,0.00 - (0,0) - (0,0,3)
 ADD VAL:-0.29,0.00 - (0,1) - (0,1,0)
 ADD VAL:-0.49,0.05 - (0,1) - (0,1,1)
 ADD VAL:0.71,0.00 - (0,1) - (0,1,2)
 ADD VAL:1.30,0.00 - (0,1) - (0,1,3)
 ADD VAL:-1.12,0.00 - (0,2) - (0,2,0)
 ADD VAL:-0.24,0.05 - (0,2) - (0,2,1)
 ADD VAL:1.82,0.00 - (0,2) - (0,2,2)
 ADD VAL:0.68,0.00 - (0,2) - (0,2,3)
 ADD VAL:-0.01,0.00 - (0,3) - (0,3,0)
 ADD VAL:-0.65,0.05 - (0,3) - (0,3,1)
 ADD VAL:0.43,0.00 - (0,3) - (0,3,2)
 ADD VAL:0.03,0.00 - (0,3) - (0,3,3)
 ADD VAL:-0.38,0.00 - (0,4) - (0,4,0)
 ADD VAL:0.64,0.05 - (0,4) - (0,4,1)
 ADD VAL:1.54,0.00 - (0,4) - (0,4,2)
 ADD VAL:0.92,0.00 - (0,4) - (0,4,3)
 ADD VAL:-0.48,0.00 - (0,5) - (0,5,0)
 ADD VAL:1.74,0.05 - (0,5) - (0,5,1)
 ADD VAL:-0.90,0.00 - (0,5) - (0,5,2)
 ADD VAL:0.38,0.00 - (0,5) - (0,5,3)grad_max: [5.58793545e-09 1.86264515e-09 9.31322575e-10 0.00000000e+00
 9.31322575e-10 3.72529030e-09

NameError: name 'asdf' is not defined

#### fwd

In [None]:
res2.cpu().data==res.cpu().data

In [None]:
res.cpu().data - res2.cpu().data

In [None]:
# NOTE(review): this compares output gradients although it sits in the
# forward-pass section; the same difference is computed again in the grad section.
res.grad.cpu().data - res2.grad.cpu().data

#### loss

In [None]:
loss.cpu().data==loss2.cpu().data

In [None]:
loss2.cpu().data

In [None]:
loss.cpu().data - loss2.cpu().data

#### grad

In [None]:
res.grad.cpu().data==res2.grad.cpu().data

In [None]:
res2.grad.cpu().data

In [None]:
res.grad.cpu().data-res2.grad.cpu().data

#### weight

In [None]:
def to_dense(data, cols, nnzs, ellw, shape):
    """Scatter flat, row-padded (data, cols) storage into a dense array.

    For each row ``r`` the first ``nnzs[r]`` slots starting at flat offset
    ``r * ellw`` are read; each value ``data[slot]`` is written to column
    ``cols[slot]`` of row ``r``. Positions never written stay zero.
    (Presumably an ELL-style layout with row width ``ellw`` — confirm.)

    Returns:
        A ``np.zeros(shape)`` array filled with the scattered values.
    """
    dense = np.zeros(shape)
    n_rows = shape[0]
    for r in range(n_rows):
        base = r * ellw
        for k in range(nnzs[r]):
            slot = base + k
            dense[r, cols[slot]] = data[slot]
    return dense

In [None]:
model.W.grad, model2.W.grad

In [None]:
model.W.grad.cpu().data

In [None]:
model.W.grad.cpu().data.sum()

In [None]:
model2.W.grad.to_numpy()

In [None]:
model2.W.grad.to_numpy().sum()

In [None]:
model.W.grad.cpu().data == model2.W.grad.to_numpy()

In [None]:
model.W.cpu().data

In [None]:
model2.W.to_numpy()

In [None]:
model.W.cpu().data == model2.W.to_numpy()

In [None]:
# Compare the dense weights against the transpose of to_numpy(dual=True).
# NOTE(review): presumably `dual` selects a transposed copy of the sparse
# weights — confirm against SparseTensor.to_numpy.
model.W.cpu().data == model2.W.to_numpy(dual=True).T

In [None]:
model.W.cpu().data - model2.W.to_numpy()

In [None]:
# NOTE(review): `cols` is not assigned in any visible cell; unless it is
# defined elsewhere, this raises NameError on a fresh kernel (hidden state).
cols

In [None]:
# NOTE(review): `nnzs` is not assigned in any visible cell; unless it is
# defined elsewhere, this raises NameError on a fresh kernel (hidden state).
nnzs

In [None]:
denserec = model.W.cpu().data 
denserec

In [None]:
sparserec = model2.W.to_numpy()
sparserec

In [None]:
denserec - sparserec

In [None]:
sparserec2 = model2.W.to_numpy(dual=True)
sparserec2.T

In [None]:
denserec - sparserec2.T

In [None]:
# NOTE(review): `seedmat` is not assigned in any visible cell; unless it is
# defined elsewhere, this raises NameError on a fresh kernel (hidden state).
seedmat - denserec