In [1]:
from tinygrad.densetensor import DenseTensor, cl_queue
from tinygrad.sparsetensor import SparseTensor
import numpy as np
import tinygrad.optim as optim
from extra.utils import get_parameters


%load_ext autoreload
%autoreload 2

DEVICE:GPU


In [2]:
# Problem dimensions: x is dim1 x dim2 and the weight matrix is dim2 x dim3.
dim1 = dim2 = dim3 = 4

# Seed NumPy's global RNG so the randomly generated inputs are reproducible.
np.random.seed(9)

In [3]:
# Random float32 input (dim1 x dim2) and initial weight (dim2 x dim3).
x_init = np.random.randn(dim1,dim2).astype(np.float32)
w_init = np.random.randn(dim2,dim3).astype(np.float32)
x = DenseTensor(x_init)
# Cast the target to float32 explicitly so it matches x_init / w_init instead
# of handing DenseTensor a float64 array.
y = DenseTensor(np.random.randn(dim1,dim3).astype(np.float32))



# NOTE(review): this W_TEST is reassigned by the next cell (In [4]) before any
# model reads it.
W_TEST = SparseTensor.uniform(dim2,dim3, randsparsity=0.6).to_numpy()
W_TEST

In [4]:
# Rebuild W_TEST from w_init by round-tripping it through SparseTensor; this
# replaces the W_TEST assigned in the previous cell.
W_TEST = SparseTensor(w_init).to_numpy()
W_TEST

array([[ 1.54272962, -0.90072119, -0.13712502,  1.29757905],
       [ 0.67527115,  0.03195812,  0.9181459 ,  0.38050947],
       [ 0.5163675 , -0.35523945,  0.208777  ,  0.32841107],
       [-0.49822477, -2.09177685, -0.08258774,  2.45518255]])

In [5]:
class MLP:
  """Single-layer softmax model whose weight is held in a DenseTensor."""

  def __init__(self, init_dense):
    """Create the model.

    Args:
      init_dense: initial weight matrix, passed straight to DenseTensor.
    """
    # Honour the caller-supplied matrix. Previously this argument was ignored
    # and the notebook-global W_TEST was read instead, which made the model
    # depend on hidden kernel state.
    self.W = DenseTensor(init_dense)

  def parameters(self):
    """Return whatever get_parameters collects from this object."""
    return get_parameters(self)

  def forward(self, x):
    """Return softmax(x.dot(W))."""
    out = x.dot(self.W)
    out = out.softmax()
    return out

In [6]:
class MLP2:
  """Single-layer softmax model whose weight is held in a SparseTensor."""

  def __init__(self):
    # The weight is built from the notebook-level W_TEST matrix.
    self.W = SparseTensor(W_TEST)

  def parameters(self):
    """Return whatever get_parameters collects from this object."""
    return get_parameters(self)

  def forward(self, x):
    """Return softmax(x.dot(W))."""
    return x.dot(self.W).softmax()

In [7]:
def loss_fn(y, y_pred):
    """Return ((y - y_pred)**2)**.5 element-wise; no reduction is applied."""
    diff = y - y_pred
    squared = diff**2
    return squared**.5

In [8]:
model2 = MLP2()

In [9]:
model2.W.get_nnzs()

array([4, 4, 4, 4], dtype=uint32)

In [10]:
dense_init = model2.W.to_numpy()
dense_init

array([[ 1.54272962, -0.90072119, -0.13712502,  1.29757905],
       [ 0.67527115,  0.03195812,  0.9181459 ,  0.38050947],
       [ 0.5163675 , -0.35523945,  0.208777  ,  0.32841107],
       [-0.49822477, -2.09177685, -0.08258774,  2.45518255]])

In [11]:
model = MLP(dense_init)

### Dense

In [12]:
# Number of optimisation iterations; read by both the dense and sparse loops.
iters = 1
# SGD over the dense model's parameters with a learning rate of 1.
optimizer = optim.SGD(model.parameters(), lr=1)

In [13]:
# Each iteration: forward pass, clear gradients, compute loss, backprop, update.
for i in range(iters):
    res = model.forward(x)
    optimizer.zero_grad()
    # NOTE(review): loss_fn is declared as loss_fn(y, y_pred) but is called
    # here with the model output first and y second — confirm this is intended.
    loss = loss_fn(res, y)
    loss.backward()
    optimizer.step()

In [14]:
model.parameters()

[<DenseTensor <GPUBuffer with shape (4, 4)> with grad <GPUBuffer with shape (4, 4)>>,
 <DenseTensor <GPUBuffer with shape (4, 4)> with grad None>]

In [15]:
res.cpu().data

array([[1.5660693e-01, 3.5940760e-01, 9.5464088e-02, 3.8852134e-01],
       [5.0784901e-02, 8.6381847e-01, 3.4803964e-02, 5.0592598e-02],
       [6.7446940e-03, 7.9087674e-04, 2.3246136e-02, 9.6921837e-01],
       [2.7409911e-01, 1.5916763e-02, 5.3959215e-01, 1.7039205e-01]],
      dtype=float32)

In [16]:
loss.cpu().data

array([[2.828717  , 1.272687  , 0.32277843, 0.11920594],
       [1.0796763 , 0.17857905, 1.2690065 , 1.3388082 ],
       [0.6631973 , 0.05704817, 0.5231487 , 0.532799  ],
       [0.6499121 , 0.9389783 , 1.377658  , 0.3206949 ]], dtype=float32)

In [17]:
res.grad.cpu().data

array([[ 0.9999999,  1.       ,  1.       ,  0.9999999],
       [-1.       , -0.9999999, -1.       , -1.       ],
       [ 1.       ,  1.       ,  1.       ,  1.       ],
       [ 1.       ,  1.       , -1.       ,  1.       ]], dtype=float32)

In [18]:
model.W.grad.cpu().data

array([[ 0.08775962,  0.00509613, -0.14741097,  0.05455523],
       [ 0.20928203,  0.01215285, -0.35153374,  0.1300989 ],
       [ 0.5391952 ,  0.03131065, -0.9056932 ,  0.33518744],
       [ 0.12742296,  0.00739936, -0.214034  ,  0.0792117 ]],
      dtype=float32)

### Sparse (second model)

In [19]:
w_init

array([[ 1.5427296 , -0.9007212 , -0.13712502,  1.297579  ],
       [ 0.67527115,  0.03195812,  0.9181459 ,  0.38050947],
       [ 0.5163675 , -0.35523945,  0.208777  ,  0.32841107],
       [-0.49822477, -2.0917768 , -0.08258774,  2.4551826 ]],
      dtype=float32)

In [20]:
model2 = MLP2()

In [21]:
optimizer2 = optim.SGD(model2.parameters(), lr=1)

In [22]:
# Same steps as the dense loop, run on the SparseTensor-backed model:
# forward pass, clear gradients, compute loss, backprop, update.
for i in range(iters):
    res2 = model2.forward(x)
    optimizer2.zero_grad()
    # NOTE(review): loss_fn is declared as loss_fn(y, y_pred) but is called
    # here with the model output first and y second — confirm this is intended.
    loss2 = loss_fn(res2, y)
    loss2.backward()
    optimizer2.step()

#### fwd

In [23]:
# Compare with a floating-point tolerance: exact == on float32 results would
# also flag pure rounding noise as a mismatch.
np.isclose(res2.cpu().data, res.cpu().data)

array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False]])

In [24]:
res2.cpu().data

array([[1.4759253e-01, 4.7290090e-01, 1.7974024e-01, 1.9976625e-01],
       [3.2619357e-02, 8.9211535e-01, 6.1257318e-02, 1.4007927e-02],
       [4.5565027e-03, 4.3189147e-04, 8.2788356e-03, 9.8673272e-01],
       [3.0022663e-01, 9.8456349e-03, 1.8008390e-01, 5.0984389e-01]],
      dtype=float32)

In [25]:
res.cpu().data - res2.cpu().data

array([[ 9.0143979e-03, -1.1349329e-01, -8.4276147e-02,  1.8875510e-01],
       [ 1.8165544e-02, -2.8296888e-02, -2.6453353e-02,  3.6584672e-02],
       [ 2.1881913e-03,  3.5898527e-04,  1.4967300e-02, -1.7514348e-02],
       [-2.6127517e-02,  6.0711280e-03,  3.5950825e-01, -3.3945185e-01]],
      dtype=float32)

#### loss

In [26]:
# Compare with a floating-point tolerance: exact == on float32 results would
# also flag pure rounding noise as a mismatch.
np.isclose(loss.cpu().data, loss2.cpu().data)

array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False]])

In [27]:
loss2.cpu().data

array([[2.8197026 , 1.3861802 , 0.4070546 , 0.06954914],
       [1.0978417 , 0.15028214, 1.2425531 , 1.3753928 ],
       [0.6610091 , 0.05668918, 0.50818145, 0.5503133 ],
       [0.6760397 , 0.93290716, 1.7371664 , 0.6601467 ]], dtype=float32)

In [28]:
loss.cpu().data - loss2.cpu().data

array([[ 9.01436806e-03, -1.13493204e-01, -8.42761695e-02,
         4.96568009e-02],
       [-1.81654692e-02,  2.82969028e-02,  2.64533758e-02,
        -3.65846157e-02],
       [ 2.18820572e-03,  3.58983874e-04,  1.49672627e-02,
        -1.75142884e-02],
       [-2.61275768e-02,  6.07115030e-03, -3.59508395e-01,
        -3.39451820e-01]], dtype=float32)

#### grad

In [29]:
# Compare with a floating-point tolerance so that ~1e-8 rounding differences
# do not drown out the entries that genuinely disagree.
np.isclose(res.grad.cpu().data, res2.grad.cpu().data)

array([[ True,  True,  True, False],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [30]:
res2.grad.cpu().data

array([[ 0.99999994,  1.        ,  1.        , -1.        ],
       [-0.99999994, -0.99999994, -1.        , -1.        ],
       [ 0.99999994,  1.        ,  0.99999994,  0.99999994],
       [ 0.99999994,  1.        , -1.        ,  0.99999994]],
      dtype=float32)

In [31]:
res.grad.cpu().data-res2.grad.cpu().data

array([[-5.9604645e-08,  0.0000000e+00,  0.0000000e+00,  1.9999999e+00],
       [-5.9604645e-08,  5.9604645e-08,  0.0000000e+00,  0.0000000e+00],
       [ 5.9604645e-08,  0.0000000e+00,  5.9604645e-08,  5.9604645e-08],
       [ 5.9604645e-08,  0.0000000e+00,  0.0000000e+00,  5.9604645e-08]],
      dtype=float32)

#### weight

In [32]:
def to_dense(data, cols, nnzs, ellw, shape):
    """Expand flat row-padded sparse buffers into a dense array.

    Args:
        data: flat value buffer; row r starts at offset r * ellw.
        cols: flat column-index buffer laid out like ``data``.
        nnzs: per-row count of entries to copy.
        ellw: stride (slots per row) in ``data`` and ``cols``.
        shape: shape of the dense result; ``shape[0]`` rows are visited.

    Returns:
        Array from ``np.zeros(shape)`` with the listed entries filled in.
    """
    dense = np.zeros(shape)
    for r in range(shape[0]):
        base = r * ellw  # first slot belonging to row r
        for k in range(nnzs[r]):
            slot = base + k
            dense[r, cols[slot]] = data[slot]
    return dense

In [33]:
model.W.grad, model2.W.grad

(<DenseTensor <GPUBuffer with shape (4, 4)> with grad None>,
 <SparseTensor <GPUBuffer with shape (512,)> with grad None>)

In [34]:
model.W.grad.cpu().data

array([[ 0.08775962,  0.00509613, -0.14741097,  0.05455523],
       [ 0.20928203,  0.01215285, -0.35153374,  0.1300989 ],
       [ 0.5391952 ,  0.03131065, -0.9056932 ,  0.33518744],
       [ 0.12742296,  0.00739936, -0.214034  ,  0.0792117 ]],
      dtype=float32)

In [48]:
# NOTE(review): this cell carries execution count 48, i.e. it was last run
# after the cells that follow it (In [40] onwards).
model2.W.grad.to_numpy()

array([[ 0.03214619,  0.0594299 ,  0.13129239,  0.04582022],
       [ 0.00126151, -0.0521974 , -0.20440495, -0.00090652],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.05412513,  0.22249129,  0.69155049,  0.08322071]])

In [40]:
# NOTE(review): `data` is not assigned in any cell visible in this notebook
# (the same holds for `cols` and `nnzs` displayed in the next two cells) —
# presumably left over from cells that were deleted; confirm.
data

array([0.03214619, 0.00126151, 0.03214619, 0.05412513, 0.03214619,
       0.03214619, 0.03214619, 0.03214619, 0.03214619, 0.03214619,
       0.03214619, 0.03214619, 0.03214619, 0.03214619, 0.03214619,
       0.03214619], dtype=float32)

In [41]:
cols

array([0, 1, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint32)

In [42]:
nnzs

array([128, 128, 128,   0], dtype=uint32)

In [43]:
denserec = model.W.cpu().data 
denserec

array([[ 1.4301615 , -0.90679747,  0.08306126,  1.1960373 ],
       [ 0.42296875,  0.0698323 ,  1.4642829 ,  0.04880069],
       [-0.11322138, -0.19133575,  1.6425145 , -0.6396414 ],
       [-0.66086805, -2.098002  ,  0.23815697,  2.3033063 ]],
      dtype=float32)

In [44]:
sparserec = model2.W.to_numpy()
sparserec

array([[ 1.48577487, -0.90198272, -0.06434972,  1.25059247],
       [ 0.5728209 ,  0.08415551,  1.1127491 ,  0.17889959],
       [ 0.29468143, -0.1508345 ,  0.73682135, -0.30445394],
       [-0.49822477, -2.09177685, -0.08258774,  2.45518255]])

In [45]:
denserec - sparserec

array([[-0.0556134 , -0.00481474,  0.14741097, -0.05455518],
       [-0.14985216, -0.01432322,  0.35153377, -0.13009889],
       [-0.40790281, -0.04050125,  0.90569311, -0.33518746],
       [-0.16264328, -0.00622511,  0.32074472, -0.15187621]])

In [46]:
sparserec2 = model2.W.to_numpy(dual=True)
sparserec2.T

array([[ 1.48577487, -0.90198272, -0.06434972,  1.19646728],
       [ 0.5728209 ,  0.08415551,  1.1127491 , -0.04359171],
       [ 0.5163675 , -0.35523945,  0.208777  ,  0.32841107],
       [-0.49822477, -2.09177685, -0.08258774,  2.45518255]])

In [47]:
denserec - sparserec2.T

array([[-5.56133986e-02, -4.81474400e-03,  1.47410974e-01,
        -4.29987907e-04],
       [-1.49852157e-01, -1.43232197e-02,  3.51533771e-01,
         9.23923999e-02],
       [-6.29588872e-01,  1.63903698e-01,  1.43373747e+00,
        -9.68052477e-01],
       [-1.62643284e-01, -6.22510910e-03,  3.20744716e-01,
        -1.51876211e-01]])

In [None]:
# NOTE(review): `seedmat` is not defined in any visible cell; unless it is
# defined elsewhere this raises NameError on a fresh kernel — confirm which
# matrix was intended.
seedmat - denserec