In [1]:
from tinygrad.densetensor import DenseTensor, cl_queue
from tinygrad.sparsetensor import SparseTensor
import numpy as np
import tinygrad.optim as optim
from extra.utils import get_parameters


%load_ext autoreload
%autoreload 2

DEVICE:GPU


In [2]:
# Problem sizes: the input is (dim1, dim2) and the weight matrix is (dim2, dim3).
dim1, dim2, dim3 = 4, 8, 6

# Seed NumPy's global RNG so the random inputs drawn below are reproducible.
np.random.seed(9)

In [3]:
# Random input batch and seed weights, kept as float32 NumPy arrays.
x_init = np.random.randn(dim1,dim2).astype(np.float32)
w_init = np.random.randn(dim2,dim3).astype(np.float32)
x = DenseTensor(x_init)
# Target values; cast to float32 so every array handed to a tensor has the same dtype.
y = DenseTensor(np.random.randn(dim1,dim3).astype(np.float32))
# W_TEST is built from w_init in the following cell; the random-sparsity W_TEST
# that used to be created here was overwritten before anything read it.

In [4]:
# Round-trip w_init through SparseTensor and back to a dense NumPy array.
# W_TEST is the array the models' initial weights are taken from.
# NOTE(review): the recorded output shows no dtype suffix while w_init prints
# dtype=float32, so to_numpy() presumably returns float64 — confirm.
W_TEST = SparseTensor(w_init).to_numpy()
W_TEST

array([[-2.67211008e+00, -9.13279295e-01, -2.27314353e-01,
         2.69315392e-01,  1.13046122e+00,  1.04239750e+00],
       [ 1.30381048e+00,  1.38940072e+00, -6.56452596e-01,
        -5.62572964e-02, -4.99902606e-01,  4.36419368e-01],
       [-3.75813037e-01, -9.23061609e-01,  1.91725028e+00,
        -1.50302842e-01, -6.38729751e-01,  8.24770331e-01],
       [-1.21083879e+00, -5.03405392e-01, -7.01915681e-01,
        -1.97427106e+00, -2.65573215e+00, -5.76822497e-02],
       [-6.56186581e-01, -6.61706686e-01,  7.69348443e-01,
        -8.99004877e-01,  1.69363797e+00, -1.69733524e+00],
       [-2.79337025e+00, -2.26150647e-01,  3.97428840e-01,
         1.65970361e+00, -4.93746817e-01, -3.76097679e-01],
       [-1.69739768e-01,  2.41710639e+00, -1.80884051e+00,
         3.39751154e-01, -2.27297600e-02, -9.59997058e-01],
       [-3.83114427e-01,  1.09529994e-01, -8.55162859e-01,
         2.21606664e-04,  6.63855076e-01,  7.49480963e-01]])

In [5]:
class MLP:
  """Single-layer dense model computing softmax(x.dot(W)).

  Dense counterpart of MLP2; W is held in a DenseTensor.
  """

  def __init__(self, init_dense):
    """Build the layer from an explicit initial weight matrix.

    init_dense: array passed straight to DenseTensor as the initial value of W.
    """
    # Honour the caller-supplied weights. Previously this argument was ignored
    # and the module-level W_TEST was used regardless of what was passed in.
    self.W = DenseTensor(init_dense)

  def parameters(self):
    """Return what get_parameters collects from this model (fed to the optimizer)."""
    return get_parameters(self)

  def forward(self, x):
    """Return softmax(x.dot(W)) for the input tensor x."""
    out = x.dot(self.W)
    return out.softmax()

In [6]:
class MLP2:
  """Sparse twin of MLP: one SparseTensor weight followed by a softmax."""

  def __init__(self):
    """Create W as a SparseTensor built from the module-level W_TEST array."""
    self.W = SparseTensor(W_TEST)

  def parameters(self):
    """Hand this model to get_parameters and return its result."""
    params = get_parameters(self)
    return params

  def forward(self, x):
    """Return softmax(x.dot(W)) for the input tensor x."""
    return x.dot(self.W).softmax()

In [7]:
def loss_fn(y, y_pred):
    """Return the element-wise sqrt((y - y_pred)**2); no reduction is applied."""
    residual = y - y_pred
    squared = residual ** 2
    return squared ** .5

In [8]:
model2 = MLP2()

In [9]:
model2.W.get_nnzs()

array([6, 6, 6, 6, 6, 6, 6, 6], dtype=uint32)

In [10]:
dense_init = model2.W.to_numpy()
dense_init

array([[-2.67211008e+00, -9.13279295e-01, -2.27314353e-01,
         2.69315392e-01,  1.13046122e+00,  1.04239750e+00],
       [ 1.30381048e+00,  1.38940072e+00, -6.56452596e-01,
        -5.62572964e-02, -4.99902606e-01,  4.36419368e-01],
       [-3.75813037e-01, -9.23061609e-01,  1.91725028e+00,
        -1.50302842e-01, -6.38729751e-01,  8.24770331e-01],
       [-1.21083879e+00, -5.03405392e-01, -7.01915681e-01,
        -1.97427106e+00, -2.65573215e+00, -5.76822497e-02],
       [-6.56186581e-01, -6.61706686e-01,  7.69348443e-01,
        -8.99004877e-01,  1.69363797e+00, -1.69733524e+00],
       [-2.79337025e+00, -2.26150647e-01,  3.97428840e-01,
         1.65970361e+00, -4.93746817e-01, -3.76097679e-01],
       [-1.69739768e-01,  2.41710639e+00, -1.80884051e+00,
         3.39751154e-01, -2.27297600e-02, -9.59997058e-01],
       [-3.83114427e-01,  1.09529994e-01, -8.55162859e-01,
         2.21606664e-04,  6.63855076e-01,  7.49480963e-01]])

In [11]:
model = MLP(dense_init)

### Dense

In [12]:
# Number of SGD steps run by each training loop below.
iters = 4
# Learning rate; the sparse model's optimizer reuses the same value.
LR = 0.1
# Plain SGD over whatever model.parameters() returns for the dense model.
optimizer = optim.SGD(model.parameters(), lr=LR)

In [13]:
# Train the dense model for `iters` steps on the fixed (x, y) pair.
# `res` and `loss` keep the values from the final step and are inspected by later cells.
for i in range(iters):
    res = model.forward(x)
    optimizer.zero_grad()
    # loss_fn is declared as (y, y_pred) but is called with the prediction first;
    # its formula squares the difference, so the loss value is the same either way.
    loss = loss_fn(res, y)
    # The loss is not reduced to a scalar before backward(); it is displayed
    # further down as a 4x6 array.
    loss.backward()
    optimizer.step()

In [14]:
model.parameters()

[<DenseTensor <GPUBuffer with shape (8, 6)> with grad <GPUBuffer with shape (8, 6)>>,
 <DenseTensor <GPUBuffer with shape (8, 6)> with grad None>]

In [15]:
res.cpu().data

array([[4.1922873e-01, 5.6649712e-03, 1.4283931e-01, 4.0425811e-02,
        9.5227629e-02, 2.9661348e-01],
       [1.4229345e-03, 9.5304364e-01, 1.4277362e-02, 1.9623214e-02,
        2.5746459e-03, 9.0581281e-03],
       [1.6420444e-04, 5.8350306e-02, 3.1487264e-02, 2.6555007e-02,
        8.0411977e-01, 7.9323441e-02],
       [2.6168948e-01, 1.0091750e-02, 9.3293865e-04, 4.4397044e-04,
        1.6311060e-01, 5.6373131e-01]], dtype=float32)

In [16]:
loss.cpu().data

array([[0.8850471 , 0.28310424, 0.10733978, 0.80779535, 0.06777093,
        0.91201013],
       [0.50109786, 2.536866  , 1.0087562 , 0.6726406 , 0.53447044,
        0.01703519],
       [0.92462   , 0.01174197, 0.1105326 , 1.6802164 , 0.5638724 ,
        0.05421719],
       [0.6571717 , 1.2742958 , 0.5603441 , 0.23928154, 0.3167885 ,
        0.50197834]], dtype=float32)

In [17]:
res.grad.cpu().data

array([[ 0.9999999 ,  1.        ,  1.        , -1.        , -1.        ,
        -1.0000001 ],
       [-1.        ,  1.        , -1.        ,  1.        , -0.9999999 ,
         0.99999994],
       [-1.        ,  1.        , -1.        ,  0.99999994, -1.        ,
         1.        ],
       [ 1.        , -0.99999994, -1.        , -1.0000001 , -1.        ,
         1.        ]], dtype=float32)

In [18]:
model.W.grad.cpu().data

array([[ 4.81720753e-02,  1.33489192e-01, -9.86666977e-03,
         6.78764358e-02, -5.45409799e-01,  3.05738747e-01],
       [-1.35541394e-01, -1.05922028e-01, -7.72921881e-03,
        -2.68947240e-02,  3.68136227e-01, -9.20488462e-02],
       [-3.87197256e-01, -1.68300074e-04, -1.54551625e-01,
         4.54462096e-02,  9.74631906e-02,  3.99007767e-01],
       [ 2.04064623e-02,  1.81639612e-01, -6.42970502e-02,
         5.91954142e-02, -4.38550293e-01,  2.41605848e-01],
       [-1.83521017e-01,  8.26444179e-02, -6.12569824e-02,
         4.79193628e-02, -4.78363317e-03,  1.18997864e-01],
       [-3.67487848e-01,  6.02543131e-02, -7.63574019e-02,
         2.55442485e-02,  6.03258967e-01, -2.45212257e-01],
       [-5.62625945e-01,  1.46991342e-01, -2.47841641e-01,
         1.11771137e-01, -6.54106438e-02,  6.17115796e-01],
       [ 4.51983288e-02,  8.81112833e-03, -8.04098323e-02,
         3.79315540e-02, -7.10706949e-01,  6.99175715e-01]], dtype=float32)

### Sparse (second model)

In [19]:
w_init

array([[-2.67211008e+00, -9.13279295e-01, -2.27314353e-01,
         2.69315392e-01,  1.13046122e+00,  1.04239750e+00],
       [ 1.30381048e+00,  1.38940072e+00, -6.56452596e-01,
        -5.62572964e-02, -4.99902606e-01,  4.36419368e-01],
       [-3.75813037e-01, -9.23061609e-01,  1.91725028e+00,
        -1.50302842e-01, -6.38729751e-01,  8.24770331e-01],
       [-1.21083879e+00, -5.03405392e-01, -7.01915681e-01,
        -1.97427106e+00, -2.65573215e+00, -5.76822497e-02],
       [-6.56186581e-01, -6.61706686e-01,  7.69348443e-01,
        -8.99004877e-01,  1.69363797e+00, -1.69733524e+00],
       [-2.79337025e+00, -2.26150647e-01,  3.97428840e-01,
         1.65970361e+00, -4.93746817e-01, -3.76097679e-01],
       [-1.69739768e-01,  2.41710639e+00, -1.80884051e+00,
         3.39751154e-01, -2.27297600e-02, -9.59997058e-01],
       [-3.83114427e-01,  1.09529994e-01, -8.55162859e-01,
         2.21606664e-04,  6.63855076e-01,  7.49480963e-01]], dtype=float32)

In [20]:
model2 = MLP2()

In [21]:
optimizer2 = optim.SGD(model2.parameters(), lr=LR)

In [22]:
# Train the sparse model with the same step count, inputs and step order as the dense loop.
# `res2` and `loss2` keep the values from the final step for the comparisons below.
for i in range(iters):
    res2 = model2.forward(x)
    optimizer2.zero_grad()
    # Same argument order as the dense loop (prediction first) so both runs
    # build the loss identically.
    loss2 = loss_fn(res2, y)
    loss2.backward()
    optimizer2.step()

#### fwd

In [23]:
res2.cpu().data==res.cpu().data

array([[ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True]])

In [24]:
res.cpu().data

array([[4.1922873e-01, 5.6649712e-03, 1.4283931e-01, 4.0425811e-02,
        9.5227629e-02, 2.9661348e-01],
       [1.4229345e-03, 9.5304364e-01, 1.4277362e-02, 1.9623214e-02,
        2.5746459e-03, 9.0581281e-03],
       [1.6420444e-04, 5.8350306e-02, 3.1487264e-02, 2.6555007e-02,
        8.0411977e-01, 7.9323441e-02],
       [2.6168948e-01, 1.0091750e-02, 9.3293865e-04, 4.4397044e-04,
        1.6311060e-01, 5.6373131e-01]], dtype=float32)

In [25]:
res2.cpu().data

array([[4.1922873e-01, 5.6649712e-03, 1.4283931e-01, 4.0425811e-02,
        9.5227629e-02, 2.9661348e-01],
       [1.4229345e-03, 9.5304364e-01, 1.4277362e-02, 1.9623214e-02,
        2.5746459e-03, 9.0581281e-03],
       [1.6420444e-04, 5.8350306e-02, 3.1487264e-02, 2.6555007e-02,
        8.0411977e-01, 7.9323441e-02],
       [2.6168948e-01, 1.0091750e-02, 9.3293865e-04, 4.4397044e-04,
        1.6311060e-01, 5.6373131e-01]], dtype=float32)

In [26]:
res.cpu().data - res2.cpu().data

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]], dtype=float32)

#### loss

In [27]:
loss.cpu().data==loss2.cpu().data

array([[ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True]])

In [28]:
loss2.cpu().data

array([[0.8850471 , 0.28310424, 0.10733978, 0.80779535, 0.06777093,
        0.91201013],
       [0.50109786, 2.536866  , 1.0087562 , 0.6726406 , 0.53447044,
        0.01703519],
       [0.92462   , 0.01174197, 0.1105326 , 1.6802164 , 0.5638724 ,
        0.05421719],
       [0.6571717 , 1.2742958 , 0.5603441 , 0.23928154, 0.3167885 ,
        0.50197834]], dtype=float32)

In [29]:
loss.cpu().data - loss2.cpu().data

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]], dtype=float32)

#### grad

In [30]:
res.grad.cpu().data==res2.grad.cpu().data

array([[ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True]])

In [31]:
res2.grad.cpu().data

array([[ 0.9999999 ,  1.        ,  1.        , -1.        , -1.        ,
        -1.0000001 ],
       [-1.        ,  1.        , -1.        ,  1.        , -0.9999999 ,
         0.99999994],
       [-1.        ,  1.        , -1.        ,  0.99999994, -1.        ,
         1.        ],
       [ 1.        , -0.99999994, -1.        , -1.0000001 , -1.        ,
         1.        ]], dtype=float32)

In [32]:
res.grad.cpu().data-res2.grad.cpu().data

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]], dtype=float32)

#### weight

In [33]:
def to_dense(data, cols, nnzs, ellw, shape):
    """Scatter flat row-padded sparse storage into a dense array.

    Row r owns the slots r*ellw ... r*ellw + nnzs[r] - 1 of the flat `data`
    and `cols` sequences; each stored value is written to dense[r, cols[slot]].

    data:  flat sequence of stored values.
    cols:  flat sequence of column indices, aligned with `data`.
    nnzs:  number of stored entries to read for each row.
    ellw:  stride (number of slots) reserved per row in `data` / `cols`.
    shape: (rows, columns) of the result.

    Returns a np.zeros(shape) array (NumPy's default float dtype) with the
    stored entries filled in; positions never written stay zero.
    """
    dense = np.zeros(shape)
    n_rows = shape[0]
    for r in range(n_rows):
        base = r * ellw
        for k in range(nnzs[r]):
            slot = base + k
            dense[r, cols[slot]] = data[slot]
    return dense

In [34]:
model.W.grad, model2.W.grad

(<DenseTensor <GPUBuffer with shape (8, 6)> with grad None>,
 <SparseTensor <GPUBuffer with shape (48,)> with grad None>)

In [35]:
model.W.grad.cpu().data

array([[ 4.81720753e-02,  1.33489192e-01, -9.86666977e-03,
         6.78764358e-02, -5.45409799e-01,  3.05738747e-01],
       [-1.35541394e-01, -1.05922028e-01, -7.72921881e-03,
        -2.68947240e-02,  3.68136227e-01, -9.20488462e-02],
       [-3.87197256e-01, -1.68300074e-04, -1.54551625e-01,
         4.54462096e-02,  9.74631906e-02,  3.99007767e-01],
       [ 2.04064623e-02,  1.81639612e-01, -6.42970502e-02,
         5.91954142e-02, -4.38550293e-01,  2.41605848e-01],
       [-1.83521017e-01,  8.26444179e-02, -6.12569824e-02,
         4.79193628e-02, -4.78363317e-03,  1.18997864e-01],
       [-3.67487848e-01,  6.02543131e-02, -7.63574019e-02,
         2.55442485e-02,  6.03258967e-01, -2.45212257e-01],
       [-5.62625945e-01,  1.46991342e-01, -2.47841641e-01,
         1.11771137e-01, -6.54106438e-02,  6.17115796e-01],
       [ 4.51983288e-02,  8.81112833e-03, -8.04098323e-02,
         3.79315540e-02, -7.10706949e-01,  6.99175715e-01]], dtype=float32)

In [36]:
model2.W.grad.to_numpy()

array([[ 4.81720753e-02,  1.33489192e-01, -9.86666977e-03,
         6.78764358e-02, -5.45409799e-01,  3.05738747e-01],
       [-1.35541394e-01, -1.05922028e-01, -7.72921881e-03,
        -2.68947240e-02,  3.68136227e-01, -9.20488462e-02],
       [-3.87197256e-01, -1.68300074e-04, -1.54551625e-01,
         4.54462096e-02,  9.74631906e-02,  3.99007767e-01],
       [ 2.04064623e-02,  1.81639612e-01, -6.42970502e-02,
         5.91954142e-02, -4.38550293e-01,  2.41605848e-01],
       [-1.83521017e-01,  8.26444179e-02, -6.12569824e-02,
         4.79193628e-02, -4.78363317e-03,  1.18997864e-01],
       [-3.67487848e-01,  6.02543131e-02, -7.63574019e-02,
         2.55442485e-02,  6.03258967e-01, -2.45212257e-01],
       [-5.62625945e-01,  1.46991342e-01, -2.47841641e-01,
         1.11771137e-01, -6.54106438e-02,  6.17115796e-01],
       [ 4.51983288e-02,  8.81112833e-03, -8.04098323e-02,
         3.79315540e-02, -7.10706949e-01,  6.99175715e-01]])

In [37]:
model.W.grad.cpu().data == model2.W.grad.to_numpy()

array([[ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True]])

In [38]:
model.W.cpu().data

array([[-2.6839938 , -0.98301244, -0.2165843 ,  0.23769966,  1.3864344 ,
         0.88892674],
       [ 1.3511105 ,  1.4390723 , -0.65807307, -0.04305689, -0.66785085,
         0.4958162 ],
       [-0.22616075, -0.92054766,  1.968188  , -0.16849016, -0.69414824,
         0.69527215],
       [-1.2149247 , -0.58453983, -0.6744994 , -2.0015817 , -2.4456437 ,
        -0.18265593],
       [-0.5925767 , -0.7006933 ,  0.79298484, -0.9202368 ,  1.7368386 ,
        -1.7675637 ],
       [-2.679496  , -0.2479639 ,  0.4228978 ,  1.6492896 , -0.6602503 ,
        -0.31670985],
       [ 0.04279577,  2.3533084 , -1.7218812 ,  0.29192096,  0.03420548,
        -1.2047992 ],
       [-0.37021783,  0.10200752, -0.82640815, -0.01625864,  0.87161887,
         0.5240686 ]], dtype=float32)

In [39]:
model2.W.to_numpy()

array([[-2.68399382, -0.98301244, -0.2165843 ,  0.23769966,  1.38643444,
         0.88892674],
       [ 1.35111046,  1.43907225, -0.65807307, -0.04305689, -0.66785085,
         0.4958162 ],
       [-0.22616075, -0.92054766,  1.96818805, -0.16849016, -0.69414824,
         0.69527215],
       [-1.21492469, -0.58453983, -0.67449939, -2.00158167, -2.44564366,
        -0.18265593],
       [-0.59257668, -0.70069331,  0.79298484, -0.92023683,  1.73683858,
        -1.7675637 ],
       [-2.67949605, -0.24796391,  0.42289779,  1.64928961, -0.66025031,
        -0.31670985],
       [ 0.04279577,  2.35330844, -1.72188115,  0.29192096,  0.03420548,
        -1.20479918],
       [-0.37021783,  0.10200752, -0.82640815, -0.01625864,  0.87161887,
         0.52406859]])

In [45]:
model.W.cpu().data == model2.W.to_numpy()

array([[ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True]])

In [44]:
model.W.cpu().data == model2.W.to_numpy(dual=True).T

array([[ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True]])

In [41]:
model.W.cpu().data - model2.W.to_numpy()

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [42]:
# NOTE(review): `cols` is never assigned anywhere in this notebook, so this cell
# raises NameError on a fresh kernel (see the recorded output). It looks like a
# leftover from inspecting the sparse layout — define it or drop the cell.
cols

NameError: name 'cols' is not defined

In [None]:
# `nnzs` was never assigned in this notebook; fetch the non-zero counts from the
# sparse weight the same way the earlier get_nnzs() cell does, then display them.
nnzs = model2.W.get_nnzs()
nnzs

In [None]:
denserec = model.W.cpu().data 
denserec

In [None]:
sparserec = model2.W.to_numpy()
sparserec

In [None]:
denserec - sparserec

In [None]:
sparserec2 = model2.W.to_numpy(dual=True)
sparserec2.T

In [None]:
denserec - sparserec2.T

In [None]:
# `seedmat` is not defined anywhere in this notebook; w_init is the matrix the
# weights were seeded from, so this shows how far training moved the dense weights.
w_init - denserec