In [1]:
from tinygrad.densetensor import DenseTensor, cl_queue
from tinygrad.sparsetensor import SparseTensor
import numpy as np
import tinygrad.optim as optim
from extra.utils import get_parameters


%load_ext autoreload
%autoreload 2

DEVICE:GPU


In [2]:
# Problem sizes: the input batch is (dim1, dim2) and the weight matrix is (dim2, dim3).
dim1 = 3
dim2 = 8
dim3 = 4

# Seed NumPy's global RNG so the random arrays drawn in the following cells are reproducible.
np.random.seed(9)

In [3]:
# Random input batch and initial dense weights, both explicitly float32.
x_init = np.random.randn(dim1,dim2).astype(np.float32)
w_init = np.random.randn(dim2,dim3).astype(np.float32)
x = DenseTensor(x_init)
# Target for the loss. randn returns float64, so cast it like x_init / w_init
# to keep every host array handed to DenseTensor in the same dtype.
y = DenseTensor(np.random.randn(dim1,dim3).astype(np.float32))

# Quick look at the random sparse initialiser. Note that W_TEST is reassigned
# from w_init in the next cell, so this value is only displayed, not trained on.
W_TEST = SparseTensor.uniform(dim2,dim3, randsparsity=0.6).to_numpy()
W_TEST

In [4]:
# Round-trip the dense initial weights through SparseTensor and back to NumPy.
# This overwrites the W_TEST assigned in the previous cell and is the matrix the
# models below are ultimately initialised from.
W_TEST = SparseTensor(w_init).to_numpy()
W_TEST

array([[ 0.5163675 , -0.35523945,  0.208777  ,  0.32841107],
       [-0.49822477, -2.09177685, -0.08258774,  2.45518255],
       [-2.67211008, -0.91327929, -0.22731435,  0.26931539],
       [ 1.13046122,  1.0423975 ,  1.30381048,  1.38940072],
       [-0.6564526 , -0.0562573 , -0.49990261,  0.43641937],
       [-0.37581304, -0.92306161,  1.91725028, -0.15030284],
       [-0.63872975,  0.82477033, -1.21083879, -0.50340539],
       [-0.70191568, -1.97427106, -2.65573215, -0.05768225]])

In [5]:
class MLP:
  """Dense reference model: one weight matrix followed by a softmax.

  Args:
    init_dense: initial weight matrix (NumPy array) that is wrapped in a
      DenseTensor and stored as ``self.W``.
  """
  def __init__(self, init_dense):
    # Use the weights supplied by the caller. Previously the argument was
    # ignored and the notebook-global W_TEST was read instead, so the model
    # silently depended on hidden state.
    self.W = DenseTensor(init_dense)

  def parameters(self):
    """Return whatever get_parameters collects from this object."""
    return get_parameters(self)

  def forward(self, x):
    """Compute ``x.dot(W)`` and apply softmax to the result."""
    out = x.dot(self.W)
    out = out.softmax()
    return out

In [6]:
class MLP2:
  """Sparse counterpart of MLP: same forward pass, weights held in a SparseTensor."""
  def __init__(self):
    # The weights are taken from the notebook-level W_TEST matrix.
    self.W = SparseTensor(W_TEST)

  def parameters(self):
    """Hand this object to get_parameters and return its result."""
    return get_parameters(self)

  def forward(self, x):
    """Multiply ``x`` by the sparse weights, then apply softmax."""
    logits = x.dot(self.W)
    return logits.softmax()

In [7]:
def loss_fn(y, y_pred):
    """Elementwise sqrt((y - y_pred)**2); no reduction is applied.

    Works on anything supporting ``-`` and ``**`` (tensors, arrays, floats).
    """
    residual = y - y_pred
    squared = residual ** 2
    return squared ** .5

In [8]:
model2 = MLP2()

In [9]:
model2.W.get_nnzs()

array([4, 4, 4, 4, 4, 4, 4, 4], dtype=uint32)

In [10]:
dense_init = model2.W.to_numpy()
dense_init

array([[ 0.5163675 , -0.35523945,  0.208777  ,  0.32841107],
       [-0.49822477, -2.09177685, -0.08258774,  2.45518255],
       [-2.67211008, -0.91327929, -0.22731435,  0.26931539],
       [ 1.13046122,  1.0423975 ,  1.30381048,  1.38940072],
       [-0.6564526 , -0.0562573 , -0.49990261,  0.43641937],
       [-0.37581304, -0.92306161,  1.91725028, -0.15030284],
       [-0.63872975,  0.82477033, -1.21083879, -0.50340539],
       [-0.70191568, -1.97427106, -2.65573215, -0.05768225]])

In [11]:
model = MLP(dense_init)

### Dense

In [12]:
# Number of training steps; the sparse run further down reuses the same value.
iters = 1
# Plain SGD over the dense model's parameters with a learning rate of 1.
optimizer = optim.SGD(model.parameters(), lr=1)

In [13]:
# Dense reference run. res / loss are kept as globals so later cells can
# compare them against the sparse run.
for i in range(iters):
    res = model.forward(x)
    optimizer.zero_grad()
    # loss_fn is symmetric in its two arguments, so passing the prediction
    # first gives the same values as loss_fn(y, res).
    loss = loss_fn(res, y)
    loss.backward()
    optimizer.step()

In [14]:
model.parameters()

[<DenseTensor <GPUBuffer with shape (8, 4)> with grad <GPUBuffer with shape (8, 4)>>,
 <DenseTensor <GPUBuffer with shape (8, 4)> with grad None>]

In [15]:
res.cpu().data

array([[0.8585892 , 0.04020737, 0.09639589, 0.00480766],
       [0.01408709, 0.865447  , 0.05977303, 0.06069294],
       [0.25320128, 0.6764083 , 0.03263042, 0.03775999]], dtype=float32)

In [16]:
loss.cpu().data

array([[1.5147759 , 0.701914  , 0.67295253, 0.90381247],
       [1.6795509 , 2.5627823 , 2.8531432 , 0.2868436 ],
       [0.14422758, 0.98329526, 0.52637726, 0.41385764]], dtype=float32)

In [17]:
res.grad.cpu().data

array([[ 0.99999994,  1.        , -0.99999994,  1.        ],
       [-0.99999994,  1.        ,  0.99999994,  1.        ],
       [-1.        , -1.        ,  0.99999994,  1.0000001 ]],
      dtype=float32)

In [18]:
model.W.grad.cpu().data

array([[-0.04812295, -0.152767  ,  0.0929945 ,  0.10789544],
       [ 0.00217728,  0.06772799, -0.00529459, -0.06461067],
       [-0.19751662,  0.01991153,  0.18717895, -0.00957384],
       [-0.09672152, -0.08123256,  0.08389507,  0.094059  ],
       [-0.09494139, -0.06000179,  0.10737976,  0.04756344],
       [-0.10043351,  0.01047844,  0.08694763,  0.00300745],
       [-0.3345231 , -0.05474639,  0.3231013 ,  0.0661682 ],
       [-0.10678267, -0.02953567,  0.1093234 ,  0.02699495]],
      dtype=float32)

### Sparse

In [19]:
model2 = MLP2()

In [20]:
optimizer2 = optim.SGD(model2.parameters(), lr=1)

In [21]:
# Sparse run: the same steps as the dense loop, on model2 / optimizer2.
# res2 / loss2 are kept as globals for the comparison cells below.
for i in range(iters):
    res2 = model2.forward(x)
    optimizer2.zero_grad()
    # loss_fn is symmetric in its two arguments, so the argument order is harmless.
    loss2 = loss_fn(res2, y)
    loss2.backward()
    optimizer2.step()

aval, xval: 0.52,0.17 - 0.09: (0,0) 
aval, xval: -0.36,0.01 - 0.08: (1,1) 
aval, xval: 0.21,-0.17 - 0.05: (2,2) 
aval, xval: 0.33,0.00 - 0.05: (3,3) 


#### fwd

In [22]:
res2.cpu().data==res.cpu().data

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [23]:
res2.cpu().data

array([[0.8585892 , 0.04020737, 0.09639589, 0.00480766],
       [0.01408709, 0.865447  , 0.05977303, 0.06069294],
       [0.25320128, 0.6764083 , 0.03263042, 0.03775999]], dtype=float32)

In [24]:
res.cpu().data - res2.cpu().data

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]], dtype=float32)

#### loss

In [25]:
loss.cpu().data==loss2.cpu().data

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [26]:
loss2.cpu().data

array([[1.5147759 , 0.701914  , 0.67295253, 0.90381247],
       [1.6795509 , 2.5627823 , 2.8531432 , 0.2868436 ],
       [0.14422758, 0.98329526, 0.52637726, 0.41385764]], dtype=float32)

In [27]:
loss.cpu().data - loss2.cpu().data

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]], dtype=float32)

#### grad

In [28]:
res.grad.cpu().data==res2.grad.cpu().data

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [29]:
res2.grad.cpu().data

array([[ 0.99999994,  1.        , -0.99999994,  1.        ],
       [-0.99999994,  1.        ,  0.99999994,  1.        ],
       [-1.        , -1.        ,  0.99999994,  1.0000001 ]],
      dtype=float32)

In [30]:
res.grad.cpu().data-res2.grad.cpu().data

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]], dtype=float32)

#### weight

In [31]:
def to_dense(data, cols, nnzs, ellw, shape):
    """Expand flat, row-padded sparse storage into a dense NumPy array.

    Args:
        data: flat sequence of values; row ``r`` starts at offset ``r * ellw``.
        cols: flat sequence of column indices, laid out like ``data``.
        nnzs: number of stored entries for each row.
        ellw: stride (slots reserved per row) in ``data`` / ``cols``.
        shape: 2-D shape of the dense result.

    Returns:
        A ``np.zeros(shape)`` array with the stored entries written in.
    """
    dense = np.zeros(shape)
    for r in range(shape[0]):
        base = r * ellw
        # Only the first nnzs[r] slots of a row hold real entries.
        for k in range(nnzs[r]):
            slot = base + k
            dense[r, cols[slot]] = data[slot]
    return dense

In [32]:
model.W.grad, model2.W.grad

(<DenseTensor <GPUBuffer with shape (8, 4)> with grad None>,
 <SparseTensor <GPUBuffer with shape (64,)> with grad None>)

In [33]:
model.W.grad.cpu().data

array([[-0.04812295, -0.152767  ,  0.0929945 ,  0.10789544],
       [ 0.00217728,  0.06772799, -0.00529459, -0.06461067],
       [-0.19751662,  0.01991153,  0.18717895, -0.00957384],
       [-0.09672152, -0.08123256,  0.08389507,  0.094059  ],
       [-0.09494139, -0.06000179,  0.10737976,  0.04756344],
       [-0.10043351,  0.01047844,  0.08694763,  0.00300745],
       [-0.3345231 , -0.05474639,  0.3231013 ,  0.0661682 ],
       [-0.10678267, -0.02953567,  0.1093234 ,  0.02699495]],
      dtype=float32)

In [34]:
model2.W.grad.data.cl

<pyopencl._cl.Buffer at 0x7f9e9608ea90>

In [35]:
import pyopencl as cl

# Copy the sparse gradient's raw OpenCL buffers into host arrays so that
# to_dense can be run on them (as values, column indices and per-row counts).
dim = np.prod(model2.W.grad.shape)
data = np.zeros(dim).astype(np.float32)
cl.enqueue_copy(cl_queue, data, model2.W.grad.datat.cl)
cols = np.zeros(dim).astype(np.uint32)
cl.enqueue_copy(cl_queue, cols, model2.W.grad.idxst.cl)
# NOTE(review): the row count here is taken from the dense model's gradient
# (model), while every other size/buffer comes from model2 — confirm this is intended.
nnzs = np.zeros(model.W.grad.shape[0]).astype(np.uint32)
cl.enqueue_copy(cl_queue, nnzs, model2.W.grad.nnzst.cl)

<pyopencl._cl.NannyEvent at 0x7f9e9609f6d0>

In [36]:
# Rebuild a dense view of the sparse gradient from the host copies made above.
# NOTE(review): the recorded output of this cell is
# "IndexError: index 32 is out of bounds for axis 0 with size 32" — presumably
# ellwt / shape do not match the layout of the copied buffers; confirm before relying on it.
to_dense(data, cols, nnzs, model2.W.grad.ellwt, model2.W.grad.shape)

IndexError: index 32 is out of bounds for axis 0 with size 32

In [None]:
data

In [None]:
cols

In [None]:
nnzs

In [None]:
denserec = model.W.cpu().data 
denserec

In [None]:
sparserec = model2.W.to_numpy()
sparserec

In [None]:
denserec - sparserec

In [None]:
sparserec2 = model2.W.to_numpy(dual=True)
sparserec2.T

In [None]:
denserec - sparserec2.T

In [None]:
# `seedmat` is never defined in this notebook, so the original expression raised
# NameError on a fresh kernel. dense_init holds the weights both models started
# from, so this difference shows the update applied to the dense weights by training.
dense_init - denserec