### TINYGRAD TESTS

In [10]:
from tinygrad import Device
# Show which device tinygrad uses by default (the recorded run printed "NV").
default_device = Device.DEFAULT
print(default_device)

NV


In [1]:
from tinygrad import Tensor, nn

class Model:
  """Small convolutional classifier.

  Two 3x3 convolutions, each followed by ReLU and 2x2 max-pooling, then
  dropout and a linear layer producing 10 outputs.
  """

  def __init__(self):
    # Layers are created in the order l1, l2, l3.
    self.l1 = nn.Conv2d(1, 32, kernel_size=(3,3))
    self.l2 = nn.Conv2d(32, 64, kernel_size=(3,3))
    # NOTE(review): 1600 presumably equals 64*5*5 for 28x28 inputs - confirm if the input size changes.
    self.l3 = nn.Linear(1600, 10)

  def __call__(self, x:Tensor) -> Tensor:
    """Run the forward pass on `x` and return the output of the final linear layer."""
    stage1 = self.l1(x).relu().max_pool2d((2,2))
    stage2 = self.l2(stage1).relu().max_pool2d((2,2))
    # Flatten everything after the first axis, then apply dropout with p=0.5.
    features = stage2.flatten(1).dropout(0.5)
    return self.l3(features)

In [2]:
from tinygrad.nn.datasets import mnist
# mnist() returns four values, unpacked here as train/test inputs and labels.
X_train, Y_train, X_test, Y_test = mnist()
train_summary = (X_train.shape, X_train.dtype, Y_train.shape, Y_train.dtype)
print(*train_summary)
# (60000, 1, 28, 28) dtypes.uchar (60000,) dtypes.uchar

(60000, 1, 28, 28) dtypes.uchar (60000,) dtypes.uchar


In [3]:
model = Model()
# Fraction of test samples whose arg-max output equals the label.
predicted = model(X_test).argmax(axis=1)
acc = (predicted == Y_test).mean()
# NOTE: tinygrad is lazy, and hasn't actually run anything by this point
print(acc.item())  # ~10% accuracy, as expected from a random model

0.10339999943971634


In [4]:
optim = nn.optim.Adam(nn.state.get_parameters(model))
batch_size = 128
def step():
  """Run one optimizer update on a random mini-batch.

  Reads the notebook-level `model`, `optim`, `batch_size`, `X_train` and
  `Y_train`. Returns whatever `.backward()` returns on the loss expression.
  """
  Tensor.training = True  # makes dropout work
  # batch_size random row indices below the number of training rows
  batch_idx = Tensor.randint(batch_size, high=X_train.shape[0])
  inputs = X_train[batch_idx]
  labels = Y_train[batch_idx]
  optim.zero_grad()
  logits = model(inputs)
  loss = logits.sparse_categorical_crossentropy(labels).backward()
  optim.step()
  return loss

In [5]:
import timeit
# Five timing runs, each making a single call to step().
timeit.repeat(stmt=step, number=1, repeat=5)
# Timings kept from the original comment (presumably a reference run):
#[0.08268719699981375,
# 0.07478952900009972,
# 0.07714716600003158,
# 0.07785399599970333,
# 0.07605237000007037]

[5.085770697999578,
 0.1501849830001447,
 0.07820817599986185,
 0.07873111899971263,
 0.07625710999946023]

In [6]:
from tinygrad import GlobalCounters, Context
GlobalCounters.reset()
# DEBUG=2 is applied only inside this block; the recorded output lists the kernels run by one step().
with Context(DEBUG=2):
  step()

scheduled 63 kernels
memory reduced from 4.21 MB -> 4.05 MB, 4 -> 1 bufs
*** NV         1 E_[90mn11[0m                                     arg  1 mem  0.06 GB tm      1.31us/     0.00ms (     0.00 GFLOPS    0.0|0.0     GB/s) ['__imul__']
*** NV         2 E_[90mn12[0m                                     arg  1 mem  0.06 GB tm      1.44us/     0.00ms (     0.00 GFLOPS    0.0|0.0     GB/s) ['__imul__']
*** NV         3 E_[90mn6[0m                                      arg  1 mem  0.06 GB tm      1.02us/     0.00ms (     0.00 GFLOPS    0.0|0.0     GB/s) ['randint']
*** NV         4 r_[34m625[0m[90m_[0m[36m32[0m[90m_[0m[31m15000[0m[90m_[0m[33m3[0m[90m_[0m[35m4[0m[90m[0m                        arg  1 mem  0.06 GB tm      1.54us/     0.01ms (   299.48 GFLOPS  156.2|156.2   GB/s) ['__getitem__']
*** NV         5 r_[34m5[0m[90m_[0m[36m2[0m[90m_[0m[35m10[0m[90mn1[0m                                arg  1 mem  0.06 GB tm      0.96us/     0.01ms (     0.38 GFLOP

In [7]:
# Wrap the training step in TinyJit; the following cells call `jit_step` instead of `step`.
from tinygrad import TinyJit
jit_step = TinyJit(step)

In [8]:
import timeit
# Five timing runs, each making a single call to jit_step().
timeit.repeat(stmt=jit_step, number=1, repeat=5)
# Timings kept from the original comment (presumably a reference run):
# [0.2596786549997887,
#  0.08989566299987928,
#  0.0012115650001760514,
#  0.001010227999813651,
#  0.0012164899999334011]

[0.14179589099967416,
 0.0825958109999192,
 0.004253581999364542,
 0.004792515000190178,
 0.004411459000039031]

In [9]:
# Run 700 jitted training steps and report test accuracy every 100 steps.
# The counter is named `step_idx` rather than `step` so the loop does not rebind
# the `step` function defined earlier. With the old name, `step` was left as an
# int after this cell, so re-running the timing or TinyJit cells would fail.
for step_idx in range(700):
  loss = jit_step()
  if step_idx%100 == 0:
    # step() sets Tensor.training to True; presumably False disables dropout for evaluation.
    # NOTE(review): the flag is left at False after this cell finishes.
    Tensor.training = False
    acc = (model(X_test).argmax(axis=1) == Y_test).mean().item()
    print(f"step {step_idx:4d}, loss {loss.item():.2f}, acc {acc*100.:.2f}%")

step    0, loss 4.30, acc 69.85%
step  100, loss 0.41, acc 94.59%
step  200, loss 0.29, acc 96.10%
step  300, loss 0.12, acc 96.74%
step  400, loss 0.10, acc 97.19%
step  500, loss 0.34, acc 97.53%
step  600, loss 0.09, acc 97.50%


### tinygrad conversation.py debugging ###

In [None]:
from phonemizer.backend import EspeakBackend
from phonemizer.punctuation import Punctuation
from phonemizer.separator import Separator

# Sentence to phonemize. Earlier trial sentences are kept as comments instead of
# as assignments that were immediately overwritten.
#text = "Hello, World! Preprocess deez nuts!"
#text = "No, I'm just text-based, I don't have the ability to hear or see you."
text = "Hello, I'm a chat bot"

# Optional step, currently disabled: remove the listed punctuation marks from the
# text. While it is disabled, words keep attached punctuation (e.g. 'hello,').
#text = Punctuation(';:,.!"?()').remove(text)

# Lower-cased words in order of appearance. split() with no argument collapses
# runs of whitespace, so repeated spaces cannot produce empty-string "words".
words = [word.lower() for word in text.split()]

# initialize the espeak backend for English
backend = EspeakBackend('en-us')

# separate phones by a space and ignore word boundaries
separator = Separator(phone=' ', word=None)

# Build the lexicon by phonemizing each word one by one. backend.phonemize
# expects a list as input and outputs a list, hence the [word] / [0] wrapping.
# Repeated words collapse into a single dictionary entry.
lexicon = {
    word: backend.phonemize([word], separator=separator, strip=True)[0]
    for word in words}

print(lexicon)

{'hello,': 'h ə l oʊ', "i'm": 'aɪ m', 'a': 'eɪ', 'chat': 'tʃ æ t', 'bot': 'b ɑː t'}


In [None]:
# Pass the `words` list to eng_to_ipa; the cell displays the returned value.
# NOTE(review): `words` is not defined in this cell; presumably it comes from the
# phonemizer cell, which must have been run first - confirm.
import eng_to_ipa as ipa
ipa.convert(words)

'hɛˈloʊ, əm ə ʧæt bot*'

In [1]:
from tinygrad import Tensor, nn, TinyJit
from tinygrad.nn.datasets import mnist

class Model:
  """Two conv + ReLU + max-pool stages followed by dropout and a 10-output linear layer."""

  def __init__(self):
    # Attribute order (l1, l2, l3) matches the order the layers are applied in __call__.
    self.l1 = nn.Conv2d(1, 32, kernel_size=(3,3))
    self.l2 = nn.Conv2d(32, 64, kernel_size=(3,3))
    # NOTE(review): 1600 presumably equals 64*5*5 for 28x28 inputs - confirm if the input size changes.
    self.l3 = nn.Linear(1600, 10)

  def __call__(self, x:Tensor) -> Tensor:
    """Apply the network to `x` and return the output of the last layer."""
    hidden = x
    # Both convolution stages use the same activation and 2x2 pooling.
    for conv in (self.l1, self.l2):
      hidden = conv(hidden).relu().max_pool2d((2,2))
    flat = hidden.flatten(1)
    return self.l3(flat.dropout(0.5))
model = Model()

# mnist() returns four values, unpacked here as train/test inputs and labels.
X_train, Y_train, X_test, Y_test = mnist()
train_summary = (X_train.shape, X_train.dtype, Y_train.shape, Y_train.dtype)
print(*train_summary)
# (60000, 1, 28, 28) dtypes.uchar (60000,) dtypes.uchar

optim = nn.optim.Adam(nn.state.get_parameters(model))
batch_size = 128
def step():
  """Perform a single training update on a randomly drawn mini-batch.

  Uses the cell-level `model`, `optim`, `batch_size`, `X_train` and `Y_train`,
  and returns the value produced by calling `.backward()` on the loss.
  """
  Tensor.training = True  # makes dropout work
  n_train = X_train.shape[0]
  picks = Tensor.randint(batch_size, high=n_train)  # random row indices
  batch_x = X_train[picks]
  batch_y = Y_train[picks]
  optim.zero_grad()
  loss = model(batch_x).sparse_categorical_crossentropy(batch_y).backward()
  optim.step()
  return loss

jit_step = TinyJit(step)

# Run 600 jitted training steps and report test accuracy every 100 steps.
# The counter is named `step_idx` rather than `step` so the loop does not rebind
# the `step` function defined above. With the old name, `step` was left as an int
# after the loop, so re-running `TinyJit(step)` in the same kernel would wrap an
# int instead of the function.
for step_idx in range(600):
  loss = jit_step()
  if step_idx%100 == 0:
    # step() sets Tensor.training to True; presumably False disables dropout for evaluation.
    Tensor.training = False
    acc = (model(X_test).argmax(axis=1) == Y_test).mean().item()
    print(f"step {step_idx:4d}, loss {loss.item():.2f}, acc {acc*100.:.2f}%")

(60000, 1, 28, 28) dtypes.uchar (60000,) dtypes.uchar
step    0, loss 42.38, acc 15.97%
step  100, loss 0.40, acc 94.26%
step  200, loss 0.27, acc 96.04%
step  300, loss 0.19, acc 96.66%
step  400, loss 0.31, acc 96.97%
step  500, loss 0.18, acc 97.41%


In [12]:
from tinygrad import Tensor
import numpy as np

# Tensor built from a numpy range: print it whole, then from index 5 onward.
test = Tensor(np.arange(10))
for view in (test, test[5:]):
  print(view.numpy())

# unsqueeze(0) on a 4-element tensor (recorded output: [[1 2 3 4]]).
t = Tensor([1, 2, 3, 4])
row = t.unsqueeze(0)
print(row.numpy())

# Concatenate three single-row tensors of different widths along dim=1.
t0 = Tensor([[1, 2]])
t1 = Tensor([[3]])
t2 = Tensor([[5, 6]])
joined = t0.cat(t1, t2, dim=1)
print(joined.numpy())

# Pure numpy: repeat 0..9 on three rows, then add a leading axis of size 1.
tiled = np.tile(np.arange(10), (3, 1))
print(tiled.reshape(1, 3, 10))

[0 1 2 3 4 5 6 7 8 9]
[5 6 7 8 9]
[[1 2 3 4]]
[[1 2 3 5 6]]
[[[0 1 2 3 4 5 6 7 8 9]
  [0 1 2 3 4 5 6 7 8 9]
  [0 1 2 3 4 5 6 7 8 9]]]


In [4]:
import random
import numpy as np
def make_dataset(seed=None):
  """Build a shuffled digit-wise addition dataset for all i, j in 0..99.

  Each example row holds seven digits:
  [i tens, i ones, j tens, j ones, sum hundreds, sum tens, sum ones].
  X is the first six columns and Y is the last six columns, so Y is X shifted
  left by one position with the ones digit of the sum appended.

  Args:
    seed: optional seed for the shuffle. When None (the default) the
      module-level `random` generator is used, exactly as before. When given, a
      private generator is used, so the split is reproducible and the global
      random state is left untouched.

  Returns:
    (ds_X_train, ds_Y_train, ds_X_test, ds_Y_test) as float32 arrays. The first
    8000 shuffled rows are the training split and the remaining 2000 the test split.
  """
  rows = [[i//10, i%10, j//10, j%10, (i+j)//100, ((i+j)//10)%10, (i+j)%10]
          for i in range(100) for j in range(100)]
  shuffler = random if seed is None else random.Random(seed)
  shuffler.shuffle(rows)
  ds = np.array(rows).astype(np.float32)
  ds_X = ds[:, 0:6]
  ds_Y = np.copy(ds[:, 1:])
  ds_X_train, ds_X_test = ds_X[0:8000], ds_X[8000:]
  ds_Y_train, ds_Y_test = ds_Y[0:8000], ds_Y[8000:]
  return ds_X_train, ds_Y_train, ds_X_test, ds_Y_test

# Build the splits, then show the training-input shape and its first ten rows.
X_train, Y_train, X_test, Y_test = make_dataset()
print(X_train.shape, X_train[:10], sep="\n")

(8000, 6)
[[9. 5. 8. 8. 1. 8.]
 [0. 2. 5. 1. 0. 5.]
 [4. 3. 2. 3. 0. 6.]
 [8. 7. 6. 2. 1. 4.]
 [5. 9. 0. 6. 0. 6.]
 [5. 5. 1. 8. 0. 7.]
 [8. 1. 8. 8. 1. 6.]
 [6. 6. 9. 3. 1. 5.]
 [3. 4. 0. 7. 0. 4.]
 [1. 2. 7. 7. 0. 8.]]


In [4]:
import random
import numpy as np
# data loading; the encoding is explicit so the result does not depend on the platform default
with open('littleshakespeare/input.txt', encoding='utf-8') as file:
    text = file.read()
vocab = sorted(set(text))    # all unique characters in the data, sorted
ctoi = {c:i for i,c in enumerate(vocab)}    # map characters to integers
itoc = {i:c for i,c in enumerate(vocab)}    # map integers to characters

def encode(clist):
    """Convert a string (any iterable of characters) to a list of integers."""
    return [ctoi[c] for c in clist]

def decode(ilist):
    """Convert an iterable of integers back to a string."""
    return ''.join([itoc[i] for i in ilist])

n = int(0.9*len(text))    # first 90% of data is the training set, the rest is the validation set
train_data, val_data = encode(text[:n]), encode(text[n:])    # encoded training and validation sets

batch_size = 8000    # number of training examples per forward pass
block_size = 6    # max context length for predictions
def get_batch(data):
    """Sample `batch_size` random windows of length `block_size` from `data`.

    Returns a pair of arrays (inputs, targets), each with one row per window.
    Each target row is the window starting one position after its input row.
    """
    # Largest start index that still leaves room for the shifted target window.
    last_start = len(data) - block_size - 1
    starts = [random.randint(0, last_start) for _ in range(batch_size)]
    inputs = np.array([data[s:s+block_size] for s in starts])
    targets = np.array([data[s+1:s+1+block_size] for s in starts])
    return inputs, targets # (batch_size, block_size)

# Draw one training batch, then print its shape, the corpus length in
# characters and the number of newline characters, one value per line.
X_train, Y_train = get_batch(train_data)
print(X_train.shape, len(text), text.count('\n'), sep='\n')

(8000, 6)
1115394
40000


In [14]:
from tinygrad import Tensor
# Start with ten 1s, then append a single 2 in place.
test = [1 for _ in range(10)]
print(test)
test += [2]
print(test)
# test[9:] holds the last two elements; wrapping it in a list adds a leading axis.
tail = test[9:]
print(Tensor([tail]).shape)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2]
(1, 2)
