<a href="https://colab.research.google.com/github/Trickshotblaster/nn-practices/blob/main/AIShakespeare.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [97]:
import torch

In [98]:
import requests
# Download the Tiny Shakespeare corpus as one string.
url = 'https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of silently training on an error page.
response.raise_for_status()
text = response.text

In [99]:
# Character-level vocabulary: every distinct character in `text`, sorted.
vocab = sorted(set(text))
# One extra slot because index 0 is reserved (used as left padding when contexts are built).
vocab_size = 1 + len(vocab)
# Number of preceding characters the model sees for each prediction.
block_size = 5
# Characters from `vocab` get indices 1..len(vocab); index 0 is pre-seeded with ' '.
# NOTE(review): if ' ' occurs in `text`, its stoi entry is overwritten with its
# vocab index, while itos[0] stays ' ' — so two indices decode to a space.
stoi = {' ': 0, **{ch: idx for idx, ch in enumerate(vocab, start=1)}}
itos = {0: ' ', **{idx: ch for idx, ch in enumerate(vocab, start=1)}}
def build_dataset(text, block_size):
  """Turn `text` into (context, target) index pairs for next-character prediction.

  A window starts as `block_size` zeros. For each position except the last,
  the window drops its oldest entry, appends the index of the current
  character, and is stored together with the index of the following character.

  Characters are mapped to indices through the module-level `stoi` table.

  Returns:
    (xs, ys): a list of context windows (lists of ints) and a parallel list
    of target indices. Both are empty when `text` has fewer than 2 characters.
  """
  contexts, targets = [], []
  window = [0] * block_size
  for pos in range(len(text) - 1):
    # Slide the window: a new list each step, so stored windows are never aliased.
    window = window[1:] + [stoi[text[pos]]]
    contexts.append(window)
    targets.append(stoi[text[pos + 1]])
  return contexts, targets
def make_splits(text, block_size, split_amounts):
  """Cut `text` into train/dev/test segments and build tensors for each.

  Args:
    text: the full corpus.
    block_size: context length passed through to `build_dataset`.
    split_amounts: sequence whose first two entries are the fractions of
      `text` used for the train and dev segments; the test segment is
      whatever remains after those two.

  Returns:
    (Xtr, Ytr, Xdev, Ydev, Xte, Yte) as torch tensors.
  """
  n_chars = len(text)
  train_end = int(n_chars * split_amounts[0])
  dev_end = train_end + int(n_chars * split_amounts[1])
  segments = (text[:train_end], text[train_end:dev_end], text[dev_end:])

  tensors = []
  for segment in segments:
    inputs, targets = build_dataset(segment, block_size)
    tensors.extend((torch.tensor(inputs), torch.tensor(targets)))
  return tuple(tensors)

# 80% of the corpus for training, the next 10% for dev, the remainder for test.
Xtr, Ytr, Xdev, Ydev, Xte, Yte = make_splits(
  text=text,
  block_size=block_size,
  split_amounts=[0.8, 0.1],
)
# Peek at the first training example: context window and its target index.
print(Xtr[0], Ytr[0])

tensor([ 0,  0,  0,  0, 19]) tensor(48)


In [100]:
# Seed torch's global RNG so a fresh Restart & Run All reproduces the same
# initial weights (and the same subsequent draws from the global RNG).
seed = 1337
torch.manual_seed(seed)

emb_dim = 10       # size of each character embedding
hidden_size = 200  # width of the tanh hidden layer

# Embedding table: one row of `emb_dim` values per vocabulary index.
C = torch.randn(vocab_size, emb_dim)
# Hidden layer over the concatenated context embeddings; weights are scaled
# by 1/sqrt(fan_in).
w1 = torch.randn(block_size * emb_dim, hidden_size) / ((block_size * emb_dim)**0.5)
b1 = torch.randn(hidden_size)
# Output layer producing one score per vocabulary index, same 1/sqrt(fan_in) scaling.
w2 = torch.randn(hidden_size, vocab_size) / (hidden_size ** 0.5)
b2 = torch.randn(vocab_size)

# Every tensor the training loop updates.
params = [C, w1, b1, w2, b2]
for p in params:
  p.requires_grad = True

In [101]:
import torch.nn.functional as F
# Sanity check: run one training context (example 20) through the untrained
# network and print the resulting distribution over the vocabulary.
ins = C[Xtr[20]].view(-1)            # concatenate the context's embeddings into one vector
l1 = torch.tanh(ins @ w1 + b1)       # hidden activations
out = (l1 @ w2 + b2).softmax(dim=0)  # probabilities over vocabulary indices
print(out)

tensor([0.0005, 0.0069, 0.0038, 0.0391, 0.0113, 0.0126, 0.0020, 0.0181, 0.0015,
        0.0095, 0.0008, 0.0145, 0.0132, 0.0022, 0.0434, 0.0064, 0.0072, 0.0014,
        0.0635, 0.0716, 0.0090, 0.0123, 0.0096, 0.0038, 0.0157, 0.0124, 0.0158,
        0.0038, 0.0012, 0.0055, 0.0031, 0.0010, 0.0036, 0.0028, 0.1118, 0.0038,
        0.0059, 0.0019, 0.0059, 0.0200, 0.0106, 0.0110, 0.0115, 0.0157, 0.0089,
        0.0013, 0.0039, 0.0353, 0.0029, 0.0042, 0.0125, 0.0008, 0.0125, 0.0270,
        0.0093, 0.0070, 0.0698, 0.0026, 0.0064, 0.0362, 0.0012, 0.0433, 0.0516,
        0.0190, 0.0165, 0.0006], grad_fn=<SoftmaxBackward0>)


In [102]:
# Training hyperparameters.
num_epochs = 1
batch_size = 16
# Used as the exclusive upper bound of torch.randint when sampling batches,
# so it must equal the number of examples; `len(Xtr) - 1` would make the last
# training example unreachable.
train_len = len(Xtr)
# Number of mini-batch steps that together visit roughly `num_epochs` epochs of data.
max_steps = (train_len // batch_size) * num_epochs
# Initial learning rate; the training loop decays it in place.
lr = 1

In [103]:
def _logits(x):
  """Forward pass for a batch of context windows `x`; returns unnormalised scores.

  Embeds every index in `x` through `C`, concatenates each row's embeddings,
  applies the tanh hidden layer (w1, b1) and then the output layer (w2, b2).
  """
  emb = C[x].view(x.shape[0], -1)
  hidden = (emb @ w1 + b1).tanh()
  return hidden @ w2 + b2


# Size of the random train/dev samples used for the periodic early-stopping check.
eval_batch_size = 64

# Mini-batch SGD. NOTE: `lr` is decayed in place, so re-running this cell
# without re-running the hyperparameter cell continues from the decayed value.
for step in range(max_steps):
  batch_indices = torch.randint(0, train_len, (batch_size,))
  x_batch = Xtr[batch_indices]
  y_batch = Ytr[batch_indices]

  # cross_entropy is the mean negative log-likelihood of the targets, computed
  # from logits via log-softmax; unlike softmax(...).log() it cannot hit log(0).
  loss = F.cross_entropy(_logits(x_batch), y_batch)

  for p in params:
    p.grad = None

  loss.backward()

  # Plain SGD step; done under no_grad so the update is not tracked by autograd.
  with torch.no_grad():
    for p in params:
      p -= lr * p.grad

  if step % 10000 == 0:
    print("Step", str(step) + ", loss:", loss.item())
  elif step % 100 == 0:
    with torch.no_grad():
      # Loss on a fresh random sample of the training set.
      train_indices = torch.randint(0, train_len, (eval_batch_size,))
      train_loss = F.cross_entropy(_logits(Xtr[train_indices]), Ytr[train_indices])

      # Loss on a random sample of the dev set. randint's upper bound is
      # exclusive, so len(Xdev) lets every dev example be drawn.
      val_batch_indices = torch.randint(0, len(Xdev), (eval_batch_size,))
      val_loss = F.cross_entropy(_logits(Xdev[val_batch_indices]), Ydev[val_batch_indices])

      # Early stopping: stop once the dev/train gap exceeds a tolerance that
      # shrinks as training progresses (train_loss * 10000 / (step + 1)).
      if (val_loss - train_loss) > (train_loss * (1/((step+1)/10000))):
        print("Stopping with train loss", train_loss, "and val loss", val_loss)
        break
    # Decay the learning rate by 10% on every step that is a multiple of 100
    # but not of 10000.
    lr *= 0.9

Step 0, loss: 4.631681442260742
Step 10000, loss: 2.2440900802612305
Step 20000, loss: 2.6413097381591797
Step 30000, loss: 2.8962361812591553
Stopping with train loss tensor(1.9298) and val loss tensor(2.5415)


In [106]:
def generate(length=500):
  """Sample `length` characters from the model, one at a time.

  Starts from a context of `block_size` zeros. At each step the current
  context is run through the network (C, w1, b1, w2, b2), one index is drawn
  from the resulting softmax distribution, decoded through `itos`, and pushed
  into the context while the oldest entry is dropped.

  Args:
    length: number of characters to sample (default 500).

  Returns:
    The sampled characters joined into a single string.
  """
  pieces = []
  context = [0] * block_size
  # Sampling needs no gradients; skip building the autograd graph.
  with torch.no_grad():
    for _ in range(length):
      c = torch.tensor(context)
      emb = C[c].view(-1)
      l1 = (emb @ w1 + b1).tanh()
      out = F.softmax(l1 @ w2 + b2, dim=0)
      index = torch.multinomial(out, num_samples=1).item()
      context = context[1:] + [index]
      pieces.append(itos[index])
  return "".join(pieces)
# Sample text from the model with its current parameters and print it.
print(generate())

,
K:
The corsurs shist nuls ghee by,
RMmRIOK:
IThe patr andserisd:
He Wheln d poke ney, and l wath rrer some!s-he illo watf tour anel areld of ing ame fouthen's sald alveng're chock of ime th maen'd preriogs!
Juar ofparnome,
Am, kof ther for tig to co wesseskir wiithery che, s istarnow thins gessren fhime bank
Thee his unpoun kiee outh;
I wh aldat ogitt out.

SANI:
Shal m:
Koe ofore, doderin do kerb.
SThe p fosal Marlo fot luvenveoff,
Mitaa, de'dy m athet :ott,
kid, bye kyout fe ye save thy, fie
