<a href="https://colab.research.google.com/github/jithamanyu001/CV/blob/main/WaveNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn,optim
import torch.nn.functional as F
import matplotlib.pyplot as plt # for making figures

In [8]:
!wget https://raw.githubusercontent.com/karpathy/makemore/master/names.txt

--2023-09-04 08:25:01--  https://raw.githubusercontent.com/karpathy/makemore/master/names.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 228145 (223K) [text/plain]
Saving to: ‘names.txt’


2023-09-04 08:25:01 (7.76 MB/s) - ‘names.txt’ saved [228145/228145]



In [11]:
all_names=open("/content/names.txt",'r').read().splitlines()


In [12]:
chars = sorted(list(set(''.join(all_names))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s,i in stoi.items()}
vocab_size = len(itos)
print(itos)
print(vocab_size)

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}
27


In [13]:
# shuffle up the words
import random
random.seed(42)
random.shuffle(all_names)

In [29]:
# build the dataset
block_size = 16 # context length: how many characters do we take to predict the next one?

def build_dataset(words):
  X, Y = [], []

  for w in words:
    context = [0] * block_size
    for ch in w + '.':
      ix = stoi[ch]
      X.append(context)
      Y.append(ix)
      context = context[1:] + [ix] # crop and append

  X = torch.tensor(X)
  Y = torch.tensor(Y)
  print(X.shape, Y.shape)
  return X, Y
n=int(0.8*len(all_names))
X_train,Y_train=build_dataset(all_names[:n])
X_test,Y_test=build_dataset(all_names[n:])

torch.Size([182625, 16]) torch.Size([182625])
torch.Size([45521, 16]) torch.Size([45521])


In [51]:
def makeBatch(X,Y,batch_size=32):
  n=X.shape[0]
  idx=random.sample(list(range(n)),k=batch_size)
  return X[idx],Y[idx]

In [34]:
class WaveNet(nn.Module):
  def __init__(self,vocab_size,embedding_size,hidden_size,combine,block_size):
    super(WaveNet,self).__init__()
    self.embedding=nn.Embedding(vocab_size,embedding_size)
    self.combine=combine
    self.lin1=nn.Linear(embedding_size*combine,hidden_size)
    self.bn1=nn.BatchNorm1d(block_size//combine)
    self.lin2=nn.Linear(hidden_size*combine,hidden_size)
    self.bn2=nn.BatchNorm1d(block_size//combine**2)
    self.lin3=nn.Linear(hidden_size*combine,hidden_size)
    self.bn3=nn.BatchNorm1d(block_size//combine**3)
    self.lin4=nn.Linear(hidden_size*combine,hidden_size)
    self.bn4=nn.BatchNorm1d(block_size//combine**4)
    self.output=nn.Linear(hidden_size,vocab_size)
  def forward(self,x,y=None):
    embedding=self.embedding(x)
    B,L,emd=embedding.shape
    embedding=embedding.view([B,L//self.combine,emd*self.combine])
    embedding=F.tanh(self.lin1(embedding))
    embedding=self.bn1(embedding)
    B,L,emd=embedding.shape
    embedding=embedding.view([B,L//self.combine,emd*self.combine])
    embedding=F.tanh(self.lin2(embedding))
    embedding=self.bn2(embedding)
    B,L,emd=embedding.shape
    embedding=embedding.view([B,L//self.combine,emd*self.combine])
    embedding=F.tanh(self.lin3(embedding))
    embedding=self.bn3(embedding)
    B,L,emd=embedding.shape
    embedding=embedding.view([B,L//self.combine,emd*self.combine])
    embedding=F.tanh(self.lin4(embedding))
    embedding=self.bn4(embedding)
    embedding=embedding.view(B,-1)
    logits=self.output(embedding)
    if y==None:
      return logits,None
    else:
      loss=F.cross_entropy(logits,y)
      return logits,loss


In [61]:
embedding_size=50
hidden_size=100
combine=2

In [62]:
model=WaveNet(vocab_size,embedding_size,hidden_size,combine,block_size)
model

WaveNet(
  (embedding): Embedding(27, 50)
  (lin1): Linear(in_features=100, out_features=100, bias=True)
  (bn1): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lin2): Linear(in_features=200, out_features=100, bias=True)
  (bn2): BatchNorm1d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lin3): Linear(in_features=200, out_features=100, bias=True)
  (bn3): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lin4): Linear(in_features=200, out_features=100, bias=True)
  (bn4): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (output): Linear(in_features=100, out_features=27, bias=True)
)

In [63]:
optimizer=optim.AdamW(model.parameters(),0.001)
epochs=10000
batch_size=128

In [64]:
lossTrain=[]
lossTest=[]
for i in range(epochs):
  optimizer.zero_grad()
  x,y=makeBatch(X_train,Y_train,batch_size)
  logits,loss=model(x,y)
  loss.backward()
  optimizer.step()
  print(f"Train loss for epoch{i}: {loss.item()}")
  lossTrain.append(loss.item())
  x,y=makeBatch(X_test,Y_test,batch_size)
  logits,loss=model(x,y)
  print(f"Test loss for epoch{i}: {loss.item()}")
  lossTest.append(loss.item())

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Train loss for epoch7500: 2.1466970443725586
Test loss for epoch7500: 2.2026259899139404
Train loss for epoch7501: 1.738340973854065
Test loss for epoch7501: 2.039677619934082
Train loss for epoch7502: 1.80586576461792
Test loss for epoch7502: 2.089885711669922
Train loss for epoch7503: 2.093270778656006
Test loss for epoch7503: 2.0659708976745605
Train loss for epoch7504: 1.9384955167770386
Test loss for epoch7504: 2.071631908416748
Train loss for epoch7505: 1.867493987083435
Test loss for epoch7505: 2.1748344898223877
Train loss for epoch7506: 2.021557569503784
Test loss for epoch7506: 2.037782907485962
Train loss for epoch7507: 2.024129867553711
Test loss for epoch7507: 1.9927377700805664
Train loss for epoch7508: 1.7914018630981445
Test loss for epoch7508: 2.1429529190063477
Train loss for epoch7509: 1.749616026878357
Test loss for epoch7509: 2.259732961654663
Train loss for epoch7510: 1.8885424137115479
Test loss for

In [66]:
for _ in range(20):

    out = []
    context = [0] * block_size # initialize with all ...
    while True:
      # forward pass the neural net
      logits,_ = model(torch.tensor([context]))
      probs = F.softmax(logits, dim=1)
      # sample from the distribution
      ix = torch.multinomial(probs, num_samples=1).item()
      # shift the context window and track the samples
      context = context[1:] + [ix]
      out.append(ix)
      # if we sample the special '.' token, break
      if ix == 0:
        break

    print(''.join(itos[i] for i in out)) # decode and print the generated word

riston.
breon.
liddeyn.
janob.
hryn.
sedrigk.
ivan.
jaelko.
juressiel.
brsami.
ryeden.
aurielynna.
nosau.
maylana.
zohanobi.
emmik.
ramcarddias.
avyan.
rayder.
hazsa.
