<a href="https://colab.research.google.com/github/el-eshaano/DL-Lab/blob/main/Week-08/Names.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture cell
!wget https://download.pytorch.org/tutorial/data.zip
!unzip data.zip

In [60]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

from tqdm.notebook import tqdm

import glob

device = "cuda" if torch.cuda.is_available() else "cpu"

In [61]:
class NamesDataset(Dataset):

    def __init__(self, device='cpu'):
        self.inputs = []
        self.targets = []
        self.device = device

        ttoi = {} # target language index mapping
        len_targets = 0

        for filename in glob.glob('data/names/*'):

            target = filename.split('/')[-1].split('.')[0]

            ttoi[target] = len_targets
            len_targets += 1

            with open(filename, 'r') as f:
                names = f.read().lower()

            for name in names.split('\n'):

                name = torch.Tensor(list(map(ord, name)))

                self.inputs.append(name)
                self.targets.append(torch.tensor(ttoi[target]))

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, idx):
        return self.inputs[idx].to(self.device), self.targets[idx].to(self.device)

In [63]:
dataset = NamesDataset(device=device)
train_dataset, test_dataset = random_split(dataset, [0.8, 0.2])

In [64]:
def custom_collate_fn(batch):
    sequences = [item[0] for item in batch]
    targets = [item[1] for item in batch]
    lengths = [len(seq) for seq in sequences]

    padded_sequences = pad_sequence(sequences, batch_first=True, padding_value=0)
    lengths_tensor = torch.tensor(lengths)
    targets_tensor = torch.stack(targets).to(device)

    return padded_sequences, targets_tensor, lengths_tensor

In [65]:
train_data = DataLoader(trainset, batch_size=256, shuffle=True, collate_fn=custom_collate_fn)
test_data = DataLoader(testset, batch_size=1, shuffle=False, collate_fn=custom_collate_fn)

In [87]:
class NameNet(nn.Module):
    def __init__(self):
        super(NameNet, self).__init__()

        self.rnn1 = nn.RNN(input_size=1, hidden_size=10, batch_first=True)
        self.rnn2 = nn.RNN(input_size=10, hidden_size=3, batch_first=True)

        self.classification = nn.Sequential(
            nn.Linear(3, 64),
            nn.ReLU(),
            nn.Linear(64, 18)
        )

    def forward(self, x, lengths):

        x = x.unsqueeze(-1)
        x = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)

        x, _ = self.rnn1(x)
        x, _ = self.rnn2(x)
        x, _ = pad_packed_sequence(x, batch_first=True)

        x = x[torch.arange(x.size(0)), lengths - 1]
        x = self.classification(x)

        return x

In [90]:
learning_rate = 3e-4
num_epochs = 100
# ---


model = NameNet().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in tqdm(range(num_epochs), desc="epochs"):
    for batch, labels, lengths in tqdm(train_data, desc="batches", leave=False):
        outs = model(batch, lengths)
        loss = criterion(outs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if epoch % 10 == 0:
        print(f'after epoch {epoch}, loss is {loss.item()}')

epochs:   0%|          | 0/100 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

after epoch 0, loss is 2.2893755435943604


batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

after epoch 10, loss is 1.9151639938354492


batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

after epoch 20, loss is 1.7865509986877441


batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

after epoch 30, loss is 1.6107375621795654


batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

after epoch 40, loss is 1.491317868232727


batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

after epoch 50, loss is 1.5545278787612915


batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

after epoch 60, loss is 1.6650992631912231


batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

after epoch 70, loss is 1.6600390672683716


batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

after epoch 80, loss is 1.637357473373413


batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

after epoch 90, loss is 1.616753339767456


batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

batches:   0%|          | 0/59 [00:00<?, ?it/s]

In [91]:
model.eval()

total = 0
correct = 0

for input, targets, lengths in tqdm(train_data, desc="inputs"):
    preds = torch.argmax(model(input, lengths), dim=1)

    correct += (preds == targets).sum()
    total += len(targets)

acc = 100 * correct / total

print(f'Accuracy is {acc}')
model.train()

inputs:   0%|          | 0/59 [00:00<?, ?it/s]

Accuracy is 50.50129699707031


NameNet(
  (rnn1): RNN(1, 10, batch_first=True)
  (rnn2): RNN(10, 3, batch_first=True)
  (classification): Sequential(
    (0): Linear(in_features=3, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=18, bias=True)
  )
)