<a href="https://colab.research.google.com/github/dhanush852/intro_to_deeplearning/blob/main/HOMEWORK5_1_C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install ipython-autotime
%load_ext autotime

Collecting ipython-autotime
  Downloading ipython_autotime-0.3.2-py2.py3-none-any.whl (7.0 kB)
Collecting jedi>=0.16 (from ipython->ipython-autotime)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, ipython-autotime
Successfully installed ipython-autotime-0.3.2 jedi-0.19.1
time: 434 µs (started: 2024-04-28 00:45:02 +00:00)


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from torchsummary import summary

time: 9.75 s (started: 2024-04-28 00:45:02 +00:00)


In [3]:
import requests
import torch
from torch.utils.data import Dataset, DataLoader, random_split

url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text


sequence_length = 50

chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

encoded_text = [char_to_int[ch] for ch in text]


sequences = [encoded_text[i:i + sequence_length] for i in range(len(encoded_text) - sequence_length)]
targets = [encoded_text[i + sequence_length] for i in range(len(encoded_text) - sequence_length)]

sequences_tensor = torch.tensor(sequences, dtype=torch.long)
targets_tensor = torch.tensor(targets, dtype=torch.long)

class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

dataset = CharDataset(sequences_tensor, targets_tensor)

batch_size = 128
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


time: 15.3 s (started: 2024-04-28 00:45:12 +00:00)


In [4]:
import torch.nn as nn

class CharTransformer(nn.Module):
    def __init__(self, vocab_size, emb_dim, num_classes, layers_count, heads_count):
        super(CharTransformer, self).__init__()

        self.embedding = nn.Embedding(vocab_size, emb_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=emb_dim, nhead=heads_count)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=layers_count)
        self.output_layer = nn.Linear(emb_dim, num_classes)

    def forward(self, inputs):
        embeddings = self.embedding(inputs)
        encoder_outputs = self.transformer_encoder(embeddings)
        final_output = self.output_layer(encoder_outputs[:, -1])
        return final_output

time: 1.03 ms (started: 2024-04-28 00:45:27 +00:00)


In [5]:
hidden_size = 512
num_layers = 2
num_heads = 2
learning_rate = 0.001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 10

time: 50.2 ms (started: 2024-04-28 00:45:31 +00:00)


In [None]:
device

In [7]:
import torch.optim as optim
import torch.nn as nn

# Instantiate the model with specific configurations and send to compute device
model = CharTransformer(vocab_size=len(chars), emb_dim=128, num_classes=len(chars),
                        layers_count=3, heads_count=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training and validation loop
for epoch in range(10+1):
    model.train()  # Set model to training mode
    total_train_loss = 0

    # Training phase
    for batch_inputs, batch_targets in train_loader:
        batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
        optimizer.zero_grad()
        train_outputs = model(batch_inputs)
        train_loss = criterion(train_outputs, batch_targets)
        train_loss.backward()
        optimizer.step()
        total_train_loss += train_loss.item() * batch_inputs.size(0)

    average_train_loss = total_train_loss / len(train_loader.dataset)

    # Validation phase
    model.eval()  # Set model to evaluation mode
    total_val_loss = 0
    correct_predictions = 0
    total_predictions = 0

    with torch.no_grad():
        for batch_inputs, batch_targets in test_loader:
            batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
            val_outputs = model(batch_inputs)
            val_loss = criterion(val_outputs, batch_targets)
            total_val_loss += val_loss.item() * batch_inputs.size(0)
            _, predictions = torch.max(val_outputs, 1)
            total_predictions += batch_targets.size(0)
            correct_predictions += (predictions == batch_targets).sum().item()

    average_val_loss = total_val_loss / len(test_loader.dataset)
    validation_accuracy = correct_predictions / total_predictions

    # Output training and validation results
    if (epoch + 1) % 1 == 0:
        print(f'Epoch {epoch}, Train Loss: {average_train_loss:.4f}, '
              f'Validation Loss: {average_val_loss:.4f}, Validation Accuracy: {validation_accuracy:.4f}')




Epoch 0, Train Loss: 3.3236, Validation Loss: 3.3237, Validation Accuracy: 0.1527
Epoch 1, Train Loss: 3.3182, Validation Loss: 3.3190, Validation Accuracy: 0.1527
Epoch 2, Train Loss: 3.3179, Validation Loss: 3.3170, Validation Accuracy: 0.1527
Epoch 3, Train Loss: 3.3178, Validation Loss: 3.3193, Validation Accuracy: 0.1527
Epoch 4, Train Loss: 3.3179, Validation Loss: 3.3269, Validation Accuracy: 0.1527
Epoch 5, Train Loss: 3.3179, Validation Loss: 3.3263, Validation Accuracy: 0.1527
Epoch 6, Train Loss: 3.3179, Validation Loss: 3.3197, Validation Accuracy: 0.1527
Epoch 7, Train Loss: 3.3178, Validation Loss: 3.3205, Validation Accuracy: 0.1527
Epoch 8, Train Loss: 3.3178, Validation Loss: 3.3205, Validation Accuracy: 0.1527
Epoch 9, Train Loss: 3.3180, Validation Loss: 3.3236, Validation Accuracy: 0.1527
Epoch 10, Train Loss: 3.3180, Validation Loss: 3.3189, Validation Accuracy: 0.1527
time: 42min 50s (started: 2024-04-28 00:46:01 +00:00)


In [8]:

!pip install torchinfo
import torchinfo


Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0
time: 5.83 s (started: 2024-04-28 01:28:56 +00:00)


In [9]:

dataiter = iter(train_loader)
inputs, labels = next(dataiter)  # Get one batch of data

model.to(inputs.device)
summary = torchinfo.summary(model, input_data=(inputs,))
print(summary)


Layer (type:depth-idx)                        Output Shape              Param #
CharTransformer                               [128, 65]                 --
├─Embedding: 1-1                              [128, 50, 128]            8,320
├─TransformerEncoder: 1-2                     [128, 50, 128]            --
│    └─ModuleList: 2-1                        --                        --
│    │    └─TransformerEncoderLayer: 3-1      [128, 50, 128]            593,024
│    │    └─TransformerEncoderLayer: 3-2      [128, 50, 128]            593,024
│    │    └─TransformerEncoderLayer: 3-3      [128, 50, 128]            593,024
├─Linear: 1-3                                 [128, 65]                 8,385
Total params: 1,795,777
Trainable params: 1,795,777
Non-trainable params: 0
Total mult-adds (M): 204.50
Input size (MB): 0.05
Forward/backward pass size (MB): 380.18
Params size (MB): 6.39
Estimated Total Size (MB): 386.62
time: 950 ms (started: 2024-04-28 01:29:04 +00:00)
