In [1]:
!pip install ipython-autotime
%load_ext autotime

Collecting ipython-autotime
  Downloading ipython_autotime-0.3.2-py2.py3-none-any.whl (7.0 kB)
Collecting jedi>=0.16 (from ipython->ipython-autotime)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, ipython-autotime
Successfully installed ipython-autotime-0.3.2 jedi-0.19.1
time: 418 µs (started: 2024-04-27 23:12:02 +00:00)


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from torchsummary import summary

time: 6.45 s (started: 2024-04-27 23:12:15 +00:00)


In [6]:
import requests
import torch
from torch.utils.data import Dataset, DataLoader, random_split

# Download the tiny-Shakespeare corpus used as training text.
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# The timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() prevents an HTTP error page from being tokenised and
# trained on as if it were the corpus.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text

# Number of consecutive characters the model sees for each prediction.
sequence_length = 50

# Character vocabulary and the two lookup tables (char -> id, id -> char).
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

encoded_text = [char_to_int[ch] for ch in text]

# Each window of `sequence_length` ids is paired with the id that follows it,
# so the last usable window starts `sequence_length` before the end.
num_windows = len(encoded_text) - sequence_length
if num_windows <= 0:
    raise ValueError(
        f"Downloaded text has {len(encoded_text)} characters; "
        f"need more than sequence_length={sequence_length}."
    )

sequences = [encoded_text[i:i + sequence_length] for i in range(num_windows)]
targets = [encoded_text[i + sequence_length] for i in range(num_windows)]

sequences_tensor = torch.tensor(sequences, dtype=torch.long)
targets_tensor = torch.tensor(targets, dtype=torch.long)

class CharDataset(Dataset):
    """Index-aligned pairing of input sequences with their targets."""

    def __init__(self, sequences, targets):
        """Store references to the two parallel containers as given."""
        self.sequences, self.targets = sequences, targets

    def __len__(self):
        """Number of examples, taken from the length of ``sequences``."""
        return len(self.sequences)

    def __getitem__(self, index):
        """Return the ``(sequence, target)`` pair stored at ``index``."""
        sample = self.sequences[index]
        label = self.targets[index]
        return sample, label

dataset = CharDataset(sequences_tensor, targets_tensor)

batch_size = 128
# 80 % of the examples go to training; the remainder is held out.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Seed the split so the same examples land in train/test on every run;
# otherwise validation numbers from different runs are not comparable.
split_seed = 42
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)
# NOTE(review): the examples are stride-1 sliding windows, so neighbouring
# windows share almost all of their characters. A random split therefore puts
# near-duplicate windows on both sides and the held-out metrics are likely
# optimistic — consider a contiguous (by position) split instead.

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


time: 9.04 s (started: 2024-04-27 23:14:06 +00:00)


In [7]:
import torch.nn as nn

class CharTransformer(nn.Module):
    """Transformer encoder that predicts the next character id.

    Args:
        vocab_size: Number of distinct input token ids (embedding rows).
        emb_dim: Embedding width, also used as the encoder's ``d_model``.
        num_classes: Size of the output logit vector.
        layers_count: Number of stacked encoder layers.
        heads_count: Attention heads per layer (must divide ``emb_dim``).
    """

    def __init__(self, vocab_size, emb_dim, num_classes, layers_count, heads_count):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, emb_dim)
        # batch_first=True is required: forward() feeds (batch, seq, emb).
        # With the default (batch_first=False) the encoder would treat the
        # batch axis as the sequence axis and attend across unrelated samples.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=emb_dim, nhead=heads_count, batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=layers_count)
        self.output_layer = nn.Linear(emb_dim, num_classes)
        # NOTE(review): no positional encoding is added to the embeddings, so
        # the encoder has no explicit notion of character order — consider
        # adding one.

    def forward(self, inputs):
        """Map token ids of shape (batch, seq_len) to logits of shape (batch, num_classes).

        Only the encoder output at the last sequence position is projected,
        i.e. one prediction per input sequence.
        """
        embeddings = self.embedding(inputs)                      # (batch, seq_len, emb_dim)
        encoder_outputs = self.transformer_encoder(embeddings)   # (batch, seq_len, emb_dim)
        final_output = self.output_layer(encoder_outputs[:, -1])
        return final_output

time: 906 µs (started: 2024-04-27 23:14:19 +00:00)


In [8]:
# Hyperparameter settings.
# NOTE(review): the training cell further down hard-codes its own embedding
# size, layer/head counts, learning rate and epoch count instead of reading
# these values — confirm which set is intended.
learning_rate = 0.001
epochs = 15
hidden_size = 512
num_layers = 2
num_heads = 2

# Run on the GPU when torch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

time: 1.95 ms (started: 2024-04-27 23:14:31 +00:00)


In [None]:
import torch.optim as optim
import torch.nn as nn

# Build the network, then place it on the compute device.
model = CharTransformer(
    vocab_size=len(chars),
    emb_dim=128,
    num_classes=len(chars),
    layers_count=3,
    heads_count=2,
)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# One pass over the training data followed by one pass over the held-out
# data per epoch; a summary line is printed after every epoch.
for epoch in range(10):
    # ---- training pass -------------------------------------------------
    model.train()
    total_train_loss = 0

    for batch_inputs, batch_targets in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        loss = criterion(model(batch_inputs), batch_targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # the epoch figure is a true per-example average.
        total_train_loss += loss.item() * batch_inputs.size(0)

    average_train_loss = total_train_loss / len(train_loader.dataset)

    # ---- validation pass -----------------------------------------------
    model.eval()
    total_val_loss = 0
    correct_predictions = 0
    total_predictions = 0

    with torch.no_grad():
        for batch_inputs, batch_targets in test_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            val_outputs = model(batch_inputs)
            total_val_loss += criterion(val_outputs, batch_targets).item() * batch_inputs.size(0)

            # Predicted class = index of the largest logit in each row.
            predictions = val_outputs.argmax(dim=1)
            correct_predictions += (predictions == batch_targets).sum().item()
            total_predictions += batch_targets.size(0)

    average_val_loss = total_val_loss / len(test_loader.dataset)
    validation_accuracy = correct_predictions / total_predictions

    # Report after every epoch.
    print(f'Epoch {epoch + 1}, Train Loss: {average_train_loss:.4f}, '
          f'Validation Loss: {average_val_loss:.4f}, Validation Accuracy: {validation_accuracy:.4f}')




In [None]:

!pip install torchinfo
import torchinfo


In [None]:

# Grab one batch to drive the forward pass that torchinfo traces.
inputs, labels = next(iter(train_loader))

# Move the batch to the model's device rather than the model to the batch's:
# loader batches live on the CPU, so the reverse would silently pull a
# GPU-resident model off the GPU for every later cell.
inputs = inputs.to(next(model.parameters()).device)

# Stored under its own name so it does not shadow the `summary` function
# imported from torchsummary at the top of the notebook.
model_summary = torchinfo.summary(model, input_data=(inputs,))
print(model_summary)
