In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

In [2]:
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=50):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
    def __len__(self):
        return len(self.texts)
    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        inputs = self.tokenizer(text, return_tensors='pt', max_length=self.max_len, padding='max_length', truncation=True)
        input_ids = inputs['input_ids'].squeeze(0)
        attention_mask = inputs['attention_mask'].squeeze(0)
        attention_maxk = attention_mask.bool()
        return input_ids, attention_maxk, torch.tensor(label)

In [3]:
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, embed_size, num_classes, num_heads, num_layers, max_len):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.pos_encoder = nn.Embedding(max_len, embed_size)
        self.transformer = nn.Transformer(d_model=embed_size, nhead=num_heads, num_encoder_layers=num_layers)
        self.fc = nn.Linear(embed_size, num_classes)
    def forward(self, input_ids, attention_mask):
        seq_len = input_ids.size(1)
        pos = torch.arange(0, seq_len).unsqueeze(0).to(input_ids.device)
        x = self.embedding(input_ids) + self.pos_encoder(pos)
        x = x.transpose(0, 1)
        x = self.transformer(x, x, src_key_padding_mask=attention_mask)
        x = x.mean(dim=0)
        return self.fc(x)

In [4]:
vocab_size = 30522
embed_size = 128
num_classes = 2
num_heads = 8
num_layers = 2
max_len = 50

In [5]:
!pip install sacremoses



In [6]:
texts = ["I love programming",
         "This is a great day",
         "I hate bugs",
         "Debugging is fun",
         "I love this movie, it was fantastic!",
         "This is the worst film I have ever seen.",
         "I am so happy with the service.",
         "I hate waiting in line for so long.",
         "The food was great, I will come again.",
         "It was a terrible experience, I will not return.",
         "Amazing product, highly recommend it.",
         "The staff was very rude and unhelpful.",
         "I am extremely satisfied with my purchase.",
         "This place is awful, never coming back."
         ]
labels = [1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'bert-base-uncased')

Using cache found in /root/.cache/torch/hub/huggingface_pytorch-transformers_main
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [7]:
train_texts, val_texts, train_labels, val_labels = train_test_split(texts, labels, test_size=0.2)
train_dataset = TextDataset(train_texts, train_labels, tokenizer)
val_dataset = TextDataset(val_texts, val_labels, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
model = TransformerModel(vocab_size, embed_size, num_classes, num_heads, num_layers, max_len).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)



In [9]:
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for input_ids, attention_mask, labels in train_loader:
        input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader)}")

Epoch 1/50, Loss: 0.7166368961334229
Epoch 2/50, Loss: 1.4781361818313599
Epoch 3/50, Loss: 0.8576106429100037
Epoch 4/50, Loss: 0.6823650002479553
Epoch 5/50, Loss: 0.8599671721458435
Epoch 6/50, Loss: 0.8127077221870422
Epoch 7/50, Loss: 0.7237362861633301
Epoch 8/50, Loss: 0.6875954270362854
Epoch 9/50, Loss: 0.6774516701698303
Epoch 10/50, Loss: 0.7043421268463135
Epoch 11/50, Loss: 0.7310829162597656
Epoch 12/50, Loss: 0.7214116454124451
Epoch 13/50, Loss: 0.7167315483093262
Epoch 14/50, Loss: 0.6972237229347229
Epoch 15/50, Loss: 0.682255744934082
Epoch 16/50, Loss: 0.6754511594772339
Epoch 17/50, Loss: 0.6774863600730896
Epoch 18/50, Loss: 0.6777464151382446
Epoch 19/50, Loss: 0.6912023425102234
Epoch 20/50, Loss: 0.6986187100410461
Epoch 21/50, Loss: 0.6872559785842896
Epoch 22/50, Loss: 0.6701497435569763
Epoch 23/50, Loss: 0.6718823909759521
Epoch 24/50, Loss: 0.6656598448753357
Epoch 25/50, Loss: 0.6629288792610168
Epoch 26/50, Loss: 0.6604506969451904
Epoch 27/50, Loss: 0.6

In [10]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for input_ids, attention_mask, labels in val_loader:
        input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
        outputs = model(input_ids, attention_mask)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f"Validation Accuracy: {correct / total * 100:.2f}%")

Validation Accuracy: 66.67%


In [11]:
def predict(text, model, tokenizer, max_len=50):
    model.eval()
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors='pt', max_length=max_len, padding='max_length', truncation=True)
        input_ids = inputs['input_ids'].to(device)
        attention_mask = inputs['attention_mask'].to(device)
        attention_mask = attention_mask.bool()
        output = model(input_ids, attention_mask)
        _, predicted = torch.max(output, 1)
    return predicted.item()

In [12]:
input_text = "I love it"
predicted_class = predict(input_text, model, tokenizer, max_len)
print(f"Predicted Class: {predicted_class}")

Predicted Class: 1
