In [2]:
# Import required libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.nn.functional as functional
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
from torchmetrics import Accuracy
# from sklearn.metrics import accuracy_score  # uncomment to use sklearn

In [3]:
#Preparing out data: train-test split
sentences = ["I love this product", 
             "This is the worst service ever", 
             "I am very happy with my purchase", 
             "I will never buy this again", 
             "Absolutely fantastic experience"]

labels = [1, 0, 1, 0, 1]  # 1 for positive sentiment, 0 for negative sentiment

#Train-test Split
train_sentences = sentences[:4]
train_labels = labels[:4]
test_sentences = sentences[4:]
test_labels = labels[4:]

In [7]:
#Building the transformer model
class TransformerEncoder(nn.Module):
    def __init__(self, embed_size, heads, num_layers, dropout):
        super(TransformerEncoder, self).__init__()
        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=embed_size, nhead=heads),
            num_layers=num_layers)
        self.fc = nn.Linear(embed_size, 2) # Output = 2 classes (positive/negative)
    def forward(self, x):
        x = self.encoder(x)
        x = x.mean(dim=1)
        return self.fc(x)

model = TransformerEncoder(embed_size=512, heads=8, num_layers=3, dropout=0.5)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
    



In [9]:
#Training the transformers
for epoch in range(5):
    for sentence, label in zip(train_sentences, train_labels):
        tokens = sentence.split()
        data = torch.stack([token_embeddings[tokens] for token in tokens], dim =1)
        output = model(data)
        loss = criterion(output, torch.tensor([label]))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch}, Loss: {loss.item()}")    

NameError: name 'token_embeddings' is not defined

In [None]:
#Predicting the transformers
def predict(sentence):
    model.eval()
    with torch.no_grad():
        tokens = sentence.split()
        data = torch.stack([token_embeddings.get(token, torch.rand((1, 512)))
                            for token in tokens], dim=1)
        output = model(data)
        predicted = torch.argmax(output, dim =1)
        return "Positive" if predicted.item() == 1 else "Negative"

In [None]:
#Predicting on new text
sample_sentence = "This product can be better"
print(f" '{sample_sentence}' is predicted as: {predict(sample_sentence)}")

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim

# ========== Step 1: Tạo vocab ==========
sentences = [
    "I love this product",
    "This is the worst service ever",
    "I am very happy with my purchase",
    "I will never buy this again",
    "Absolutely fantastic experience"
]

labels = [1, 0, 1, 0, 1]

train_sentences = sentences[:4]
train_labels = labels[:4]
test_sentences = sentences[4:]
test_labels = labels[4:]

# Simple tokenizer: lowercase + split
tokenized = [s.lower().split() for s in sentences]
vocab = {word: idx for idx, word in enumerate(set([w for s in tokenized for w in s]))}
vocab_size = len(vocab)
print("Vocab:", vocab)

def encode(sentence):
    return [vocab[w] for w in sentence.lower().split()]

train_data = [encode(s) for s in train_sentences]
test_data = [encode(s) for s in test_sentences]

# ========== Step 2: Embedding + Transformer ==========
embed_size = 32

embedding = nn.Embedding(vocab_size, embed_size)

class TransformerEncoder(nn.Module):
    def __init__(self, embed_size, heads, num_layers, dropout):
        super(TransformerEncoder, self).__init__()
        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=embed_size, nhead=heads, dropout=dropout),
            num_layers=num_layers
        )
        self.fc = nn.Linear(embed_size, 2)  # binary classification

    def forward(self, x):  
        x = self.encoder(x)  # (seq_len, batch, embed)
        x = x.mean(dim=0)    # mean pooling (batch, embed)
        return self.fc(x)

model = TransformerEncoder(embed_size=embed_size, heads=2, num_layers=1, dropout=0.1)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ========== Step 3: Training ==========
for epoch in range(5):
    total_loss = 0
    for tokens, label in zip(train_data, train_labels):
        token_ids = torch.tensor(tokens)           # (seq_len,)
        embeds = embedding(token_ids)              # (seq_len, embed)
        embeds = embeds.unsqueeze(1)               # (seq_len, batch=1, embed)

        output = model(embeds)                     # (batch=1, num_classes=2)
        loss = criterion(output, torch.tensor([label]))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Vocab: {'happy': 0, 'i': 1, 'product': 2, 'will': 3, 'am': 4, 'fantastic': 5, 'ever': 6, 'this': 7, 'never': 8, 'with': 9, 'the': 10, 'buy': 11, 'service': 12, 'absolutely': 13, 'my': 14, 'love': 15, 'experience': 16, 'again': 17, 'is': 18, 'worst': 19, 'purchase': 20, 'very': 21}
Epoch 1, Loss: 4.3669
Epoch 2, Loss: 1.9446
Epoch 3, Loss: 1.5617
Epoch 4, Loss: 1.1911
Epoch 5, Loss: 0.8063


