In [4]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score
from torch.nn.utils.rnn import pad_sequence
from collections import Counter
from tqdm import tqdm

# 1. Load Data
df = pd.read_csv("/content/emotions.csv")
df['text'] = df['text'].astype(str)

# 2. Label encode emotion
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])

# 3. Tokenize
def tokenize(text):
    return text.lower().split()

# 4. Build vocabulary with proper special tokens
tokens = [token for text in df['text'] for token in tokenize(text)]
vocab = {"<PAD>": 0, "<UNK>": 1}
vocab.update({w: i + 2 for i, (w, _) in enumerate(Counter(tokens).most_common(8000))})

# 5. Encode text with <UNK> handling
def encode(text):
    return [vocab.get(w, vocab["<UNK>"]) for w in tokenize(text)]

df['input_ids'] = df['text'].apply(encode)

# 6. Prepare splits
X_train, X_test, y_train, y_test = train_test_split(
    df['input_ids'], df['label'], test_size=0.2, stratify=df['label'])

# 7. Dataset & DataLoader
class EmotionDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = [torch.tensor(x) for x in texts]
        self.labels = torch.tensor(labels.values)

    def __getitem__(self, idx):
        return self.texts[idx], self.labels[idx]

    def __len__(self):
        return len(self.labels)

def pad_collate(batch):
    texts, labels = zip(*batch)
    padded = pad_sequence(texts, batch_first=True)
    return padded, torch.tensor(labels)

train_dl = DataLoader(EmotionDataset(X_train, y_train), batch_size=64, shuffle=True, collate_fn=pad_collate)
test_dl  = DataLoader(EmotionDataset(X_test,  y_test),  batch_size=64, collate_fn=pad_collate)

# 8. Model
class LSTM(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, out_dim):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, out_dim)

    def forward(self, x):
        x = self.emb(x)
        _, (h, _) = self.lstm(x)
        return self.fc(h[-1])

model = LSTM(len(vocab), 100, 128, len(le.classes_))
opt = torch.optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss()

# 9. Train
for epoch in range(3):
    model.train()
    for X, y in train_dl:
        opt.zero_grad()
        pred = model(X)
        loss = loss_fn(pred, y)
        loss.backward()
        opt.step()
    print(f"Epoch {epoch+1} complete")

# 10. Evaluate
model.eval()
all_preds, all_true = [], []
with torch.no_grad():
    for X, y in test_dl:
        logits = model(X)
        preds = logits.argmax(dim=1)
        all_preds.extend(preds.tolist())
        all_true.extend(y.tolist())

print("Accuracy:", accuracy_score(all_true, all_preds))
print("F1 Score:", f1_score(all_true, all_preds, average='weighted'))

# 11. Sample Predictions — Improved text display
idx2word = {idx: word for word, idx in vocab.items()}
print("\nSample Predictions:")
X_test_ = X_test.reset_index(drop=True)
y_test_ = y_test.reset_index(drop=True)

for i in range(10):
    text_tokens = X_test_.iloc[i]
    text_tensor = torch.tensor(text_tokens).unsqueeze(0)
    padded = pad_sequence([text_tensor.squeeze()], batch_first=True)
    with torch.no_grad():
        output = model(padded)
        pred_label = output.argmax(dim=1).item()

    input_words = [idx2word.get(idx, '<UNK>') for idx in text_tokens][:15]
    actual_label = le.inverse_transform([y_test_.iloc[i]])[0]
    predicted_label = le.inverse_transform([pred_label])[0]
    print(f"Text: {' '.join(input_words)}...")
    print(f"Actual: {actual_label} | Predicted: {predicted_label}\n")

Epoch 1 complete
Epoch 2 complete
Epoch 3 complete
Accuracy: 0.9360859864206713
F1 Score: 0.9355395413416894

Sample Predictions:
Text: im still feeling unsure...
Actual: 4 | Predicted: 4

Text: i feel so welcomed at...
Actual: 1 | Predicted: 1

Text: i just say that changing grades at my age has truly kicked my b i...
Actual: 5 | Predicted: 5

Text: i really feel valued...
Actual: 1 | Predicted: 1

Text: i feel that even if we mess up while trying to obey him he is...
Actual: 1 | Predicted: 1

Text: i must admit i was feeling apprehensive about our visit to <UNK>...
Actual: 4 | Predicted: 4

Text: i ended up feeling jealous...
Actual: 3 | Predicted: 3

Text: i feel doomed to fail because of the unreasonable <UNK> and poor materials ive been...
Actual: 0 | Predicted: 0

Text: i feel so stupid feeling such rage within me whenever i see those girls whom...
Actual: 0 | Predicted: 0

Text: i was reading i found myself feeling agitated frustrated angry and unbelievably sad that i...
Actual: