In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.nn.utils.rnn import pad_sequence
from transformers import BertTokenizer, BertModel
from torch.utils.tensorboard import SummaryWriter
from transformers import BertTokenizer, BertModel

In [17]:
df = pd.read_excel('merged.xlsx')
df['label'] = pd.to_numeric(df['label'], errors='coerce', downcast='integer')
df.loc[~df['label'].isin([0, 1]), 'label'] = None
df.dropna(subset=['headline', 'label'], inplace=True)

In [18]:
writer = SummaryWriter('runs/rnn_with_attention_experiment')

In [19]:
labeled_data = df[['domain', 'headline', 'content', 'label']]

In [20]:
labeled_data = labeled_data.sample(frac=1, random_state=42).reset_index(drop=True)

In [21]:
labeled_data['text'] = labeled_data['domain'] + ' ' + labeled_data['headline'] + ' ' + labeled_data['content']
texts = labeled_data['text'].values
labels = labeled_data['label'].values

In [22]:
tokenizer = BertTokenizer.from_pretrained('sagorsarker/bangla-bert-base')
tokens = [tokenizer.encode_plus(text, add_special_tokens=True, padding='max_length', max_length=512, truncation=True, return_tensors='pt') for text in texts]
input_ids = torch.cat([token['input_ids'] for token in tokens], dim=0)
attention_masks = torch.cat([token['attention_mask'] for token in tokens], dim=0)
labels = torch.tensor(labels)

In [23]:
X = torch.stack((input_ids, attention_masks), dim=1)
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

In [24]:
class RNNWithAttention(nn.Module):
    def __init__(self, bert_model, hidden_dim, output_dim):
        super(RNNWithAttention, self).__init__()
        self.bert = bert_model
        self.lstm = nn.LSTM(bert_model.config.hidden_size, hidden_dim, batch_first=True, bidirectional=True)
        self.attention = nn.Linear(hidden_dim * 2, 1)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_ids, attention_mask):
        with torch.no_grad():
            embedded = self.bert(input_ids, attention_mask=attention_mask)[0]
        lstm_out, _ = self.lstm(embedded)
        attention_weights = torch.tanh(self.attention(lstm_out)).squeeze(-1)
        attention_weights = torch.softmax(attention_weights, dim=1)
        context_vector = torch.sum(attention_weights.unsqueeze(-1) * lstm_out, dim=1)
        output = self.fc(context_vector)
        return self.sigmoid(output)

In [25]:
device = torch.device('cuda')
bert_model = BertModel.from_pretrained('sagorsarker/bangla-bert-base')
model = RNNWithAttention(bert_model, hidden_dim=128, output_dim=1).to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [26]:
num_epochs = 3
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
        input_ids, attention_mask = X_batch[:, 0, :], X_batch[:, 1, :]
        input_ids, attention_mask, y_batch = input_ids.to(device), attention_mask.to(device), y_batch.to(device).float()
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask).squeeze()
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        # Log the gradients of the model's parameters
        for name, param in model.named_parameters():
            if param.grad is not None:
                writer.add_histogram(f'{name}.grad', param.grad, epoch * len(train_loader) + batch_idx)

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}')

# Close the TensorBoard writer
writer.close()

Epoch 1/3, Loss: 0.0107
Epoch 2/3, Loss: 0.0357
Epoch 3/3, Loss: 0.1770


In [27]:
model.eval()
y_pred = []
with torch.no_grad():
    for X_batch, _ in test_loader:
        input_ids, attention_mask = X_batch[:, 0, :], X_batch[:, 1, :]
        input_ids, attention_mask = input_ids.to(device), attention_mask.to(device)
        outputs = model(input_ids, attention_mask).squeeze()
        y_pred.extend((outputs > 0.5).cpu().numpy())
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

In [28]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.9840
Precision: 0.7988
Recall: 0.5172
F1 Score: 0.6279


In [29]:
model_save_path = 'rnn_with_attention_model.pth'
torch.save(model.state_dict(), model_save_path)
print(f'Model saved to {model_save_path}')

Model saved to rnn_with_attention_model.pth
