In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
import torch.nn.functional as F
import pandas as pd
from tqdm import tqdm
import numpy as np
import json
import os

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from transformers import BertTokenizer, BertModel, BertConfig

# Select the compute device once, up front. Every later cell calls `.to(device)`,
# so this must exist on a fresh kernel (Restart & Run All).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Local checkpoint directory; the relative path is resolved against the
# current working directory and turned into an absolute path.
model_path = os.path.abspath("../../Model/sentiment_bert")

# Fail early with a clear message if the checkpoint folder or its config is missing.
if not os.path.isdir(model_path):
    raise FileNotFoundError(f"sentiment_bert folder not found at: {model_path}")
if not os.path.isfile(os.path.join(model_path, "config.json")):
    raise FileNotFoundError(f"config.json not found in: {model_path}")

print(f"Using BERT model from: {model_path}")

# Load tokenizer and model (offline-safe: local_files_only=True is passed to every loader)
tokenizer = BertTokenizer.from_pretrained(model_path, local_files_only=True)
config = BertConfig.from_pretrained(model_path, output_hidden_states=True, local_files_only=True)
bert = BertModel.from_pretrained(model_path, config=config, local_files_only=True).to(device)
# BERT is only ever called under torch.no_grad() in this notebook, so keep it in eval mode.
bert.eval()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.3, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.3, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.3, inplace=False

In [None]:
# Read the preprocessed training split and pull out the two columns used below.
train_data = pd.read_csv("../../Dataset/train_preprocessed.csv")
phrases, sentiments = train_data["Phrase"], train_data["Sentiment"]
texts = phrases.astype(str).tolist()   # coerce every phrase to str before tokenising
labels = sentiments.tolist()

# Tokenise the whole split in one call; sequences are truncated to at most
# 128 tokens and the result is returned as PyTorch tensors.
tokenizer_options = dict(
    truncation=True,
    padding=True,
    max_length=128,
    return_tensors="pt",
)
encodings = tokenizer(texts, **tokenizer_options)

FileNotFoundError: [Errno 2] No such file or directory: '../Dataset/train_preprocessed.csv'

In [None]:
class SentimentDataset(Dataset):
    """Map-style dataset pairing pre-computed encodings with their labels.

    Args:
        encodings: mapping-like object whose ``.items()`` yields
            ``(field_name, indexable)`` pairs; each indexable is indexed by
            sample position.
        labels: indexable sequence of class ids, one per sample.
    """

    def __init__(self, encodings, labels):
        self.encodings, self.labels = encodings, labels

    def __len__(self):
        # The number of samples is taken from the label sequence.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return one sample: every encoding field at ``idx`` plus a long ``labels`` tensor."""
        sample = {}
        for field_name, field_values in self.encodings.items():
            sample[field_name] = field_values[idx]
        sample["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

# Wrap the tokenised phrases and their labels, then serve them in shuffled batches of 16.
train_dataset = SentimentDataset(encodings=encodings, labels=labels)
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score

# === Step 1: Define PolicyNetwork ===
class PolicyNetwork(nn.Module):
    """Two-layer MLP head: Linear -> ReLU -> Linear, returning unnormalised logits.

    Args:
        input_dim: size of each input feature vector (default 768).
        hidden_dim: width of the hidden layer (default 128).
        output_dim: number of output logits per input (default 5).
    """

    def __init__(self, input_dim=768, hidden_dim=128, output_dim=5):
        super().__init__()
        # Attribute names fc1/fc2 determine the state_dict keys, so they must not change.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map features ``x`` to raw logits (no softmax is applied here)."""
        hidden = F.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return logits

# === Step 2: Load model ===
policy_net = PolicyNetwork().to(device)
# map_location remaps the stored tensors onto the active device, so a checkpoint
# saved on GPU still loads on a CPU-only machine. weights_only=True restricts
# unpickling to tensors and plain containers, which is all a state_dict needs.
supervised_state = torch.load(
    "../../Model/policy_net_supervised.pt",
    map_location=device,
    weights_only=True,
)
policy_net.load_state_dict(supervised_state)

# === Step 3: Evaluate accuracy on train set ===
policy_net.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for batch in train_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        # Deliberately NOT named `labels`: that name is the module-level list of
        # training labels, and rebinding it to a batch tensor would break a
        # re-run of the dataset-construction cell.
        batch_labels = batch["labels"].to(device)

        outputs = bert(input_ids=input_ids, attention_mask=attention_mask)
        # Use the hidden state at sequence position 0 as the sentence feature.
        cls_embeddings = outputs.last_hidden_state[:, 0, :]

        logits = policy_net(cls_embeddings)
        preds = torch.argmax(logits, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(batch_labels.cpu().numpy())

acc = accuracy_score(all_labels, all_preds)
print(f"Accuracy of loaded policy_net_supervised.pt before RL training: {acc:.4f}")

# === Step 4: Switch to training mode for RL ===
policy_net.train()

  policy_net.load_state_dict(torch.load("../Model/policy_net_supervised.pt"))


PolicyNetwork(
  (fc1): Linear(in_features=768, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=5, bias=True)
)

In [None]:
def compute_reward(preds, labels, correct_reward=2.0, wrong_reward=-0.2):
    """Score each prediction against its label.

    Args:
        preds: either a 2-D tensor of per-class scores (the predicted class is
            the argmax over dim 1), or a 1-D tensor of already-chosen class
            indices (e.g. actions sampled from the policy).
        labels: 1-D tensor of ground-truth class indices.
        correct_reward: reward given where prediction == label (default 2.0).
        wrong_reward: reward given where prediction != label (default -0.2).

    Returns:
        1-D float tensor with one reward per example.
    """
    # 1-D input is treated as class indices; anything else is reduced with argmax.
    pred_labels = preds if preds.dim() == 1 else torch.argmax(preds, dim=1)
    correct = (pred_labels == labels).float()
    reward = correct * correct_reward + (1 - correct) * wrong_reward
    return reward

def compute_entropy(logits):
    """Return the mean per-row entropy of softmax(logits) as a Python float.

    The softmax is taken over dim 1; 1e-8 is added inside the log so that
    zero probabilities do not produce log(0).
    """
    probabilities = logits.softmax(dim=1)
    per_row_entropy = -(probabilities * (probabilities + 1e-8).log()).sum(dim=1)
    return per_row_entropy.mean().item()

def compute_advantage(reward):
    """Centre the rewards by subtracting their mean (used as the baseline)."""
    return reward - reward.mean()

In [None]:
# REINFORCE with a batch-mean baseline. Only policy_net's parameters are handed
# to the optimizer; BERT's forward pass runs under torch.no_grad().
optimizer = optim.Adam(policy_net.parameters(), lr=1e-5)
epochs = 7

train_logs = {
    "loss": [],
    "reward": [],
    "accuracy": [],
    "entropy": []
}

for epoch in range(epochs):
    total_loss = 0.0
    total_reward = 0.0
    total_entropy = 0.0
    correct = 0
    total = 0

    for batch in tqdm(train_loader):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        # Not named `labels`, to avoid rebinding the module-level label list.
        batch_labels = batch["labels"].to(device)

        with torch.no_grad():
            output = bert(input_ids=input_ids, attention_mask=attention_mask)
            cls_embeddings = output.last_hidden_state[:, 0, :]

        logits = policy_net(cls_embeddings)
        log_probs = torch.log_softmax(logits, dim=1)

        # Sample one action per example. squeeze(1) (not a bare squeeze()) keeps
        # the batch dimension when the final batch holds a single example.
        sampled_action = torch.multinomial(torch.exp(log_probs), num_samples=1).squeeze(1)
        log_prob = log_probs.gather(1, sampled_action.unsqueeze(1)).squeeze(1)

        # The reward must score the action that was actually sampled; otherwise
        # the log-prob is weighted by a quantity that does not depend on the
        # action and the policy-gradient estimate is wrong. compute_reward takes
        # an argmax over dim 1, so a one-hot encoding of the sampled action
        # makes it score exactly that action.
        sampled_one_hot = F.one_hot(sampled_action, num_classes=logits.size(1))
        reward = compute_reward(sampled_one_hot, batch_labels)
        advantage = compute_advantage(reward)

        # The advantage is treated as a constant weight on the log-probability.
        loss = -(log_prob * advantage.detach()).mean()

        # Accuracy is tracked with the greedy (argmax) prediction, not the sample.
        pred = torch.argmax(logits, dim=1)
        correct += (pred == batch_labels).sum().item()
        total += batch_labels.size(0)

        entropy = compute_entropy(logits)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_reward += reward.mean().item()
        total_entropy += entropy

    # Guard the divisions so an empty loader yields zeros instead of ZeroDivisionError.
    num_batches = max(len(train_loader), 1)
    epoch_acc = correct / max(total, 1)
    # Loss is reported as a per-batch mean, on the same scale as reward and entropy.
    epoch_loss = total_loss / num_batches
    epoch_reward = total_reward / num_batches
    epoch_entropy = total_entropy / num_batches

    train_logs["loss"].append(epoch_loss)
    train_logs["reward"].append(epoch_reward)
    train_logs["accuracy"].append(epoch_acc)
    train_logs["entropy"].append(epoch_entropy)

    print(f"[Epoch {epoch+1}] Loss: {epoch_loss:.4f} | Reward: {epoch_reward:.4f} | Accuracy: {epoch_acc:.4f} | Entropy: {epoch_entropy:.4f}")

100%|██████████| 5853/5853 [05:08<00:00, 18.96it/s]


[Epoch 1] Loss: -408.9232 | Reward: 0.9309 | Accuracy: 0.5140 | Entropy: 0.7632


100%|██████████| 5853/5853 [05:08<00:00, 18.97it/s]


[Epoch 2] Loss: -354.0552 | Reward: 0.9343 | Accuracy: 0.5156 | Entropy: 0.8506


100%|██████████| 5853/5853 [05:08<00:00, 18.96it/s]


[Epoch 3] Loss: -406.3332 | Reward: 0.9332 | Accuracy: 0.5151 | Entropy: 0.9193


100%|██████████| 5853/5853 [05:07<00:00, 19.03it/s]


[Epoch 4] Loss: -413.2162 | Reward: 0.9351 | Accuracy: 0.5159 | Entropy: 0.8675


100%|██████████| 5853/5853 [05:07<00:00, 19.04it/s]


[Epoch 5] Loss: -371.2289 | Reward: 0.9336 | Accuracy: 0.5153 | Entropy: 0.8737


100%|██████████| 5853/5853 [05:06<00:00, 19.07it/s]


[Epoch 6] Loss: -403.9673 | Reward: 0.9144 | Accuracy: 0.5065 | Entropy: 0.9811


100%|██████████| 5853/5853 [05:07<00:00, 19.03it/s]

[Epoch 7] Loss: -387.1488 | Reward: 0.9045 | Accuracy: 0.5021 | Entropy: 1.0179





In [None]:
# Create the checkpoint directory before saving; the save would otherwise fail
# on a checkout where Model/V1 does not exist yet.
os.makedirs("../../Model/V1", exist_ok=True)
torch.save(policy_net.state_dict(), "../../Model/V1/policy_net_rl_reinforce_baseline.pt")

# Persist the per-epoch metrics next to the other run logs.
os.makedirs("../../Logs/V1", exist_ok=True)
with open("../../Logs/V1/reinforce_baseline.json", "w") as f:
    json.dump(train_logs, f, indent=2)