In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
import torch.nn.functional as F
import pandas as pd
from tqdm import tqdm
import numpy as np
import json
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from pathlib import Path
from transformers import BertTokenizer, BertModel, BertConfig

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Path().resolve() is the kernel's current working directory (normally the
# folder containing this notebook) -- it is not derived from the notebook file.
notebook_dir = Path().resolve()
model_rel = Path("Model") / "sentiment_bert"

# The other cells read "../../Model/..." and "../../Dataset/...", so the Model
# folder is expected above the working directory rather than inside it.
# Look in the working directory first (the original behaviour), then walk up
# through its ancestors and take the first match.
model_path = next(
    (
        base / model_rel
        for base in (notebook_dir, *notebook_dir.parents)
        if (base / model_rel).exists()
    ),
    notebook_dir / model_rel,  # fallback only used to build the error message
)

if not model_path.exists():
    raise FileNotFoundError(
        f"sentiment_bert not found at: {model_path} "
        "(also searched every parent directory)"
    )

print(f"Using BERT model from: {model_path}")

# local_files_only=True: load strictly from disk, never from the Hugging Face hub.
tokenizer = BertTokenizer.from_pretrained(str(model_path), local_files_only=True)
config = BertConfig.from_pretrained(str(model_path), output_hidden_states=True, local_files_only=True)
bert = BertModel.from_pretrained(str(model_path), config=config, local_files_only=True).to(device)
# Inference mode (disables dropout); the training loop only reads embeddings from it.
bert.eval()


FileNotFoundError: sentiment_bert not found at: C:\Users\jsj31\Desktop\Graduation Project\RL-BERT-Opinion\Code\V1\Model\sentiment_bert

In [None]:
# Read the preprocessed training split. "Phrase" is forced to str so that
# non-string cells cannot break the tokenizer; "Sentiment" provides the targets.
train_data = pd.read_csv("../../Dataset/train_preprocessed.csv")
texts = train_data["Phrase"].astype(str).to_list()
labels = train_data["Sentiment"].to_list()

# Tokenize the whole corpus in a single call: sequences are cut at 128 tokens,
# padded to a common length, and returned as PyTorch tensors.
encodings = tokenizer(texts, max_length=128, truncation=True, padding=True, return_tensors="pt")

FileNotFoundError: [Errno 2] No such file or directory: '../Dataset/train_preprocessed.csv'

In [None]:
class SentimentDataset(Dataset):
    """Map-style dataset that pairs pre-computed tokenizer encodings with labels.

    Args:
        encodings: Mapping from field name to an indexable container; every
            container is indexed with the same sample index.
        labels: Indexable sequence of integer class ids, one per sample.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The label sequence defines the dataset size.
        n_samples = len(self.labels)
        return n_samples

    def __getitem__(self, idx):
        """Return one sample as a dict: every encoding field plus a long ``labels`` tensor."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = values[idx]
        sample["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

# Wrap the tokenized corpus in a dataset and serve it in shuffled mini-batches of 16.
train_dataset = SentimentDataset(encodings=encodings, labels=labels)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=16)

In [None]:
# Policy (Actor) network
class PolicyNetwork(nn.Module):
    """Two-layer MLP mapping an input feature vector to unnormalized action logits.

    Args:
        input_dim: Size of the input features (default 768).
        hidden_dim: Width of the hidden layer (default 128).
        output_dim: Number of logits produced (default 5; presumably the
            number of sentiment classes -- confirm against the dataset).
    """

    def __init__(self, input_dim=768, hidden_dim=128, output_dim=5):
        super().__init__()
        # Attribute names fc1/fc2 are part of the state_dict keys; keep them stable.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return raw logits (no softmax applied)."""
        hidden = torch.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return logits

# Value (Critic) network
class ValueNetwork(nn.Module):
    """Two-layer MLP that maps an input feature vector to a single scalar value.

    Args:
        input_dim: Size of the input features (default 768).
        hidden_dim: Width of the hidden layer (default 128).
    """

    def __init__(self, input_dim=768, hidden_dim=128):
        super(ValueNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return one value per input row: ``[B, input_dim]`` -> ``[B]``."""
        x = F.relu(self.fc1(x))
        # Drop only the trailing size-1 feature axis. A bare squeeze() would also
        # remove the batch axis when B == 1, yielding a 0-dim tensor whose shape
        # no longer matches a per-sample target of shape [1].
        return self.fc2(x).squeeze(-1)

In [None]:
# Actor: start from the supervised checkpoint and continue training it.
policy_net = PolicyNetwork().to(device)
# map_location=device lets a checkpoint saved on a GPU load on a CPU-only
# machine (and vice versa); without it torch.load tries to restore tensors
# onto the device they were saved from.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
policy_net.load_state_dict(
    torch.load("../../Model/policy_net_supervised.pt", map_location=device)
)
policy_net.train()

# Critic: freshly initialized, no checkpoint is loaded for it.
value_net = ValueNetwork().to(device)

# Separate Adam optimizers for actor and critic, both with lr=1e-5.
actor_optimizer = optim.Adam(policy_net.parameters(), lr=1e-5)
critic_optimizer = optim.Adam(value_net.parameters(), lr=1e-5)

In [None]:
def compute_reward(preds, labels):
    """Score each row of ``preds`` against its label.

    The predicted class is the argmax of ``preds`` along dim 1. A row whose
    predicted class equals its label earns +2.0; every other row earns -0.2.

    Args:
        preds: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of target class ids.

    Returns:
        Float tensor with one reward per sample.
    """
    hits = preds.argmax(dim=1).eq(labels).float()
    misses = 1 - hits
    return 2.0 * hits + -0.2 * misses

def compute_entropy(logits):
    """Return the batch-mean Shannon entropy of ``softmax(logits)`` as a Python float.

    Args:
        logits: 2-D tensor of unnormalized scores; softmax is taken over dim 1.

    Returns:
        Mean over rows of ``-sum(p * log(p + 1e-8))``; the 1e-8 keeps the log
        finite when a probability underflows to zero.
    """
    p = logits.softmax(dim=1)
    per_sample = -(p * (p + 1e-8).log()).sum(dim=1)
    return per_sample.mean().item()

In [None]:
# Advantage actor-critic fine-tuning of the policy head. BERT is only used as
# a fixed feature extractor here (its forward pass runs under torch.no_grad()).
epochs = 7
train_logs = {
    "loss": [],
    "reward": [],
    "accuracy": [],
    "entropy": []
}

for epoch in range(epochs):
    total_loss = 0
    total_reward = 0
    total_entropy = 0
    correct = 0
    total = 0

    for batch in tqdm(train_loader):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        # Named batch_labels so the notebook-level `labels` list (the full
        # training targets) is not silently overwritten by a batch tensor.
        batch_labels = batch["labels"].to(device)

        with torch.no_grad():
            output = bert(input_ids=input_ids, attention_mask=attention_mask)
            # Hidden state at sequence position 0 is the per-sample feature vector.
            cls_embeddings = output.last_hidden_state[:, 0, :]

        # ---- Actor forward: sample one action per sample from the policy
        logits = policy_net(cls_embeddings)
        log_probs = torch.log_softmax(logits, dim=1)
        probs = torch.exp(log_probs)
        # squeeze(1)/gather keep the batch axis, so a final batch holding a
        # single sample does not collapse to a 0-dim tensor.
        sampled_action = torch.multinomial(probs, num_samples=1).squeeze(1)  # [B]
        log_prob = log_probs.gather(1, sampled_action.unsqueeze(1)).squeeze(1)  # [B]

        # ---- Critic forward
        value = value_net(cls_embeddings).reshape(-1)  # [B], also when B == 1

        # The reward must score the action that was actually sampled, since
        # that is the action whose log-probability is reinforced below.
        # Rewarding the greedy argmax instead makes the reward independent of
        # the sampled action and leaves the policy gradient without signal.
        # compute_reward takes the argmax of its first argument, so a one-hot
        # encoding of the sampled action selects exactly that action.
        sampled_one_hot = F.one_hot(sampled_action, num_classes=logits.size(1)).float()
        reward = compute_reward(sampled_one_hot, batch_labels)
        # detach(): the advantage is a fixed weight for the actor loss; the
        # critic is trained only through value_loss.
        advantage = reward - value.detach()

        # ---- Losses
        policy_loss = - (log_prob * advantage).mean()
        value_loss = F.mse_loss(value, reward)
        total_batch_loss = policy_loss + value_loss

        # ---- Accuracy (greedy prediction) and policy entropy, for logging only
        pred = torch.argmax(logits, dim=1)
        correct += (pred == batch_labels).sum().item()
        total += batch_labels.size(0)
        entropy = compute_entropy(logits.detach())

        # ---- Backprop: a single backward pass feeds both optimizers
        actor_optimizer.zero_grad()
        critic_optimizer.zero_grad()
        total_batch_loss.backward()
        actor_optimizer.step()
        critic_optimizer.step()

        total_loss += total_batch_loss.item()
        total_reward += reward.mean().item()
        total_entropy += entropy

    epoch_acc = correct / total
    # NOTE: loss is logged as the sum over batches, whereas reward and entropy
    # are per-batch means; kept as-is so logs stay comparable with earlier runs.
    epoch_loss = total_loss
    epoch_reward = total_reward / len(train_loader)
    epoch_entropy = total_entropy / len(train_loader)

    train_logs["loss"].append(epoch_loss)
    train_logs["reward"].append(epoch_reward)
    train_logs["accuracy"].append(epoch_acc)
    train_logs["entropy"].append(epoch_entropy)

    print(f"[Epoch {epoch+1}] Loss: {epoch_loss:.4f} | Reward: {epoch_reward:.4f} | Accuracy: {epoch_acc:.4f} | Entropy: {epoch_entropy:.4f}")

In [None]:
# Persist the trained actor/critic weights and the per-epoch training metrics.
model_dir = "../../Model/V1"
log_dir = "../../Logs/V1"

# Create both output folders up front: torch.save and open() do not create
# missing parent directories.
os.makedirs(model_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

torch.save(policy_net.state_dict(), os.path.join(model_dir, "policy_net_rl_a2c.pt"))
torch.save(value_net.state_dict(), os.path.join(model_dir, "value_net_rl_a2c.pt"))

# Write the log into the directory created above (the previous code created
# ../../Logs/V1 but then opened ../Logs/V1/a2c.json, a different folder).
with open(os.path.join(log_dir, "a2c.json"), "w") as f:
    json.dump(train_logs, f, indent=2)