In [10]:
import os
import re
import torch
import numpy as np
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, AutoModel
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset as TorchDataset, DataLoader
from sklearn.preprocessing import MultiLabelBinarizer

# -----------------------------
# Setup: device, model_name, label mappings
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "vinai/phobert-base"

label_map = {"negative": 0, "neutral": 1, "positive": 2}
inv_label_map = {v: k for k, v in label_map.items()}

# -----------------------------
# Data Processing Functions
# -----------------------------
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read().strip()
    except Exception as e:
        print(f"Error: {e}")
        return ""

# For Stage 1, we extract the review (line 2) and a list of aspect names (from line 3).
def list_data(file_content):
    # We use a regex that splits on patterns like "#1" with optional whitespace after the number.
    sample_texts = re.split(r'#\d+\s*', file_content)[1:]
    list_sa = []
    for sample in sample_texts:
        # Split on newline
        lines = sample.strip().splitlines()
        if len(lines) < 2:
            continue        
        # Assume: line 1 is the review and line 2 is the aspect-sentiment pairs
        review = lines[0].strip().lower()
        aspects_line = lines[1].strip()
        # Split by "}, {" after stripping curly braces
        aspect_pairs = re.split(r'},\s*{', aspects_line.strip('{}'))
        # Extract aspect names (the first element before the comma)
        aspects = [pair.split(',')[0].strip() for pair in aspect_pairs if ',' in pair]
        if aspects:
            list_sa.append({"sentence": review, "aspects": aspects})
    return list_sa

# For Stage 2, we create one example per {aspect, sentiment} pair.
def load_and_preprocess_sentiment(file_path):
    examples = []
    content = read_file(file_path)
    # Use the same splitting as in Stage 1.
    sample_texts = re.split(r'#\d+\s*', content)[1:]
    for sample in sample_texts:
        lines = sample.strip().splitlines()
        if len(lines) < 2:
            continue
        # Use the same review as Stage 1 (line 1)
        review = lines[0].strip().lower()
        aspects_line = lines[1].strip()
        matches = re.findall(r"\{([^}]+)\}", aspects_line)
        for match in matches:
            parts = [p.strip() for p in match.split(",")]
            if len(parts) >= 2:
                aspect = parts[0]
                sentiment = parts[1].lower()
                if sentiment in label_map:
                    examples.append({
                        "review": review,
                        "aspect": aspect,
                        "label": label_map[sentiment]
                    })
    return examples

# -----------------------------
# Create Data from file (for both stages)
# -----------------------------
data_path = "/kaggle/input/food-review/final_data.txt"  # Adjust path if necessary
file_content = read_file(data_path)

# Stage 1: List data with reviews and aspect names
all_data_stage1 = list_data(file_content)

# Stage 2: Create examples for each aspect-sentiment pair
all_data_stage2 = load_and_preprocess_sentiment(data_path)

# -----------------------------
# Split Stage 1 into train/validation and use validation reviews as common set
# -----------------------------
train_data_stage1, val_data_stage1 = train_test_split(all_data_stage1, test_size=0.2, random_state=42)
# Use validation review texts (lowercased) as the common validation set.
val_reviews = set(item["sentence"] for item in val_data_stage1)
print(f"Tập validation (Stage 1) có {len(val_reviews)} review.")

# For Stage 2, keep only examples whose review is in the common validation set.
val_data_stage2 = [ex for ex in all_data_stage2 if ex["review"] in val_reviews]
train_data_stage2 = [ex for ex in all_data_stage2 if ex["review"] not in val_reviews]
print(f"Tập Stage 2 - train: {len(train_data_stage2)}, validation: {len(val_data_stage2)}")

# -----------------------------
# Stage 1: Prepare labels using MultiLabelBinarizer
# -----------------------------
all_aspects = ["AMBIENCE", "PRICE", "FOOD", "SERVICE", "DELIVERY"]
multi_aspect_binary = MultiLabelBinarizer(classes=all_aspects)
aspects_encoded = multi_aspect_binary.fit_transform([item["aspects"] for item in all_data_stage1])
print("Các khía cạnh (Stage 1):", multi_aspect_binary.classes_)

# -----------------------------
# Dataset for Stage 1
# -----------------------------
tokenizer = AutoTokenizer.from_pretrained(model_name)

class MultiAspectFeedbackDataset(TorchDataset):
    def __init__(self, data, labels):
        self.encodings = tokenizer([item["sentence"] for item in data],
                                   padding=True, truncation=True, max_length=128, return_tensors="pt")
        self.labels = torch.tensor(labels, dtype=torch.float)
    def __len__(self):
        return len(self.labels)
    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item["aspects"] = self.labels[idx]
        return item

# To maintain order, compute labels over all Stage 1 data and then select those corresponding to train and validation.
all_labels = multi_aspect_binary.transform([item["aspects"] for item in all_data_stage1])
train_labels_stage1 = [all_labels[i] for i, item in enumerate(all_data_stage1) if item in train_data_stage1]
val_labels_stage1 = [all_labels[i] for i, item in enumerate(all_data_stage1) if item in val_data_stage1]

train_dataset_stage1 = MultiAspectFeedbackDataset(train_data_stage1, train_labels_stage1)
val_dataset_stage1 = MultiAspectFeedbackDataset(val_data_stage1, val_labels_stage1)

train_dataloader_stage1 = DataLoader(train_dataset_stage1, batch_size=16, shuffle=True)
val_dataloader_stage1 = DataLoader(val_dataset_stage1, batch_size=16, shuffle=False)

# -----------------------------
# Model for Stage 1: PhoBERTMultiLabelClassifier
# -----------------------------
class PhoBERTMultiLabelClassifier(nn.Module):
    def __init__(self, num_labels):
        super(PhoBERTMultiLabelClassifier, self).__init__()
        self.phobert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.3)
        self.classifier = nn.Linear(self.phobert.config.hidden_size, num_labels)
    def forward(self, input_ids, attention_mask):
        outputs = self.phobert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state[:, 0, :]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits

# -----------------------------
# Load Stage 1 checkpoint (do not retrain if already trained)
# -----------------------------
model_stage1_demo = PhoBERTMultiLabelClassifier(num_labels=len(all_aspects)).to(device)
checkpoint_stage1 = "/kaggle/input/checkpoint-2-stage/absa_aspect_model.pt"  # Adjust checkpoint path if needed
model_stage1_demo.load_state_dict(torch.load(checkpoint_stage1), strict=False)
model_stage1_demo.to(device)
print("Model Stage 1 đã được load từ checkpoint.")

def extract_aspects_stage1(review_text, model, tokenizer, device, threshold=0.5):
    inputs = tokenizer(review_text, truncation=True, padding="max_length", max_length=128, return_tensors="pt")
    inputs.pop("token_type_ids", None)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    model.eval()
    with torch.no_grad():
        logits = model(**inputs)
    probs = torch.sigmoid(logits)[0]
    predicted_indices = (probs > threshold).nonzero(as_tuple=True)[0].tolist()
    predicted_labels = [multi_aspect_binary.classes_[i] for i in predicted_indices]
    aspects = list(set(label.split("_")[0] for label in predicted_labels))
    return aspects

# -----------------------------
# Stage 2: Prepare sentiment classification data
# -----------------------------
def load_and_preprocess_sentiment(file_path):
    examples = []
    content = read_file(file_path)
    # Use same splitting as Stage 1: split on patterns like "#1"
    sample_texts = re.split(r'#\d+\s*', content)[1:]
    for sample in sample_texts:
        lines = sample.strip().splitlines()
        if len(lines) < 2:
            continue
        # Use line 1 as review (same as Stage 1)
        review = lines[0].strip().lower()
        aspects_line = lines[1].strip()
        matches = re.findall(r"\{([^}]+)\}", aspects_line)
        for match in matches:
            parts = [p.strip() for p in match.split(",")]
            if len(parts) >= 2:
                aspect = parts[0]
                sentiment = parts[1].lower()
                if sentiment in label_map:
                    examples.append({
                        "review": review,
                        "aspect": aspect,
                        "label": label_map[sentiment]
                    })
    return examples

all_sentiment_examples = load_and_preprocess_sentiment(data_path)
print(f"Stage 2: Đã tải {len(all_sentiment_examples)} ví dụ cho phân loại cảm xúc.")

# Use the common validation set from Stage 1: filter Stage 2 examples accordingly.
val_data_stage2 = [ex for ex in all_sentiment_examples if ex["review"] in val_reviews]
train_data_stage2 = [ex for ex in all_sentiment_examples if ex["review"] not in val_reviews]
print(f"Tập Stage 2 - train: {len(train_data_stage2)}, validation: {len(val_data_stage2)}")

def build_ground_truth(val_examples):
    gt_dict = {}
    for ex in val_examples:
        review = ex["review"]
        aspect = ex["aspect"]
        sentiment_str = inv_label_map[ex["label"]]
        if review not in gt_dict:
            gt_dict[review] = {}
        gt_dict[review][aspect] = sentiment_str
    return gt_dict

val_gt = build_ground_truth(val_data_stage2)
val_df = pd.DataFrame(list(val_gt.items()), columns=["review", "ground_truth"])
print("Tập validation ground truth (Stage 2):")
print(val_df.head())

# For Stage 2 training (if needed), we create a Hugging Face Dataset from all Stage 2 examples.
raw_dataset_stage2 = Dataset.from_dict({
    "review": [ex["review"] for ex in all_sentiment_examples],
    "aspect": [ex["aspect"] for ex in all_sentiment_examples],
    "label": [ex["label"] for ex in all_sentiment_examples]
})

def combine_review_aspect(example):
    example["text"] = "Review: " + example["review"] + " | Aspect: " + example["aspect"]
    return example

dataset_stage2 = raw_dataset_stage2.map(combine_review_aspect)
print(dataset_stage2)

# Tokenize Stage 2 data
tokenizer = AutoTokenizer.from_pretrained(model_name)
def tokenize_function(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=256)
tokenized_dataset_stage2 = dataset_stage2.map(tokenize_function, batched=True)
# We won’t re-split Stage 2 for evaluation; we use val_data_stage2 (filtered from our common validation set).
print("Tập Stage 2 - train (if needed):", len([ex for ex in all_sentiment_examples if ex["review"] not in val_reviews]))
print("Tập Stage 2 - validation (common):", len(val_data_stage2))

# -----------------------------
# Load Stage 2 checkpoint from directory
# -----------------------------
checkpoint_dir = "/kaggle/input/checkpoint-2-stage/aspect_sentiment_model/checkpoint-8685"
print("Loading Stage 2 model from checkpoint directory:", checkpoint_dir)
model_stage2 = AutoModelForSequenceClassification.from_pretrained(
    checkpoint_dir,
    num_labels=3,
    problem_type="single_label_classification"
).to(device)

# -----------------------------
# Demo pipeline ABSA (Stage 1 + Stage 2)
# -----------------------------
def predict_sentiment_stage2(review_text, aspect, model_stage2, tokenizer, device):
    input_text = "Review: " + review_text.lower() + " | Aspect: " + aspect
    inputs = tokenizer(input_text, truncation=True, padding="max_length", max_length=256, return_tensors="pt")
    inputs.pop("token_type_ids", None)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    model_stage2.eval()
    with torch.no_grad():
        outputs = model_stage2(**inputs)
    pred = outputs.logits.argmax(dim=-1).item()
    return inv_label_map[pred]

def demo_full_pipeline(review_text, model_stage1, model_stage2, tokenizer, device, threshold=0.5):
    aspects = extract_aspects_stage1(review_text, model_stage1, tokenizer, device, threshold)
    if not aspects:
        return {}
    sentiments = {}
    for aspect in aspects:
        sentiment = predict_sentiment_stage2(review_text, aspect, model_stage2, tokenizer, device)
        sentiments[aspect] = sentiment
    return sentiments

# Demo pipeline for a sample review
sample_review = "Khẩu vị vừa ăn hợp vệ sinh, không gian quán rộng view cũng tạm được. Đặc biệt là phục vụ rất nhiệt tình và vui vẻ."
demo_result = demo_full_pipeline(sample_review, model_stage1_demo, model_stage2, tokenizer, device, threshold=0.5)
print("Demo pipeline ABSA cho review mẫu:")
print("Review:", sample_review)
print("Kết quả dự đoán:", demo_result)

# -----------------------------
# Evaluate combined pipeline on Stage 2 validation set
# -----------------------------
def build_ground_truth(val_examples):
    gt_dict = {}
    for ex in val_examples:
        review = ex["review"]
        aspect = ex["aspect"]
        sentiment_str = inv_label_map[ex["label"]]
        if review not in gt_dict:
            gt_dict[review] = {}
        gt_dict[review][aspect] = sentiment_str
    return gt_dict

val_gt = build_ground_truth(val_data_stage2)
val_df = pd.DataFrame(list(val_gt.items()), columns=["review", "ground_truth"])
print("Tập validation ground truth (Stage 2):")
print(val_df.head())

pred_list = []
for review in val_df["review"]:
    pred = demo_full_pipeline(review, model_stage1_demo, model_stage2, tokenizer, device, threshold=0.5)
    pred_list.append({"review": review, "predicted": pred})
pred_df = pd.DataFrame(pred_list)
print("Tập dự đoán của pipeline:")
print(pred_df.head())

matches = 0
true_positive = 0
false_positive = 0
false_negative = 0
total = len(val_df)

for idx in range(total):
    gt = val_df.loc[idx, "ground_truth"]  # Ground truth (dữ liệu thực tế)
    pred = pred_df.loc[idx, "predicted"]  # Dự đoán của mô hình
    
    # Kiểm tra nếu cả ground_truth và predicted đều đúng cho tất cả các aspects trong review
    all_correct = True
    for aspect in gt.keys():  # Duyệt qua tất cả các aspects trong ground_truth và predicted
        gt_aspect = gt[aspect]
        pred_aspect = pred.get(aspect, 'negative')  # Nếu không có, mặc định là 'negative'
        
        # Nếu bất kỳ aspect nào không khớp, đánh dấu là không đúng
        if gt_aspect != pred_aspect:
            all_correct = False
            # Tính các lỗi sai (false positives, false negatives)
            if gt_aspect == 'positive' and pred_aspect != 'positive':
                false_negative += 1  # Nếu thực tế là 'positive' mà dự đoán sai
            if gt_aspect != 'positive' and pred_aspect == 'positive':
                false_positive += 1  # Nếu thực tế không phải 'positive' mà dự đoán là 'positive'
    
    if all_correct:  # Nếu tất cả các aspects đều đúng
        matches += 1
        if gt == pred:  # Khi cả ground_truth và predicted đều đúng cho toàn bộ review
            true_positive += 1

# Tính Accuracy
if total > 0:
    accuracy = matches / total
    print(f"Accuracy kết hợp trên tập validation: {accuracy:.4f}")
    print(f"Số review trong tập validation: {total}, Số review dự đoán đúng: {matches}")
else:
    print("Không có review nào trong tập validation Stage 2.")

# Tính Precision, Recall và F1-score
if (true_positive + false_positive) > 0:
    precision = true_positive / (true_positive + false_positive)
else:
    precision = 0.0

if (true_positive + false_negative) > 0:
    recall = true_positive / (true_positive + false_negative)
else:
    recall = 0.0

if (precision + recall) > 0:
    f1_score = 2 * (precision * recall) / (precision + recall)
else:
    f1_score = 0.0

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1_score:.4f}")




Tập validation (Stage 1) có 2477 review.
Tập Stage 2 - train: 23130, validation: 5818
Các khía cạnh (Stage 1): ['AMBIENCE' 'PRICE' 'FOOD' 'SERVICE' 'DELIVERY']


  model_stage1_demo.load_state_dict(torch.load(checkpoint_stage1), strict=False)


Model Stage 1 đã được load từ checkpoint.
Stage 2: Đã tải 28948 ví dụ cho phân loại cảm xúc.
Tập Stage 2 - train: 23130, validation: 5818
Tập validation ground truth (Stage 2):
                                              review  \
0  ăn rất ngon được phục vụ chu đáo với 2 cô chú ...   
1           nha hang phuc vu cac mon an rat ngon nha   
2  một buổi chiều trước giờ đi làm được ăn bún đậ...   
3  trở lại quán vào buổi trưa nên không có khách ...   
4  đúng là cà phê rang mộc không tầm trộn khi uốn...   

                                        ground_truth  
0  {'FOOD': 'positive', 'SERVICE': 'positive', 'P...  
1                               {'FOOD': 'positive'}  
2                               {'FOOD': 'positive'}  
3         {'FOOD': 'positive', 'SERVICE': 'neutral'}  
4       {'FOOD': 'positive', 'AMBIENCE': 'positive'}  


Map:   0%|          | 0/28948 [00:00<?, ? examples/s]

Dataset({
    features: ['review', 'aspect', 'label', 'text'],
    num_rows: 28948
})


Map:   0%|          | 0/28948 [00:00<?, ? examples/s]

Tập Stage 2 - train (if needed): 23130
Tập Stage 2 - validation (common): 5818
Loading Stage 2 model from checkpoint directory: /kaggle/input/checkpoint-2-stage/aspect_sentiment_model/checkpoint-8685
Demo pipeline ABSA cho review mẫu:
Review: Khẩu vị vừa ăn hợp vệ sinh, không gian quán rộng view cũng tạm được. Đặc biệt là phục vụ rất nhiệt tình và vui vẻ.
Kết quả dự đoán: {'FOOD': 'neutral', 'AMBIENCE': 'positive', 'SERVICE': 'positive'}
Tập validation ground truth (Stage 2):
                                              review  \
0  ăn rất ngon được phục vụ chu đáo với 2 cô chú ...   
1           nha hang phuc vu cac mon an rat ngon nha   
2  một buổi chiều trước giờ đi làm được ăn bún đậ...   
3  trở lại quán vào buổi trưa nên không có khách ...   
4  đúng là cà phê rang mộc không tầm trộn khi uốn...   

                                        ground_truth  
0  {'FOOD': 'positive', 'SERVICE': 'positive', 'P...  
1                               {'FOOD': 'positive'}  
2                

In [6]:
matches = 0
true_positive = 0
false_positive = 0
false_negative = 0
total = len(val_df)

# Duyệt qua các dòng trong df và tính precision, recall, f1 cho cả pipeline
for idx in range(total):
    gt = val_df.loc[idx, "ground_truth"]
    pred = pred_df.loc[idx, "predicted"]
    
    # Kiểm tra nếu cả aspects và sentiments đều đúng (tính cho toàn bộ pipeline)
    if gt == pred:
        matches += 1
        # Kiểm tra các trường hợp true positives cho các aspect/sentiment chính xác
        for aspect in gt.keys():  # Lặp qua tất cả các aspects
            if gt[aspect] == 'positive' and pred[aspect] == 'positive':
                true_positive += 1
            elif gt[aspect] != 'positive' and pred[aspect] == 'positive':
                false_positive += 1
            elif gt[aspect] != 'positive' and pred[aspect] != 'positive':
                false_negative += 1
    else:
        # Nếu không đúng, kiểm tra các trường hợp false positives và false negatives
        for aspect in gt.keys():
            if pred[aspect] == 'positive':
                false_positive += 1
            elif gt[aspect] == 'positive':
                false_negative += 1

if total > 0:
    combined_accuracy = matches / total
    print(f"Accuracy kết hợp trên tập validation: {combined_accuracy:.4f}")
    print(f"Số review trong tập validation: {total}, Số review dự đoán đúng: {matches}")

    # Tính Precision, Recall và F1-score
    if (true_positive + false_positive) > 0:
        precision = true_positive / (true_positive + false_positive)
    else:
        precision = 0.0

    if (true_positive + false_negative) > 0:
        recall = true_positive / (true_positive + false_negative)
    else:
        recall = 0.0

    if (precision + recall) > 0:
        f1_score_val = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score_val = 0.0

    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1_score_val:.4f}")

else:
    print("Không có review nào trong tập validation Stage 2.")

KeyError: 'PRICE'

In [11]:
import os
import re
import torch
import numpy as np
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, AutoModel
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset as TorchDataset, DataLoader
from sklearn.preprocessing import MultiLabelBinarizer

# -----------------------------
# Setup: device, model_name, and label mappings
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "vinai/phobert-base"

label_map = {"negative": 0, "neutral": 1, "positive": 2}
inv_label_map = {v: k for k, v in label_map.items()}

# -----------------------------
# Data Processing Functions
# -----------------------------
def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read().strip()
    except Exception as e:
        print(f"Error: {e}")
        return ""

# For Stage 1: we use the first line as the review and the second line contains the aspect-sentiment pairs.
def list_data(file_content):
    # Split using patterns like "#1" (ignoring the '#' and number)
    sample_texts = re.split(r'#\d+\s*', file_content)[1:]
    list_sa = []
    for sample in sample_texts:
        lines = sample.strip().splitlines()
        if len(lines) < 2:
            continue        
        # Assume:
        #   line 1: review text
        #   line 2: aspect-sentiment pairs (e.g. "{FOOD, positive}, {SERVICE, positive}")
        review = lines[0].strip().lower()
        aspects_line = lines[1].strip()
        # Remove outer braces and split by "}, {"
        aspect_pairs = re.split(r'},\s*{', aspects_line.strip('{}'))
        aspects = [pair.split(',')[0].strip() for pair in aspect_pairs if ',' in pair]
        if aspects:
            list_sa.append({"sentence": review, "aspects": aspects})
    return list_sa

# For Stage 2: Create one example per {aspect, sentiment} pair.
def load_and_preprocess_sentiment(file_path):
    examples = []
    content = read_file(file_path)
    sample_texts = re.split(r'#\d+\s*', content)[1:]
    for sample in sample_texts:
        lines = sample.strip().splitlines()
        if len(lines) < 2:
            continue
        # Use the same review as Stage 1 (line 1)
        review = lines[0].strip().lower()
        aspects_line = lines[1].strip()
        matches = re.findall(r"\{([^}]+)\}", aspects_line)
        for match in matches:
            parts = [p.strip() for p in match.split(",")]
            if len(parts) >= 2:
                aspect = parts[0]
                sentiment = parts[1].lower()
                if sentiment in label_map:
                    examples.append({
                        "review": review,
                        "aspect": aspect,
                        "label": label_map[sentiment]
                    })
    return examples

# -----------------------------
# Create Data from File (common for both stages)
# -----------------------------
data_path = "/kaggle/input/food-review/final_data.txt"  # Adjust path if necessary
file_content = read_file(data_path)

# Stage 1: Get review and list of aspect names.
all_data_stage1 = list_data(file_content)

# Stage 2: Get individual examples for each {aspect, sentiment} pair.
all_data_stage2 = load_and_preprocess_sentiment(data_path)

# -----------------------------
# Split Stage 1 into train/validation using indices so that label order is preserved
# -----------------------------
all_indices = np.arange(len(all_data_stage1))
train_indices, val_indices = train_test_split(all_indices, test_size=0.2, random_state=42)
train_data_stage1 = [all_data_stage1[i] for i in train_indices]
val_data_stage1 = [all_data_stage1[i] for i in val_indices]
print(f"Tập Stage 1 - train: {len(train_data_stage1)}, validation: {len(val_data_stage1)}")

# Derive the common validation set as the set of review texts from Stage 1 validation.
val_reviews = set(item["sentence"] for item in val_data_stage1)
print(f"Tập validation (Stage 1) có {len(val_reviews)} review.")

# For Stage 2, keep only examples whose review is in the common validation set.
val_data_stage2 = [ex for ex in all_data_stage2 if ex["review"] in val_reviews]
train_data_stage2 = [ex for ex in all_data_stage2 if ex["review"] not in val_reviews]
print(f"Tập Stage 2 - train: {len(train_data_stage2)}, validation: {len(val_data_stage2)}")

# -----------------------------
# Stage 1: Prepare labels using MultiLabelBinarizer
# -----------------------------
all_aspects = ["AMBIENCE", "PRICE", "FOOD", "SERVICE", "DELIVERY"]
multi_aspect_binary = MultiLabelBinarizer(classes=all_aspects)
all_labels = multi_aspect_binary.fit_transform([item["aspects"] for item in all_data_stage1])
print("Các khía cạnh (Stage 1):", multi_aspect_binary.classes_)

# -----------------------------
# Dataset for Stage 1
# -----------------------------
tokenizer = AutoTokenizer.from_pretrained(model_name)

class MultiAspectFeedbackDataset(TorchDataset):
    def __init__(self, data, labels):
        self.encodings = tokenizer([item["sentence"] for item in data],
                                   padding=True, truncation=True, max_length=128, return_tensors="pt")
        self.labels = torch.tensor(labels, dtype=torch.float)
    def __len__(self):
        return len(self.labels)
    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item["aspects"] = self.labels[idx]
        return item

train_labels_stage1 = all_labels[train_indices]
val_labels_stage1 = all_labels[val_indices]

train_dataset_stage1 = MultiAspectFeedbackDataset(train_data_stage1, train_labels_stage1)
val_dataset_stage1 = MultiAspectFeedbackDataset(val_data_stage1, val_labels_stage1)

train_dataloader_stage1 = DataLoader(train_dataset_stage1, batch_size=16, shuffle=True)
val_dataloader_stage1 = DataLoader(val_dataset_stage1, batch_size=16, shuffle=False)

# -----------------------------
# Model for Stage 1: PhoBERTMultiLabelClassifier
# -----------------------------
class PhoBERTMultiLabelClassifier(nn.Module):
    def __init__(self, num_labels):
        super(PhoBERTMultiLabelClassifier, self).__init__()
        self.phobert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.3)
        self.classifier = nn.Linear(self.phobert.config.hidden_size, num_labels)
    def forward(self, input_ids, attention_mask):
        outputs = self.phobert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state[:, 0, :]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits

# -----------------------------
# Load Stage 1 checkpoint (already trained)
# -----------------------------
model_stage1_demo = PhoBERTMultiLabelClassifier(num_labels=len(all_aspects)).to(device)
checkpoint_stage1 = "/kaggle/input/checkpoint-2-stage/absa_aspect_model.pt"  # Adjust checkpoint path if needed
model_stage1_demo.load_state_dict(torch.load(checkpoint_stage1), strict=False)
model_stage1_demo.to(device)
print("Model Stage 1 đã được load từ checkpoint.")

def extract_aspects_stage1(review_text, model, tokenizer, device, threshold=0.5):
    inputs = tokenizer(review_text, truncation=True, padding="max_length", max_length=128, return_tensors="pt")
    inputs.pop("token_type_ids", None)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    model.eval()
    with torch.no_grad():
        logits = model(**inputs)
    probs = torch.sigmoid(logits)[0]
    predicted_indices = (probs > threshold).nonzero(as_tuple=True)[0].tolist()
    predicted_labels = [multi_aspect_binary.classes_[i] for i in predicted_indices]
    aspects = list(set(label.split("_")[0] for label in predicted_labels))
    return aspects

# -----------------------------
# Stage 2: Prepare Sentiment Classification Data
# -----------------------------
def load_and_preprocess_sentiment(file_path):
    examples = []
    content = read_file(file_path)
    sample_texts = re.split(r'#\d+\s*', content)[1:]
    for sample in sample_texts:
        lines = sample.strip().splitlines()
        if len(lines) < 2:
            continue
        # Use line 1 as review (same as Stage 1)
        review = lines[0].strip().lower()
        aspects_line = lines[1].strip()
        matches = re.findall(r"\{([^}]+)\}", aspects_line)
        for match in matches:
            parts = [p.strip() for p in match.split(",")]
            if len(parts) >= 2:
                aspect = parts[0]
                sentiment = parts[1].lower()
                if sentiment in label_map:
                    examples.append({
                        "review": review,
                        "aspect": aspect,
                        "label": label_map[sentiment]
                    })
    return examples

all_sentiment_examples = load_and_preprocess_sentiment(data_path)
print(f"Stage 2: Đã tải {len(all_sentiment_examples)} ví dụ cho phân loại cảm xúc.")

# Use the common validation set from Stage 1: filter Stage 2 examples accordingly.
val_data_stage2 = [ex for ex in all_sentiment_examples if ex["review"] in val_reviews]
train_data_stage2 = [ex for ex in all_sentiment_examples if ex["review"] not in val_reviews]
print(f"Tập Stage 2 - train: {len(train_data_stage2)}, validation: {len(val_data_stage2)}")

def build_ground_truth(val_examples):
    gt_dict = {}
    for ex in val_examples:
        review = ex["review"]
        aspect = ex["aspect"]
        sentiment_str = inv_label_map[ex["label"]]
        if review not in gt_dict:
            gt_dict[review] = {}
        gt_dict[review][aspect] = sentiment_str
    return gt_dict

val_gt = build_ground_truth(val_data_stage2)
val_df = pd.DataFrame(list(val_gt.items()), columns=["review", "ground_truth"])
print("Tập validation ground truth (Stage 2):")
print(val_df.head())

# For Stage 2 training (if needed), create a Hugging Face Dataset from all Stage 2 examples.
raw_dataset_stage2 = Dataset.from_dict({
    "review": [ex["review"] for ex in all_sentiment_examples],
    "aspect": [ex["aspect"] for ex in all_sentiment_examples],
    "label": [ex["label"] for ex in all_sentiment_examples]
})

def combine_review_aspect(example):
    example["text"] = "Review: " + example["review"] + " | Aspect: " + example["aspect"]
    return example

dataset_stage2 = raw_dataset_stage2.map(combine_review_aspect)
print(dataset_stage2)

# Tokenize Stage 2 data
tokenizer = AutoTokenizer.from_pretrained(model_name)
def tokenize_function(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=256)
tokenized_dataset_stage2 = dataset_stage2.map(tokenize_function, batched=True)
# We do not re-split here for evaluation; we already have val_data_stage2.
print("Tập Stage 2 - train (if needed):", len([ex for ex in all_sentiment_examples if ex["review"] not in val_reviews]))
print("Tập Stage 2 - validation (common):", len(val_data_stage2))

# -----------------------------
# Evaluate Combined Pipeline on Stage 2 Validation Set
# -----------------------------
def build_ground_truth(val_examples):
    gt_dict = {}
    for ex in val_examples:
        review = ex["review"]
        aspect = ex["aspect"]
        sentiment_str = inv_label_map[ex["label"]]
        if review not in gt_dict:
            gt_dict[review] = {}
        gt_dict[review][aspect] = sentiment_str
    return gt_dict

val_gt = build_ground_truth(val_data_stage2)
val_df = pd.DataFrame(list(val_gt.items()), columns=["review", "ground_truth"])
print("Tập validation ground truth (Stage 2):")
print(val_df.head())

pred_list = []
for review in val_df["review"]:
    pred = demo_full_pipeline(review, model_stage1_demo, model_stage2, tokenizer, device, threshold=0.5)
    pred_list.append({"review": review, "predicted": pred})
pred_df = pd.DataFrame(pred_list)
print("Tập dự đoán của pipeline:")
print(pred_df.head())

matches = 0
total = len(val_df)
for idx in range(total):
    gt = val_df.loc[idx, "ground_truth"]
    pred = pred_df.loc[idx, "predicted"]
    if gt == pred:
        matches += 1
if total > 0:
    combined_accuracy = matches / total
    print(f"Accuracy kết hợp trên tập validation: {combined_accuracy:.4f}")
    print(f"Số review trong tập validation: {total}, Số review dự đoán đúng: {matches}")
else:
    print("Không có review nào trong tập validation Stage 2.")

# -----------------------------
# Separate Evaluation: Stage 1 Only
# -----------------------------
def evaluate_stage1(model, data, labels, batch_size=16):
    model.eval()
    all_preds = []
    all_true = []
    dataset = MultiAspectFeedbackDataset(data, labels)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    with torch.no_grad():
        for batch in tqdm(loader, desc="Evaluating Stage 1"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            true_labels = batch["aspects"].cpu().numpy()
            outputs = model(input_ids, attention_mask)
            # Since our model returns logits directly (a Tensor), use it.
            probs = torch.sigmoid(outputs).cpu().numpy()
            all_preds.append(probs)
            all_true.append(true_labels)
    all_preds = np.vstack(all_preds)
    all_true = np.vstack(all_true)
    threshold = 0.5
    preds_binary = (all_preds > threshold).astype(int)
    f1 = f1_score(all_true, preds_binary, average="micro")
    acc = accuracy_score(all_true, preds_binary)
    precision = precision_score(all_true, preds_binary, average="micro", zero_division=0)
    recall = recall_score(all_true, preds_binary, average="micro", zero_division=0)
    print(f"Stage 1 - F1 Score: {f1:.4f}, Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")
    model.train()

print("\nĐánh giá riêng mô hình Stage 1 trên tập validation:")
evaluate_stage1(model_stage1_demo, val_data_stage1, val_labels_stage1)


Tập Stage 1 - train: 9904, validation: 2477
Tập validation (Stage 1) có 2477 review.
Tập Stage 2 - train: 23130, validation: 5818
Các khía cạnh (Stage 1): ['AMBIENCE' 'PRICE' 'FOOD' 'SERVICE' 'DELIVERY']


  model_stage1_demo.load_state_dict(torch.load(checkpoint_stage1), strict=False)


Model Stage 1 đã được load từ checkpoint.
Stage 2: Đã tải 28948 ví dụ cho phân loại cảm xúc.
Tập Stage 2 - train: 23130, validation: 5818
Tập validation ground truth (Stage 2):
                                              review  \
0  ăn rất ngon được phục vụ chu đáo với 2 cô chú ...   
1           nha hang phuc vu cac mon an rat ngon nha   
2  một buổi chiều trước giờ đi làm được ăn bún đậ...   
3  trở lại quán vào buổi trưa nên không có khách ...   
4  đúng là cà phê rang mộc không tầm trộn khi uốn...   

                                        ground_truth  
0  {'FOOD': 'positive', 'SERVICE': 'positive', 'P...  
1                               {'FOOD': 'positive'}  
2                               {'FOOD': 'positive'}  
3         {'FOOD': 'positive', 'SERVICE': 'neutral'}  
4       {'FOOD': 'positive', 'AMBIENCE': 'positive'}  


Map:   0%|          | 0/28948 [00:00<?, ? examples/s]

Dataset({
    features: ['review', 'aspect', 'label', 'text'],
    num_rows: 28948
})


Map:   0%|          | 0/28948 [00:00<?, ? examples/s]

Tập Stage 2 - train (if needed): 23130
Tập Stage 2 - validation (common): 5818
Tập validation ground truth (Stage 2):
                                              review  \
0  ăn rất ngon được phục vụ chu đáo với 2 cô chú ...   
1           nha hang phuc vu cac mon an rat ngon nha   
2  một buổi chiều trước giờ đi làm được ăn bún đậ...   
3  trở lại quán vào buổi trưa nên không có khách ...   
4  đúng là cà phê rang mộc không tầm trộn khi uốn...   

                                        ground_truth  
0  {'FOOD': 'positive', 'SERVICE': 'positive', 'P...  
1                               {'FOOD': 'positive'}  
2                               {'FOOD': 'positive'}  
3         {'FOOD': 'positive', 'SERVICE': 'neutral'}  
4       {'FOOD': 'positive', 'AMBIENCE': 'positive'}  
Tập dự đoán của pipeline:
                                              review  \
0  ăn rất ngon được phục vụ chu đáo với 2 cô chú ...   
1           nha hang phuc vu cac mon an rat ngon nha   
2  một buổi chiều trư

Evaluating Stage 1: 100%|██████████| 155/155 [00:08<00:00, 18.66it/s]

Stage 1 - F1 Score: 0.9625, Accuracy: 0.8345, Precision: 0.9297, Recall: 0.9978



