In [1]:
import re
import torch
import numpy as np
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset as TorchDataset, DataLoader
from sklearn.preprocessing import MultiLabelBinarizer
from transformers import AutoModel

# -----------------------------
# ƒê·ªãnh nghƒ©a bi·∫øn device v√† model_name
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "vinai/phobert-base"

# -----------------------------
# Khai b√°o √°nh x·∫° nh√£n c·∫£m x√∫c (Stage 2)
# -----------------------------
label_map = {"negative": 0, "neutral": 1, "positive": 2}
inv_label_map = {v: k for k, v in label_map.items()}

# =============================
# Stage 1: Hu·∫•n luy·ªán m√¥ h√¨nh tr√≠ch xu·∫•t kh√≠a c·∫°nh
# =============================

def read_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read().strip()
    except Exception as e:
        print(f"Error: {e}")
        return ""

def split_aspect(aspects):
    list_sa = []
    aspect_pairs = aspects.split('}, {')
    for pair in aspect_pairs:
        pair = pair.replace('{', '').replace('}', '')
        parts = pair.split(', ')
        if len(parts) == 2:
            aspect, sentiment = parts
            list_sa.append([aspect.strip(), sentiment.strip()])
        else:
            continue
    return list_sa

def list_data(file, aspect_list):
    opinions = file.split('\n\n')
    list_sa = []
    for opinion in opinions:
        if not opinion.strip():
            continue
        lines = opinion.split("\n")
        if len(lines) < 3:
            continue        
        num, sentence, aspects = lines[0], lines[1], lines[2]
        aspects = split_aspect(aspects)
        if not aspects or len(aspects) < 2:
            continue           
        list_temp = [aspect[0] for aspect in aspects]
        for aspect, sentiment in aspects:
            item = {"sentence": sentence, "aspects": list_temp}
            list_sa.append(item)
    return list_sa

# ƒê∆∞·ªùng d·∫´n t·ªõi file d·ªØ li·ªáu (Stage 1 v√† Stage 2 d√πng chung)
data_path = "/kaggle/input/food-review/final_data.txt"
data_content = read_file(data_path)
all_aspects = ["AMBIENCE", "PRICE", "FOOD", "SERVICE", "DELIVERY"]
all_data = list_data(data_content, all_aspects)

# S·ª≠ d·ª•ng MultiLabelBinarizer ƒë·ªÉ m√£ h√≥a c√°c nh√£n kh√≠a c·∫°nh (Stage 1)
tokenizer = AutoTokenizer.from_pretrained(model_name)
multi_aspect_binary = MultiLabelBinarizer(classes=all_aspects)
aspects_encoded = multi_aspect_binary.fit_transform([item["aspects"] for item in all_data])
print("C√°c kh√≠a c·∫°nh:", multi_aspect_binary.classes_)

# ƒê·ªãnh nghƒ©a Dataset cho Stage 1
class MultiAspectFeedbackDataset(TorchDataset):
    def __init__(self, data, labels):
        self.encodings = tokenizer([item["sentence"] for item in data],
                                   padding=True, truncation=True, max_length=128, return_tensors="pt")
        self.labels = torch.tensor(labels, dtype=torch.float)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item["aspects"] = self.labels[idx]
        return item

# Chia d·ªØ li·ªáu th√†nh train v√† validation cho Stage 1
train_sentences, val_sentences, train_aspects, val_aspects = train_test_split(
    all_data, aspects_encoded, test_size=0.2, random_state=42
)
train_dataset_stage1 = MultiAspectFeedbackDataset(train_sentences, train_aspects)
val_dataset_stage1 = MultiAspectFeedbackDataset(val_sentences, val_aspects)

train_dataloader_stage1 = DataLoader(train_dataset_stage1, batch_size=16, shuffle=True)
val_dataloader_stage1 = DataLoader(val_dataset_stage1, batch_size=16, shuffle=False)

# ƒê·ªãnh nghƒ©a m√¥ h√¨nh PhoBERT v·ªõi multi-label classification cho Stage 1
class PhoBERTMultiLabelClassifier(nn.Module):
    def __init__(self, num_labels):
        super(PhoBERTMultiLabelClassifier, self).__init__()
        self.phobert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.3)
        self.classifier = nn.Linear(self.phobert.config.hidden_size, num_labels)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.phobert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state[:, 0, :]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        return logits

# Kh·ªüi t·∫°o model Stage 1
model_stage1 = PhoBERTMultiLabelClassifier(num_labels=len(all_aspects)).to(device)
criterion_stage1 = nn.BCEWithLogitsLoss()
optimizer_stage1 = optim.AdamW(model_stage1.parameters(), lr=5e-5)

def train_stage1(model, train_dataloader, device="cuda", epochs=5):
    model.train()
    model.to(device)
    for epoch in range(epochs):
        total_loss = 0
        print(f"\nEpoch {epoch+1}/{epochs} - Stage 1")
        progress_bar = tqdm(train_dataloader, total=len(train_dataloader), desc=f"Epoch {epoch+1}")
        for batch in progress_bar:
            optimizer_stage1.zero_grad()
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["aspects"].to(device)
            logits = model(input_ids, attention_mask)
            loss = criterion_stage1(logits, labels)
            loss.backward()
            optimizer_stage1.step()
            total_loss += loss.item()
            progress_bar.set_postfix(loss=loss.item())
        avg_loss = total_loss / len(train_dataloader)
        print(f"Stage 1 - Epoch {epoch+1} - Avg Loss: {avg_loss:.4f}")
    return model

# Hu·∫•n luy·ªán Stage 1 (b·ªè qua b∆∞·ªõc n√†y n·∫øu ƒë√£ hu·∫•n luy·ªán)
model_stage1 = train_stage1(model_stage1, train_dataloader_stage1, device=device, epochs=5)
# L∆∞u model Stage 1 ƒë√£ hu·∫•n luy·ªán
torch.save(model_stage1.state_dict(), "./absa_aspect_model.pt")

# ----------------------------
# H√†m tr√≠ch xu·∫•t kh√≠a c·∫°nh s·ª≠ d·ª•ng model Stage 1 ƒë√£ hu·∫•n luy·ªán
# ----------------------------
def extract_aspects_stage1(review_text, model, tokenizer, device, threshold=0.5):
    """
    S·ª≠ d·ª•ng model Stage 1 ƒë√£ hu·∫•n luy·ªán ƒë·ªÉ tr√≠ch xu·∫•t kh√≠a c·∫°nh t·ª´ review.
    - Token h√≥a review.
    - D·ª± ƒëo√°n c√°c nh√£n multi-label (v√≠ d·ª•: "FOOD_positive", "SERVICE_negative", ...)
    - √Åp d·ª•ng sigmoid v√† threshold ƒë·ªÉ x√°c ƒë·ªãnh nh√£n k√≠ch ho·∫°t.
    - T·ª´ ƒë√≥, tr√≠ch xu·∫•t t√™n kh√≠a c·∫°nh (ph·∫ßn tr∆∞·ªõc d·∫•u g·∫°ch d∆∞·ªõi).
    """
    inputs = tokenizer(review_text, truncation=True, padding="max_length", max_length=128, return_tensors="pt")
    if "token_type_ids" in inputs:
        inputs.pop("token_type_ids")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    model.eval()
    with torch.no_grad():
        logits = model(**inputs)
    probs = torch.sigmoid(logits)[0]
    predicted_indices = (probs > threshold).nonzero(as_tuple=True)[0].tolist()
    predicted_labels = [multi_aspect_binary.classes_[i] for i in predicted_indices]
    aspects = list(set(label.split("_")[0] for label in predicted_labels))
    return aspects

# =============================
# Stage 2: Hu·∫•n luy·ªán m√¥ h√¨nh ph√¢n lo·∫°i c·∫£m x√∫c
# =============================

def load_and_preprocess_sentiment(file_path):
    """
    ƒê·ªçc v√† ti·ªÅn x·ª≠ l√Ω d·ªØ li·ªáu t·ª´ final_data.txt.
    V·ªõi m·ªói c·∫∑p {aspect, sentiment}, t·∫°o v√≠ d·ª• v·ªõi: "review", "aspect", "label".
    """
    examples = []
    with open(file_path, encoding="utf-8") as f:
        content = f.read().strip()
    sample_texts = re.split(r'#\d+\s*\n', content)[1:]
    for sample in sample_texts:
        lines = sample.strip().splitlines()
        if len(lines) < 2:
            continue
        review = " ".join(lines[:-1]).strip().lower()
        labels_line = lines[-1].strip()
        matches = re.findall(r"\{([^}]+)\}", labels_line)
        for match in matches:
            parts = [p.strip() for p in match.split(",")]
            if len(parts) >= 2:
                aspect = parts[0]
                sentiment = parts[1].lower()
                if sentiment in label_map:
                    examples.append({
                        "review": review,
                        "aspect": aspect,
                        "label": label_map[sentiment]
                    })
    return examples

sentiment_examples = load_and_preprocess_sentiment(data_path)
print(f"Stage 2: ƒê√£ t·∫£i {len(sentiment_examples)} v√≠ d·ª• cho ph√¢n lo·∫°i c·∫£m x√∫c.")

if len(sentiment_examples) == 0:
    raise ValueError("Kh√¥ng c√≥ v√≠ d·ª• cho Stage 2. Ki·ªÉm tra l·∫°i file d·ªØ li·ªáu v√† h√†m ti·ªÅn x·ª≠ l√Ω.")

train_sentiment, val_sentiment = train_test_split(sentiment_examples, test_size=0.2, random_state=42)
print(f"T·∫≠p Stage 2 - train: {len(train_sentiment)}, val: {len(val_sentiment)}")

def build_ground_truth(val_examples):
    gt_dict = {}
    for ex in val_examples:
        review = ex["review"]
        aspect = ex["aspect"]
        sentiment_str = inv_label_map[ex["label"]]
        if review not in gt_dict:
            gt_dict[review] = {}
        gt_dict[review][aspect] = sentiment_str
    return gt_dict

val_gt = build_ground_truth(val_sentiment)
val_df = pd.DataFrame(list(val_gt.items()), columns=["review", "ground_truth"])
print("T·∫≠p validation ground truth (Stage 2):")
print(val_df.head())

raw_dataset_stage2 = Dataset.from_dict({
    "review": [ex["review"] for ex in sentiment_examples],
    "aspect": [ex["aspect"] for ex in sentiment_examples],
    "label": [ex["label"] for ex in sentiment_examples]
})

def combine_review_aspect(example):
    example["text"] = "Review: " + example["review"] + " | Aspect: " + example["aspect"]
    return example

dataset_stage2 = raw_dataset_stage2.map(combine_review_aspect)
print(dataset_stage2)

tokenizer = AutoTokenizer.from_pretrained(model_name)
def tokenize_function(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=256)
tokenized_dataset_stage2 = dataset_stage2.map(tokenize_function, batched=True)
split_dataset_stage2 = tokenized_dataset_stage2.train_test_split(test_size=0.2, seed=42)
train_dataset_stage2 = split_dataset_stage2["train"]
val_dataset_stage2 = split_dataset_stage2["test"]

# Load model Stage 2 t·ª´ base model v√† load tr·ªçng s·ªë encoder t·ª´ checkpoint Stage 1.
checkpoint_path = "./absa_aspect_model.pt"
model_stage2 = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
    problem_type="single_label_classification",
    ignore_mismatched_sizes=True
)
state_dict = torch.load(checkpoint_path)
new_state_dict = {k: v for k, v in state_dict.items() if not k.startswith("classifier")}
model_stage2.load_state_dict(new_state_dict, strict=False)
model_stage2.to(device)

training_args_stage2 = TrainingArguments(
    output_dir="./aspect_sentiment_model",
    num_train_epochs=5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,
    load_best_model_at_end=True,
    report_to=[]
)

def compute_metrics_stage2(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)
    accuracy = (predictions == labels).mean()
    return {"accuracy": accuracy}

trainer_stage2 = Trainer(
    model=model_stage2,
    args=training_args_stage2,
    train_dataset=train_dataset_stage2,
    eval_dataset=val_dataset_stage2,
    compute_metrics=compute_metrics_stage2
)

trainer_stage2.train()
trainer_stage2.save_model("./aspect_sentiment_model")

# =============================
# Demo pipeline ABSA t√≠ch h·ª£p (Stage 1 + Stage 2)
# =============================

def predict_sentiment_stage2(review_text, aspect, model_stage2, tokenizer, device):
    input_text = "Review: " + review_text.lower() + " | Aspect: " + aspect
    inputs = tokenizer(input_text, truncation=True, padding="max_length", max_length=256, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    model_stage2.eval()
    with torch.no_grad():
        outputs = model_stage2(**inputs)
    pred = outputs.logits.argmax(dim=-1).item()
    return inv_label_map[pred]

def demo_full_pipeline(review_text, model_stage1, model_stage2, tokenizer, device, threshold=0.5):
    # Stage 1: S·ª≠ d·ª•ng model Stage 1 ƒë√£ hu·∫•n luy·ªán ƒë·ªÉ tr√≠ch xu·∫•t kh√≠a c·∫°nh
    aspects = extract_aspects_stage1(review_text, model_stage1, tokenizer, device, threshold)
    if not aspects:
        return {}
    sentiments = {}
    # Stage 2: D·ª± ƒëo√°n c·∫£m x√∫c cho t·ª´ng kh√≠a c·∫°nh
    for aspect in aspects:
        sentiment = predict_sentiment_stage2(review_text, aspect, model_stage2, tokenizer, device)
        sentiments[aspect] = sentiment
    return sentiments

# Load l·∫°i model Stage 1 cho ph·∫ßn demo t·ª´ checkpoint
model_stage1_demo = PhoBERTMultiLabelClassifier(num_labels=len(all_aspects)).to(device)
model_stage1_demo.load_state_dict(torch.load("./absa_aspect_model.pt"), strict=False)
model_stage1_demo.to(device)

# Demo pipeline cho m·ªôt review m·∫´u
sample_review = "Kh·∫©u v·ªã v·ª´a ƒÉn h·ª£p v·ªá sinh, kh√¥ng gian qu√°n r·ªông view c≈©ng t·∫°m ƒë∆∞·ª£c. ƒê·∫∑c bi·ªát l√† ph·ª•c v·ª• r·∫•t nhi·ªát t√¨nh v√† vui v·∫ª."
demo_result = demo_full_pipeline(sample_review, model_stage1_demo, model_stage2, tokenizer, device, threshold=0.5)
print("Demo pipeline ABSA cho review m·∫´u:")
print("Review:", sample_review)
print("K·∫øt qu·∫£ d·ª± ƒëo√°n:", demo_result)

# ----------------------------
# ƒê√°nh gi√° k·∫øt h·ª£p tr√™n t·∫≠p validation (Stage 2)
# ----------------------------
def build_ground_truth(val_examples):
    gt_dict = {}
    for ex in val_examples:
        review = ex["review"]
        aspect = ex["aspect"]
        sentiment_str = inv_label_map[ex["label"]]
        if review not in gt_dict:
            gt_dict[review] = {}
        gt_dict[review][aspect] = sentiment_str
    return gt_dict

val_gt = build_ground_truth(val_sentiment)
val_df = pd.DataFrame(list(val_gt.items()), columns=["review", "ground_truth"])
print("T·∫≠p validation ground truth (Stage 2):")
print(val_df.head())

pred_list = []
for review in val_df["review"]:
    pred = demo_full_pipeline(review, model_stage1_demo, model_stage2, tokenizer, device, threshold=0.5)
    pred_list.append({"review": review, "predicted": pred})
pred_df = pd.DataFrame(pred_list)
print("T·∫≠p d·ª± ƒëo√°n c·ªßa pipeline:")
print(pred_df.head())

matches = 0
total = len(val_df)
for idx in range(total):
    gt = val_df.loc[idx, "ground_truth"]
    pred = pred_df.loc[idx, "predicted"]
    if gt == pred:
        matches += 1
combined_accuracy = matches / total
print(f"Accuracy k·∫øt h·ª£p tr√™n t·∫≠p validation: {combined_accuracy:.4f}")
print(f"S·ªë review trong t·∫≠p validation: {total}, S·ªë review d·ª± ƒëo√°n ƒë√∫ng: {matches}")

# ----------------------------
# ƒê√°nh gi√° ri√™ng Stage 1 tr√™n t·∫≠p validation (Stage 1)
# ----------------------------
def evaluate_stage1(model, dataloader, device="cuda"):
    model.eval()
    model.to(device)
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating Stage 1"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["aspects"].cpu().numpy()
            outputs = model(input_ids, attention_mask)
            probs = torch.sigmoid(outputs.logits).cpu().numpy()
            all_preds.append(probs)
            all_labels.append(labels)
    all_preds = np.vstack(all_preds)
    all_labels = np.vstack(all_labels)
    threshold = 0.5
    preds_binary = (all_preds > threshold).astype(int)
    f1 = f1_score(all_labels, preds_binary, average="micro")
    acc = accuracy_score(all_labels, preds_binary)
    print(f"Stage 1 - F1 Score: {f1:.4f}, Accuracy: {acc:.4f}")
    model.train()

print("\nƒê√°nh gi√° ri√™ng m√¥ h√¨nh Stage 1 tr√™n t·∫≠p validation:")
evaluate_stage1(model_stage1, val_dataloader_stage1, device=device)


config.json:   0%|          | 0.00/557 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/895k [00:00<?, ?B/s]

bpe.codes:   0%|          | 0.00/1.14M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.13M [00:00<?, ?B/s]

C√°c kh√≠a c·∫°nh: ['AMBIENCE' 'PRICE' 'FOOD' 'SERVICE' 'DELIVERY']


pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]


Epoch 1/5 - Stage 1


Epoch 1: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1322/1322 [04:23<00:00,  5.02it/s, loss=0.198]


Stage 1 - Epoch 1 - Avg Loss: 0.1675

Epoch 2/5 - Stage 1


Epoch 2: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1322/1322 [04:22<00:00,  5.04it/s, loss=0.108]


Stage 1 - Epoch 2 - Avg Loss: 0.0843

Epoch 3/5 - Stage 1


Epoch 3: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1322/1322 [04:22<00:00,  5.04it/s, loss=0.0764]


Stage 1 - Epoch 3 - Avg Loss: 0.0577

Epoch 4/5 - Stage 1


Epoch 4: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1322/1322 [04:22<00:00,  5.04it/s, loss=0.0947]


Stage 1 - Epoch 4 - Avg Loss: 0.0332

Epoch 5/5 - Stage 1


Epoch 5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1322/1322 [04:22<00:00,  5.03it/s, loss=0.0753]


Stage 1 - Epoch 5 - Avg Loss: 0.0273
Stage 2: ƒê√£ t·∫£i 28948 v√≠ d·ª• cho ph√¢n lo·∫°i c·∫£m x√∫c.
T·∫≠p Stage 2 - train: 23158, val: 5790
T·∫≠p validation ground truth (Stage 2):
                                              review          ground_truth
0  l·∫ßn n√†o ƒëi ƒël m√¨nh c≈©ng gh√© ƒë√¢y ƒÉn tr∆∞a th·ª©c ƒÉ...  {'FOOD': 'positive'}
1  qu√°n n√†y l·∫ßn ƒë·∫ßu ƒÉn ƒëi h∆°i kh√≥ t√¨m qu√†n n·∫±m tr...  {'FOOD': 'positive'}
2  c·∫£nh ƒë·∫πp n∆∞·ªõc u·ªëng kh√¥ng ngon minh u·ªëng n∆∞·ªõc s...  {'FOOD': 'negative'}
3  qu√°n l√∫c n√†o c≈©ng ƒë√¥ng kkk l·∫©u b√≤ ngon l√∫c ƒë·∫ßu...  {'FOOD': 'positive'}
4  ƒë√† l·∫°t c√≥ r·∫•t nhi·ªÅu qu√°n ph·ªü trong ƒë√≥ t√¥i ƒë√£ ƒë...  {'FOOD': 'positive'}


Map:   0%|          | 0/28948 [00:00<?, ? examples/s]

Dataset({
    features: ['review', 'aspect', 'label', 'text'],
    num_rows: 28948
})


Map:   0%|          | 0/28948 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/phobert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  state_dict = torch.load(checkpoint_path)


Epoch,Training Loss,Validation Loss,Accuracy
1,0.7482,0.669337,0.749396
2,0.4823,0.480324,0.866149
3,0.2934,0.359359,0.888428
4,0.4263,0.389795,0.897582
5,0.1613,0.379433,0.907081


  model_stage1_demo.load_state_dict(torch.load("./absa_aspect_model.pt"), strict=False)


Demo pipeline ABSA cho review m·∫´u:
Review: Kh·∫©u v·ªã v·ª´a ƒÉn h·ª£p v·ªá sinh, kh√¥ng gian qu√°n r·ªông view c≈©ng t·∫°m ƒë∆∞·ª£c. ƒê·∫∑c bi·ªát l√† ph·ª•c v·ª• r·∫•t nhi·ªát t√¨nh v√† vui v·∫ª.
K·∫øt qu·∫£ d·ª± ƒëo√°n: {'FOOD': 'neutral', 'AMBIENCE': 'positive', 'SERVICE': 'positive'}
T·∫≠p validation ground truth (Stage 2):
                                              review          ground_truth
0  l·∫ßn n√†o ƒëi ƒël m√¨nh c≈©ng gh√© ƒë√¢y ƒÉn tr∆∞a th·ª©c ƒÉ...  {'FOOD': 'positive'}
1  qu√°n n√†y l·∫ßn ƒë·∫ßu ƒÉn ƒëi h∆°i kh√≥ t√¨m qu√†n n·∫±m tr...  {'FOOD': 'positive'}
2  c·∫£nh ƒë·∫πp n∆∞·ªõc u·ªëng kh√¥ng ngon minh u·ªëng n∆∞·ªõc s...  {'FOOD': 'negative'}
3  qu√°n l√∫c n√†o c≈©ng ƒë√¥ng kkk l·∫©u b√≤ ngon l√∫c ƒë·∫ßu...  {'FOOD': 'positive'}
4  ƒë√† l·∫°t c√≥ r·∫•t nhi·ªÅu qu√°n ph·ªü trong ƒë√≥ t√¥i ƒë√£ ƒë...  {'FOOD': 'positive'}
T·∫≠p d·ª± ƒëo√°n c·ªßa pipeline:
                                              review  \
0  l·∫ßn n√†o ƒëi ƒël m√¨nh c≈©ng gh√© ƒë√¢y ƒÉn t

Evaluating Stage 1:   0%|          | 0/331 [00:00<?, ?it/s]


AttributeError: 'Tensor' object has no attribute 'logits'