<a href="https://colab.research.google.com/github/go-hyun77/ABSA/blob/f1-scoring-optimize/ABSA_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [57]:
# Aspect-Based Sentiment Analysis (ABSA) with T5
# --------------------------------------------------
# This notebook shows how to fine-tune a T5 model for ABSA using HuggingFace.
# SemEval2014 dataset (aspect + sentiment annotations).

!pip install transformers datasets sentencepiece -q
!pip install datasets==3.6.0

import pandas as pd
import numpy as np
from datasets import load_dataset
from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [58]:
# Load Dataset

dataset = load_dataset("alexcadillon/SemEval2014Task4", "restaurants")

In [59]:
# examine dataset
train_data = dataset["train"]

# print first 10 entries of train split
for i in range(10):
    print(f"{i+1}: {train_data[i]}")


1: {'sentenceId': '3121', 'text': 'But the staff was so horrible to us.', 'aspectTerms': [{'term': 'staff', 'polarity': 'negative', 'from': '8', 'to': '13'}], 'aspectCategories': [{'category': 'service', 'polarity': 'negative'}]}
2: {'sentenceId': '2777', 'text': "To be completely fair, the only redeeming factor was the food, which was above average, but couldn't make up for all the other deficiencies of Teodora.", 'aspectTerms': [{'term': 'food', 'polarity': 'positive', 'from': '57', 'to': '61'}], 'aspectCategories': [{'category': 'food', 'polarity': 'positive'}, {'category': 'anecdotes/miscellaneous', 'polarity': 'negative'}]}
3: {'sentenceId': '1634', 'text': "The food is uniformly exceptional, with a very capable kitchen which will proudly whip up whatever you feel like eating, whether it's on the menu or not.", 'aspectTerms': [{'term': 'food', 'polarity': 'positive', 'from': '4', 'to': '8'}, {'term': 'kitchen', 'polarity': 'positive', 'from': '55', 'to': '62'}, {'term': 'menu', 'p

In [60]:
# flatten dataset
indexes = [train_data[i] for i in range(20)]  # first 20 entries


rows = []
for i in indexes:
    sentence_id = i["sentenceId"]
    text = i["text"]

    # If aspect terms exist, iterate through them
    if i["aspectTerms"]:
        for asp in i["aspectTerms"]:
            rows.append({
                "sentenceId": sentence_id,
                "text": text,
                "aspect_term": asp["term"],
                "term_polarity": asp["polarity"],
                "category": None,  # Add these to maintain consistent columns
                "category_polarity": None # Add these to maintain consistent columns
            })
    # If no explicit aspect terms, still record categories
    if i["aspectCategories"]:
        for cat in i["aspectCategories"]:
            rows.append({
                "sentenceId": sentence_id,
                "text": text,
                "aspect_term": None, # Add these to maintain consistent columns
                "term_polarity": None, # Add these to maintain consistent columns
                "category": cat["category"],
                "category_polarity": cat["polarity"]
            })


# Convert to DataFrame
df = pd.DataFrame(rows)
print(df.head(10))

  sentenceId                                               text aspect_term  \
0       3121               But the staff was so horrible to us.       staff   
1       3121               But the staff was so horrible to us.        None   
2       2777  To be completely fair, the only redeeming fact...        food   
3       2777  To be completely fair, the only redeeming fact...        None   
4       2777  To be completely fair, the only redeeming fact...        None   
5       1634  The food is uniformly exceptional, with a very...        food   
6       1634  The food is uniformly exceptional, with a very...     kitchen   
7       1634  The food is uniformly exceptional, with a very...        menu   
8       1634  The food is uniformly exceptional, with a very...        None   
9       2534  Where Gabriela personaly greets you and recomm...        None   

  term_polarity                 category category_polarity  
0      negative                     None              None  
1       

In [61]:
#define model

model_name = "t5-small" #try "google/flan-t5-base" for better results
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

In [62]:
#create aspect-sentiment pairs from dataset
import json

def format_target(ex):
    """
    Build a T5-safe target string like:
      "aspect=food, sentiment=positive; aspect=service, sentiment=negative"
    or "none" if no aspects.
    """
    pairs = []
    for asp in ex.get("aspectTerms", []):
        term = asp.get("term")
        pol = asp.get("polarity")
        if term is None or pol is None:
            continue
        # normalize
        pairs.append(f"aspect={term.strip()}, sentiment={pol.strip().lower()}")
    return "; ".join(pairs) if pairs else "none"



In [93]:
#function to tokenize inputs (as in the plain sentences + aspect terms/values) for model to train on

def preprocess(ex):
    instruction = (
        "Extract aspect-based sentiment. "
        "Return outputs in the exact format: "
        "'aspect=<term>, sentiment=<positive|negative|neutral>' "
        "separated by '; ' for multiple aspects. If no aspects, return 'none'.\n\n"
    )

    # INCLUDE instruction in the input
    input_text = instruction + "ABSA: " + ex["text"]

    target_text = format_target(ex)

    model_inputs = tokenizer(
        input_text,
        text_target=target_text,
        padding="max_length",
        truncation=True,
        max_length=128
    )

    # Add raw text and aspectTerms to the processed example for evaluation
    model_inputs["raw_text"] = ex["text"]
    model_inputs["raw_aspects"] = ex.get("aspectTerms", [])

    return model_inputs

In [None]:
#apply preprocess function to each entry in training and validation test splits
train_dataset = dataset["train"].map(preprocess, remove_columns=[])
valid_dataset = dataset["test"].map(preprocess, remove_columns=[])

In [95]:
# Set PyTorch format (so Trainer can use them directly)
train_dataset.set_format(type="torch")
valid_dataset.set_format(type="torch")

In [96]:
# quick verify format_target on one raw example
print("RAW example:", dataset["train"][0])               #loaded HF dataset
print("FORMATTED target:", format_target(dataset["train"][0]))

RAW example: {'sentenceId': '3121', 'text': 'But the staff was so horrible to us.', 'aspectTerms': [{'term': 'staff', 'polarity': 'negative', 'from': '8', 'to': '13'}], 'aspectCategories': [{'category': 'service', 'polarity': 'negative'}]}
FORMATTED target: aspect=staff, sentiment=negative


In [97]:
# Quick decode check (ensure tokenizer didn't strip or change the target)
print("Decoded input (train[0]):")
print(tokenizer.decode(train_dataset[0]["input_ids"], skip_special_tokens=True))
print("Decoded target (train[0]):")
print(tokenizer.decode(train_dataset[0]["labels"], skip_special_tokens=True))

Decoded input (train[0]):
Extract aspect-based sentiment. Return outputs in the exact format: 'aspect=term>, sentiment=positive|negative|neutral>' separated by ';'for multiple aspects. If no aspects, return 'none'. ABSA: But the staff was so horrible to us.
Decoded target (train[0]):
aspect=staff, sentiment=negative


In [68]:
#sanity check, labels of first index of training data set
print(tokenizer.decode(train_dataset[0]["labels"], skip_special_tokens=True))

aspect=staff, sentiment=negative


In [69]:
#sanity check, input of training data post-preprocessing
print(tokenizer.decode(train_dataset[0]["input_ids"], skip_special_tokens=True))

Extract aspect-based sentiment. Return outputs in the exact format: 'aspect=term>, sentiment=positive|negative|neutral>' separated by ';'for multiple aspects. If no aspects, return 'none'. ABSA: But the staff was so horrible to us.


In [70]:
#sanity check, raw format_target output
print(format_target(dataset["train"][0]))


aspect=staff, sentiment=negative


In [71]:
#load model
model = T5ForConditionalGeneration.from_pretrained(model_name)

In [72]:
#training setup and parameters

args = TrainingArguments(
    output_dir="./absa_t5",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=6,
    weight_decay=0.01,
    save_total_limit=2,
    logging_steps=50,
    fp16=False,    # set True if your GPU supports it
    push_to_hub=False,
)


trainer = Trainer(
  model=model,
  args=args,
  train_dataset=train_dataset,
  eval_dataset=valid_dataset,
)

In [73]:
#train model, no need to execute this block if loading saved model
trainer.train()



Epoch,Training Loss,Validation Loss
1,0.1632,0.086505
2,0.0765,0.058833
3,0.0693,0.051112
4,0.0713,0.048222
5,0.0546,0.046494
6,0.0567,0.045654




TrainOutput(global_step=2286, training_loss=0.3016708865044728, metrics={'train_runtime': 25072.7909, 'train_samples_per_second': 0.728, 'train_steps_per_second': 0.091, 'total_flos': 617361627414528.0, 'train_loss': 0.3016708865044728, 'epoch': 6.0})

In [74]:
#mount drive folder for saving trained model
#!fusermount -u /content/drive
#!rm -rf /content/drive
#from google.colab import drive
#drive.mount('/content/drive')

model_dir = "/content/drive/MyDrive/ABSA_T5_Model"
!ls /content/drive/MyDrive

'3rd Iteration Document'  'CPSC 301'	  'CPSC 439'  'CPSC 566'
 ABSA_T5_Model		  'CPSC 311'	  'CPSC 440'  'CPSC 585'
'AP GOV'		  'CPSC 315'	  'CPSC 452'  'CPSC 589'
 BIO101			  'CPSC 323'	  'CPSC 471'  'EGCP 401'
 Books			  'CPSC 332'	  'CPSC 481'  'EVO Food Places.xlsx'
'Colab Notebooks'	  'CPSC 335'	  'CPSC 485'   MATH338
'CPSC 121'		  'CPSC 351'	  'CPSC 531'   Misc.
'CPSC 223J'		  'CPSC 353 458'  'CPSC 544'  'Oct Genesis.png'
'CPSC 240'		  'CPSC 362'	  'CPSC 548'  'PSC Biotech'
'CPSC 254'		  'CPSC 375'	  'CPSC 552'  'Test Folder'


In [75]:
#save model
model.save_pretrained(model_dir)
tokenizer.save_pretrained(model_dir)


('/content/drive/MyDrive/ABSA_T5_Model/tokenizer_config.json',
 '/content/drive/MyDrive/ABSA_T5_Model/special_tokens_map.json',
 '/content/drive/MyDrive/ABSA_T5_Model/spiece.model',
 '/content/drive/MyDrive/ABSA_T5_Model/added_tokens.json')

In [76]:
# Load tokenizer and model from your Drive
tokenizer = T5Tokenizer.from_pretrained(model_dir, local_files_only=True)
model = T5ForConditionalGeneration.from_pretrained(model_dir, local_files_only=True)


print("Model path:", model.config._name_or_path)
print("Number of parameters:", sum(p.numel() for p in model.parameters()) // 1e6, "M")

Model path: /content/drive/MyDrive/ABSA_T5_Model
Number of parameters: 60.0 M


In [77]:
#test model with text input

# 1️⃣ Confirm model path
print("Model path:", model.config._name_or_path)

# 2️⃣ Confirm the prefix was used during training
print("Example training input:", dataset["train"][0]["text"])

# 3️⃣ Try inference without prefix (if you didn't train with one)
def absa_predict(text):
    instruction = (
        "Extract aspect-based sentiment. "
        "Return outputs in the exact format: "
        "'aspect=<term>, sentiment=<positive|negative|neutral>' "
        "separated by '; ' for multiple aspects. If no aspects, return 'none'.\n\n"
    )
    full_input = instruction + "ABSA: " + text

    inputs = tokenizer(full_input, return_tensors="pt", padding=True)
    outputs = model.generate(
        inputs["input_ids"],
        max_length=128,
        num_beams=4,
        early_stopping=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

#pass sample text input to test absa
print(absa_predict("The food was amazing but the service was terrible."))


Model path: /content/drive/MyDrive/ABSA_T5_Model
Example training input: But the staff was so horrible to us.
aspect=food, sentiment=positive; aspect=service, sentiment=positive


In [78]:
print("Decoded training label:", tokenizer.decode(train_dataset[0]["labels"], skip_special_tokens=True))


Decoded training label: aspect=staff, sentiment=negative


In [79]:
print("Decoded training input:", tokenizer.decode(train_dataset[0]["input_ids"], skip_special_tokens=True))

Decoded training input: Extract aspect-based sentiment. Return outputs in the exact format: 'aspect=term>, sentiment=positive|negative|neutral>' separated by ';'for multiple aspects. If no aspects, return 'none'. ABSA: But the staff was so horrible to us.


In [80]:
import re

#parse absa text outputs into structured data

# load the saved model (if in a new session)!!!
# tokenizer = T5Tokenizer.from_pretrained("/content/drive/MyDrive/ABSA_T5_Model")
# model = T5ForConditionalGeneration.from_pretrained("/content/drive/MyDrive/ABSA_T5_Model")

def absa_generate(text, max_length=128):
    instruction = (
        "Extract aspect-based sentiment. "
        "Return outputs in format: 'aspect=<term>, sentiment=<positive|negative|neutral>' "
        "separated by '; ' or 'none' if no aspects.\n\n"
    )
    input_text = instruction + "ABSA: " + text
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True).to(model.device)
    outputs = model.generate(inputs["input_ids"], max_new_tokens=max_length, num_beams=4, early_stopping=True)
    raw = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return raw

def parse_aspect_string(s):
    """
    Parse outputs like:
      'aspect=food, sentiment=positive; aspect=service, sentiment=negative'
    into list of dicts [{"aspect":"food","sentiment":"positive"}, ...]
    Returns [] if s indicates 'none' or empty output.
    """
    s = s.strip()
    if not s or s.lower() in {"none", "no aspects", "[]"}:
        return []
    pairs = []
    # split on ';' then parse each part
    for part in s.split(";"):
        part = part.strip()
        if not part:
            continue
        # match aspect=<term>, sentiment=<polarity>
        m = re.search(r"aspect\s*=\s*(.+?)\s*,\s*sentiment\s*=\s*(positive|negative|neutral)", part, flags=re.I)
        if m:
            aspect = m.group(1).strip()
            sentiment = m.group(2).strip().lower()
            pairs.append({"aspect": aspect, "sentiment": sentiment})
        else:
            # fallback: try `X was Y` style (rare) or ignore
            m2 = re.search(r"(.+?)\s+was\s+(positive|negative|neutral)", part, flags=re.I)
            if m2:
                pairs.append({"aspect": m2.group(1).strip(), "sentiment": m2.group(2).lower()})
    return pairs

In [81]:
#correct pairs
def extract_good_pairs(example):
    """
    Convert dataset example with `aspectTerms` into our tuple list form
    """
    pairs = []
    for asp in example.get("aspectTerms", []):
        term = asp.get("term")
        pol = asp.get("polarity")
        if term and pol:
            pairs.append({"aspect": term, "sentiment": pol.lower()})
    return pairs

In [82]:
#output test
for s in [
    "The food was amazing but the service was terrible.",
    "I like the ambiance, but the drinks are overpriced."
]:
    raw = absa_generate(s)
    print("RAW:", raw)
    print("PARSED:", parse_aspect_string(raw))
    print()

RAW: aspect=food, sentiment=positive; aspect=service, sentiment=positive
PARSED: [{'aspect': 'food', 'sentiment': 'positive'}, {'aspect': 'service', 'sentiment': 'positive'}]

RAW: aspect=ambiance, sentiment=positive; aspect=drinks, sentiment=positive
PARSED: [{'aspect': 'ambiance', 'sentiment': 'positive'}, {'aspect': 'drinks', 'sentiment': 'positive'}]



In [83]:
#f1 score compute function

def compute_f1(true_pairs, pred_pairs):
    """
    true_pairs: list of dicts [{"aspect":..., "sentiment":...}, ...]
    pred_pairs: list of dicts of same form
    returns precision, recall, f1 for the joint match (aspect+sentiment)
    """
    true_set = set((p["aspect"].lower(), p["sentiment"].lower()) for p in true_pairs)
    pred_set = set((p["aspect"].lower(), p["sentiment"].lower()) for p in pred_pairs)

    TP = len(true_set & pred_set)
    FP = len(pred_set - true_set)
    FN = len(true_set - pred_set)

    precision = TP / (TP + FP + 1e-12)
    recall = TP / (TP + FN + 1e-12)
    f1 = 2 * precision * recall / (precision + recall + 1e-12) if (precision + recall) > 0 else 0.0
    return precision, recall, f1

In [84]:
from tqdm import tqdm

def evaluate_on_dataset(split_dataset, limit=None):
    tot_p = tot_r = tot_f1 = 0.0
    n = 0

    for i, ex in enumerate(tqdm(split_dataset)):
        if limit and i >= limit:
            break

        # 1. Get the good labels from preprocess metadata
        gold = extract_good_pairs({
            "aspectTerms": ex["raw_aspects"]
        })

        # 2. Build the input text
        input_text = "ABSA: " + ex["raw_text"]

        # 3. Run model inference
        raw_output = absa_generate(input_text)

        # 4. Parse prediction into same structure as good pairs
        pred = parse_aspect_string(raw_output)

        # 5. Compute F1 for this sample
        p, r, f1 = compute_f1(gold, pred)

        tot_p += p
        tot_r += r
        tot_f1 += f1
        n += 1

    return {
        "precision": tot_p / n,
        "recall": tot_r / n,
        "f1": tot_f1 / n
    }

In [98]:
scores = evaluate_on_dataset(valid_dataset, limit=200)
print(scores)

 25%|██▌       | 200/800 [03:06<09:19,  1.07it/s]

{'precision': 0.4370833333330023, 'recall': 0.40116666666637685, 'f1': 0.41323809523755906}





In [89]:
for i in range(5):
    t = dataset["test"][i]["text"]
    print("INPUT:", t)
    print("MODEL OUTPUT:", absa_predict(t))
    print("-" * 50)

INPUT: The bread is top notch as well.
MODEL OUTPUT: aspect=bread, sentiment=positive
--------------------------------------------------
INPUT: I have to say they have one of the fastest delivery times in the city.
MODEL OUTPUT: aspect=delivery times, sentiment=positive
--------------------------------------------------
INPUT: Food is always fresh and hot- ready to eat!
MODEL OUTPUT: aspect=Food, sentiment=positive
--------------------------------------------------
INPUT: Did I mention that the coffee is OUTSTANDING?
MODEL OUTPUT: aspect=caffee, sentiment=positive
--------------------------------------------------
INPUT: Certainly not the best sushi in New York, however, it is always fresh, and the place is very clean, sterile.
MODEL OUTPUT: aspect=sushi, sentiment=positive; aspect=place, sentiment=positive
--------------------------------------------------
