# Hotel Review Aspect Summarization - Finetuning & Inference

This notebook finetunes a language model for aspect-based hotel review summarization using Unsloth + LoRA.

**Supported Models:**
- `gemma3`: Gemma-3 270M Instruct (4-bit)
- `qwen25`: Qwen2.5 0.5B Instruct (4-bit)
- `llama32`: Llama-3.2 1B Instruct (4-bit)

**Data Recipes:**
- `synth_100`: 100 synthetic samples (1800 training examples)
- `human_25`: 25 human samples (450 training examples)
- `mixed`: 75 synth + 25 human (1800 training examples)

## 1. Setup & Installation

In [None]:
# Install the finetuning stack. The pins and the install order below are deliberate.
import os, re
# Reduce the installed torch version to "major.minor" so a matching xformers pin can be chosen.
import torch; v = re.match(r"[0-9]{1,}\.[0-9]{1,}", str(torch.__version__)).group(0)
# torch 2.9 and 2.8 each get a dedicated xformers pin; any other version falls back to 0.0.29.post3.
xformers = "xformers==" + ("0.0.33.post1" if v=="2.9" else "0.0.32.post2" if v=="2.8" else "0.0.29.post3")
# --no-deps: install these packages as-is, without pip resolving their dependencies.
!pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf "datasets==4.3.0" "huggingface_hub>=0.34.0" hf_transfer
!pip install --no-deps unsloth
# Installed last, so these pinned transformers / trl versions are the ones that end up active.
!pip install transformers==4.57.3
!pip install --no-deps trl==0.22.2

## 2. Configuration

In [None]:
# ============ CONFIGURATION ============
# Edit the values in this cell before running the rest of the notebook.

# Hugging Face dataset repository that holds the training recipes and the test set.
HF_DATASET_REPO = "thanh309/hotel-reviews-summarization"  # <-- CHANGE THIS

# Base model to finetune: "gemma3", "qwen25", or "llama32".
MODEL_KEY = "gemma3"

# Training recipe to use: "synth_100", "human_25", or "mixed".
RECIPE = "mixed"

# Optimisation hyperparameters.
MAX_SEQ_LENGTH = 20_000
NUM_EPOCHS = 1
BATCH_SIZE = 1
GRAD_ACCUM = 8
LEARNING_RATE = 2e-4

# LoRA adapter hyperparameters.
LORA_R = 16
LORA_ALPHA = 32

# Artefacts are written to OUTPUT_DIR/OUTPUT_NAME; the run name is "<model>_<recipe>".
OUTPUT_DIR = "outputs"
OUTPUT_NAME = "_".join((MODEL_KEY, RECIPE))

# Echo the effective configuration.
print("Configuration:")
for _label, _value in (
    ("HuggingFace Dataset", HF_DATASET_REPO),
    ("Model", MODEL_KEY),
    ("Recipe", RECIPE),
    ("Output", OUTPUT_NAME),
):
    print(f"  {_label}: {_value}")

## 3. Download Dataset from HuggingFace

In [None]:
from huggingface_hub import hf_hub_download, snapshot_download
from pathlib import Path
import json

# Fetch the selected training recipe and the test set from the Hugging Face Hub.
print(f"Downloading dataset from: {HF_DATASET_REPO}")

# Repository-relative path of every available training recipe.
RECIPE_FILES = {
    name: f"recipes/{name}.jsonl" for name in ("synth_100", "human_25", "mixed")
}

# Both downloads come from the same dataset repository.
_hub_source = {"repo_id": HF_DATASET_REPO, "repo_type": "dataset"}

# Training data for the recipe chosen in the configuration cell.
train_file = hf_hub_download(filename=RECIPE_FILES[RECIPE], **_hub_source)
print(f"Downloaded training data: {train_file}")

# Test set used by the inference section.
test_file = hf_hub_download(filename="space_summ_test.json", **_hub_source)
print(f"Downloaded test data: {test_file}")

## 4. Model & Data Setup

In [None]:
import torch
from datasets import load_dataset
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template, train_on_responses_only
from trl import SFTTrainer, SFTConfig
from transformers import TrainerCallback

# Per-model settings: checkpoint name, chat template name, and the turn markers
# that introduce the user and the assistant parts of a rendered conversation.
def _model_entry(model_name, chat_template, instruction_part, response_part):
    """Bundle the settings of one supported model; every entry loads in 4-bit."""
    return {
        "model_name": model_name,
        "chat_template": chat_template,
        "instruction_part": instruction_part,
        "response_part": response_part,
        "load_in_4bit": True,
    }


MODEL_CONFIGS = {
    "gemma3": _model_entry(
        "unsloth/gemma-3-270m-it-bnb-4bit",
        "gemma-3",
        "<start_of_turn>user\n",
        "<start_of_turn>model\n",
    ),
    "qwen25": _model_entry(
        "unsloth/Qwen2.5-0.5B-Instruct-bnb-4bit",
        "qwen-2.5",
        "<|im_start|>user\n",
        "<|im_start|>assistant\n",
    ),
    "llama32": _model_entry(
        "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
        "llama-3.2",
        "<|start_header_id|>user<|end_header_id|>\n\n",
        "<|start_header_id|>assistant<|end_header_id|>\n\n",
    ),
}

# Settings of the model selected by MODEL_KEY; 4-bit loading is assumed when unspecified.
config = MODEL_CONFIGS[MODEL_KEY]
use_4bit = config.get("load_in_4bit", True)
print(f"Using model: {config['model_name']} (4-bit: {use_4bit})")

In [None]:
# Training history callback
class TrainingHistoryCallback(TrainerCallback):
    """Collect per-step training metrics and mirror them to a JSON file.

    The history is rewritten to ``<output_path>/training_history.json`` after
    every recorded log event, so a partial history is on disk even if training
    is interrupted.

    Args:
        output_path: Directory that receives ``training_history.json``. It is
            created on first write if it does not exist yet.
    """

    def __init__(self, output_path):
        self.output_path = Path(output_path)
        # Parallel lists: index i of every list describes the same log event.
        self.history = {"steps": [], "loss": [], "learning_rate": [], "epoch": [], "grad_norm": []}

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Append one training-step log entry and persist the whole history."""
        # Only training-step logs carry "loss". Other log events (for example the
        # Trainer's end-of-training summary, which reports "train_loss" instead)
        # would otherwise add a row of None values to every column.
        if not logs or "loss" not in logs:
            return

        self.history["steps"].append(state.global_step)
        for column in ("loss", "learning_rate", "epoch", "grad_norm"):
            self.history[column].append(logs.get(column))

        # Do not rely on the caller having created the directory beforehand.
        self.output_path.mkdir(parents=True, exist_ok=True)
        with open(self.output_path / "training_history.json", "w", encoding="utf-8") as f:
            json.dump(self.history, f, indent=2)

    def on_train_end(self, args, state, control, **kwargs):
        """Report where the history file lives once training has finished."""
        print(f"Training history saved to: {self.output_path / 'training_history.json'}")

In [None]:
# Load the base model together with its tokenizer
print(f"Loading model: {config['model_name']} (4-bit: {use_4bit})")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=config["model_name"],
    max_seq_length=MAX_SEQ_LENGTH,
    load_in_4bit=use_4bit,
)

# Wrap the model with LoRA adapters on the attention and MLP projections
print("Adding LoRA adapters...")
_attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
_mlp_projections = ["gate_proj", "up_proj", "down_proj"]
model = FastLanguageModel.get_peft_model(
    model,
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=0,
    bias="none",
    target_modules=_attention_projections + _mlp_projections,
    use_gradient_checkpointing="unsloth",
    random_state=42,
)

# Switch the tokenizer to the chat template that belongs to the selected model
_template_name = config["chat_template"]
print(f"Applying chat template: {_template_name}")
tokenizer = get_chat_template(tokenizer, chat_template=_template_name)

In [None]:
# Load dataset from downloaded file
print(f"Loading dataset: {train_file}")

dataset = load_dataset("json", data_files=train_file, split="train")
print(f"Dataset size: {len(dataset)} examples")


# Format conversations
def formatting_prompts_func(examples):
    """Render every conversation of a batch to chat-template text.

    Args:
        examples: Batch passed by ``Dataset.map(batched=True)``. Its
            "conversations" column is rendered entry by entry with the
            tokenizer's chat template (presumably each entry is a list of
            role/content messages; confirm against the recipe files).

    Returns:
        ``{"text": [...]}`` with one rendered string per conversation.
    """
    bos_token = getattr(tokenizer, "bos_token", None)
    texts = []
    for convo in examples["conversations"]:
        text = tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
        # The template may already emit the BOS token, and the tokenizer adds one
        # again when the trainer tokenizes the "text" column. Drop the rendered
        # one so each training sequence starts with a single BOS.
        if bos_token and text.startswith(bos_token):
            text = text[len(bos_token):]
        texts.append(text)
    return {"text": texts}


dataset = dataset.map(formatting_prompts_func, batched=True)
print("Dataset formatted!")

## 5. Training

In [None]:
# Make sure the run directory exists before anything is written to it
output_path = Path(OUTPUT_DIR) / OUTPUT_NAME
output_path.mkdir(parents=True, exist_ok=True)

# Mixed precision: bf16 when the GPU supports it, fp16 otherwise
use_bf16 = torch.cuda.is_bf16_supported()

# Training config
training_args = SFTConfig(
    # data and output
    dataset_text_field="text",
    output_dir=str(output_path),
    save_strategy="epoch",
    report_to="none",
    logging_steps=1,
    # batch size and schedule
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM,
    num_train_epochs=NUM_EPOCHS,
    warmup_ratio=0.05,
    lr_scheduler_type="cosine",
    # optimiser
    learning_rate=LEARNING_RATE,
    optim="adamw_8bit",
    weight_decay=0.01,
    # precision and reproducibility
    bf16=use_bf16,
    fp16=not use_bf16,
    seed=42,
)

# The callback mirrors the logged metrics into the run directory
history_callback = TrainingHistoryCallback(output_path)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    args=training_args,
    callbacks=[history_callback],
)

# Wrap the trainer with train_on_responses_only, using the selected model's
# instruction and response turn markers
print("Applying train_on_responses_only...")
trainer = train_on_responses_only(
    trainer,
    instruction_part=config["instruction_part"],
    response_part=config["response_part"],
)

In [None]:
# Report the name and total memory of CUDA device 0, if CUDA is available
if torch.cuda.is_available():
    gpu_stats = torch.cuda.get_device_properties(0)
    total_memory_gb = gpu_stats.total_memory / 1024**3
    print(f"GPU: {gpu_stats.name}")
    print(f"Total memory: {total_memory_gb:.1f} GB")

In [None]:
# Run the training loop and report the headline metrics
banner = "=" * 60
print("\n" + banner)
print(f"Starting training: {OUTPUT_NAME}")
print(banner)

trainer_stats = trainer.train()

train_metrics = trainer_stats.metrics
print("\nTraining completed!")
print(f"  Runtime: {train_metrics['train_runtime']:.2f} seconds")
print(f"  Loss: {train_metrics.get('train_loss', 'N/A')}")

In [None]:
# Persist the trained model and the tokenizer into the run directory
save_dir = str(output_path)
print(f"\nSaving model to: {output_path}")
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

# Record the settings and headline metrics of this run next to the saved model
run_metrics = trainer_stats.metrics
training_config = dict(
    model_key=MODEL_KEY,
    model_name=config["model_name"],
    chat_template=config["chat_template"],
    recipe=RECIPE,
    max_seq_length=MAX_SEQ_LENGTH,
    lora_r=LORA_R,
    lora_alpha=LORA_ALPHA,
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    train_runtime_seconds=run_metrics["train_runtime"],
    train_loss=run_metrics.get("train_loss"),
)
with (output_path / "training_config.json").open("w") as f:
    json.dump(training_config, f, indent=2)

print("Model saved!")

## 6. Inference

In [None]:
# Constants for inference
# Aspects summarized for every test entity; one summary is generated per aspect.
ASPECTS = ["rooms", "location", "service", "cleanliness", "building", "food"]

# System message placed first in every inference conversation by create_messages().
# NOTE(review): presumably this has to match the system prompt used in the training
# recipes byte-for-byte; confirm before editing the text.
SYSTEM_PROMPT = """You are an expert abstractive summarizer. Your task is to write a summary of opinions for a specific aspect of a hotel.

**CRITICAL RULES:**
1. **NO META-LANGUAGE:** NEVER say "Guests said," "Reviewers mentioned," "The consensus is," or "Reports indicate."
   - BAD: "Guests found the location convenient."
   - GOOD: "The location is convenient."
2. **DIRECT ASSERTIONS:** State opinions as objective facts.
3. **BREVITY:** Keep it short (15-40 words), strictly under 75 words.
4. **FOCUS:** Identify the main sentiment and primary reasons for it."""

def format_reviews_as_text(reviews):
    """Render reviews as a bulleted text block, one "- ..." line per review.

    The "sentences" of each review are joined with single spaces. Reviews whose
    "sentences" entry is missing or empty are left out of the result.
    """
    sentence_groups = (review.get("sentences", []) for review in reviews)
    bullets = ["- " + " ".join(group) for group in sentence_groups if group]
    return "\n".join(bullets)

def create_messages(aspect, reviews_text):
    """Build the system + user chat messages that request a summary of one aspect.

    The user message names the aspect, embeds ``reviews_text`` under a
    "Reviews:" heading, and closes by restating the length limit.
    """
    instruction = (
        f"Summarize the **{aspect}** aspect from the following hotel reviews.\n"
        "\n"
        "Reviews:\n"
        f"{reviews_text}\n"
        "\n"
        f"Write a concise summary (15-40 words, strictly under 75 words) focusing only on {aspect}."
    )
    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": instruction}
    return [system_message, user_message]

In [None]:
# Set model to eval mode
model.eval()


def generate_summary(aspect, reviews, max_new_tokens=256, temperature=0.7):
    """Generate a summary of one aspect from a hotel's reviews.

    Args:
        aspect: Name of the aspect to summarize (for example "rooms").
        reviews: Review records in the form accepted by
            ``format_reviews_as_text``.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. A value of ``None`` or ``<= 0``
            switches to greedy decoding instead of raising inside ``generate``.

    Returns:
        The decoded continuation only (prompt excluded), with special tokens
        removed and surrounding whitespace stripped.
    """
    reviews_text = format_reviews_as_text(reviews)
    messages = create_messages(aspect, reviews_text)

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # The rendered prompt already contains the template's special tokens. Letting
    # the tokenizer add them a second time would duplicate e.g. the BOS token.
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)

    # Sampling needs a strictly positive temperature; otherwise decode greedily.
    do_sample = temperature is not None and temperature > 0
    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if do_sample:
        generation_kwargs.update(temperature=temperature, top_p=0.9)

    with torch.no_grad():
        output_ids = model.generate(**inputs, **generation_kwargs)

    # generate() returns prompt + continuation; keep only the continuation.
    input_length = inputs.input_ids.shape[1]
    new_tokens = output_ids[0][input_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

In [None]:
# Read the test entities from the downloaded JSON file
print(f"Loading test data: {test_file}")
test_entities = json.loads(Path(test_file).read_text(encoding="utf-8"))

print(f"Total test entities: {len(test_entities)}")

In [None]:
from tqdm.auto import tqdm

# Generate one summary per aspect for every test entity
print(f"\nRunning inference on {len(test_entities)} entities...")
results = []

for entity in tqdm(test_entities, desc="Generating summaries"):
    reviews = entity.get("reviews", [])
    # Keep the inputs and the reference summaries next to the generated ones
    results.append({
        "entity_id": entity.get("entity_id", ""),
        "reviews": reviews,
        "generated_summaries": {
            aspect: generate_summary(aspect, reviews) for aspect in ASPECTS
        },
        "golden_summaries": entity.get("summaries", {}),
    })

print("Inference complete!")

In [None]:
# Write the inference results into the run directory as UTF-8 JSON
results_path = output_path / f"{OUTPUT_NAME}_results.json"
print(f"\nSaving results to: {results_path}")

results_json = json.dumps(results, ensure_ascii=False, indent=2)
results_path.write_text(results_json, encoding="utf-8")

print("Results saved!")

## 7. Download Outputs

Download the trained model and results:

In [None]:
# Print every entry of the run directory together with its size in KB
print(f"Output directory: {output_path}")
print("\nFiles:")
for entry in output_path.iterdir():
    size_kb = entry.stat().st_size / 1024
    print(f"  {entry.name}: {size_kb:.1f} KB")

In [None]:
# Create zip for download
import shutil

# Pack the whole run directory into "<OUTPUT_NAME>.zip" in the working directory
archive_format = "zip"
zip_path = f"{OUTPUT_NAME}.{archive_format}"
shutil.make_archive(base_name=OUTPUT_NAME, format=archive_format, root_dir=output_path)
print(f"\nCreated: {zip_path}")
print("Download this file to get the trained model and results!")

In [None]:
# Report where the archive was written.
# On Colab it can be fetched directly with:
#     from google.colab import files; files.download(zip_path)
print(f"Zip file is available at:  {zip_path}")

## 8. Sample Output

Preview some generated summaries:

In [None]:
# Preview the summaries generated for the first test entity, if there is one
if results:
    sample = results[0]
    rule = "=" * 60
    print(f"Entity: {sample['entity_id']}")
    print("\n" + rule)
    print("GENERATED SUMMARIES:")
    print(rule)
    for aspect, summary in sample["generated_summaries"].items():
        print(f"\n[{aspect.upper()}]\n{summary}")