# Sentiment Classification of Hotel Reviews

This notebook loads customer hotel reviews, classifies their sentiment using `analyze_sentiment` (GPT-4o),
and compares the predicted sentiment with the original `survey_sentiment` label.

In [None]:
import json
import sys
import os
from pathlib import Path

# Make the project's scripts/ directory importable so the recommender module can be loaded.
# The project root is taken to be the parent of the current working directory.
NOTEBOOK_DIR = Path.cwd().resolve()
PROJECT_ROOT = NOTEBOOK_DIR.parent
scripts_dir = PROJECT_ROOT / "scripts"
sys.path.insert(0, str(scripts_dir))

print(f"Project root: {PROJECT_ROOT}")

In [None]:
# The module name contains hyphens, so it cannot be written in a regular
# `import` statement; it is loaded by its string name instead.
import importlib
recommender = importlib.import_module("ner-trip-recommender")

## Load data

- `customer_surveys_hotels_1k.json` — contains `id`, `review`, `customer_satisfaction_score`, `survey_sentiment`
- `customer_surveys_hotels_1k_ner.json` — contains `id`, `text` (same review), `entities`

We join both on `id` to get the full picture.

In [None]:
DATA_DIR = PROJECT_ROOT / "data"


def _read_json(path):
    """Parse the UTF-8 encoded JSON file at `path` and return its contents."""
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)


surveys = _read_json(DATA_DIR / "customer_surveys_hotels_1k.json")
surveys_ner = _read_json(DATA_DIR / "customer_surveys_hotels_1k_ner.json")

# Index the survey records by id so each NER record can be joined to its survey.
# If an id occurs more than once, the last record with that id wins.
survey_by_id = dict((entry["id"], entry) for entry in surveys)

print(f"Loaded {len(surveys)} surveys and {len(surveys_ner)} NER records")
print(f"Sample keys (surveys): {list(surveys[0].keys())}")
print(f"Sample keys (NER):     {list(surveys_ner[0].keys())}")

## Classify sentiment

For each review we call `analyze_sentiment`, which returns a `sentiment_score` from 1 to 5 together with a `reasoning` string, for example:
```json
{"sentiment_score": 4, "reasoning": "..."}
```

We map the 1-5 score to sentiment categories:
- **1-2** → `"negative"`
- **3** → `"neutral"`
- **4-5** → `"positive"`

This matches the 3-class `survey_sentiment` labels (positive/negative/neutral).

In [None]:
# Lookup table for the documented score buckets: 1-2 negative, 3 neutral, 4-5 positive.
SCORE_TO_SENTIMENT = {
    1: "negative",
    2: "negative",
    3: "neutral",
    4: "positive",
    5: "positive",
}


def score_to_sentiment(score):
    """Convert a 1-5 sentiment score into a 3-class sentiment label.

    Args:
        score: Value of the `sentiment_score` field. It must equal one of
            1, 2, 3, 4 or 5 (an integral float such as 4.0 is accepted because
            it compares equal to the integer key).

    Returns:
        "negative" for 1-2, "neutral" for 3, "positive" for 4-5.

    Raises:
        ValueError: If the score is anything else (e.g. None, "4", 2.5 or 7).
            Failing loudly avoids silently counting a malformed score as
            "positive", which would distort the evaluation.
    """
    try:
        return SCORE_TO_SENTIMENT[score]
    except (KeyError, TypeError):
        # KeyError: value outside the table; TypeError: unhashable value.
        raise ValueError(
            f"Unexpected sentiment_score {score!r}; expected an integer from 1 to 5"
        ) from None


results = []
total_records = len(surveys_ner)
unmatched_ids = []  # NER records that have no survey record with the same id

for i, record in enumerate(surveys_ner):
    review_id = record["id"]
    review_text = record["text"]

    survey = survey_by_id.get(review_id)
    if survey is None:
        # Keep the record, but remember that its survey labels will be None.
        unmatched_ids.append(review_id)
        survey = {}

    sentiment = recommender.analyze_sentiment(review_text)

    # Convert the 1-5 sentiment score to a category, rejecting malformed scores.
    score = sentiment["sentiment_score"]
    try:
        predicted = score_to_sentiment(score)
    except ValueError as err:
        raise ValueError(f"Review {review_id}: {err}") from None

    results.append({
        "id": review_id,
        "review": review_text,
        "customer_satisfaction_score": survey.get("customer_satisfaction_score"),
        "survey_sentiment": survey.get("survey_sentiment"),
        "predicted_sentiment_score": score,
        "predicted_sentiment": predicted,
    })

    # Report the first record and then every 50th one.
    if (i + 1) % 50 == 0 or i == 0:
        # str() keeps the progress line working even if ids are not strings.
        print(f"[{i+1}/{total_records}] id={str(review_id)[:8]}... survey={survey.get('survey_sentiment')} predicted={predicted} (score={score})")

print(f"\nDone. Classified {len(results)} reviews.")

if unmatched_ids:
    print(
        f"WARNING: {len(unmatched_ids)} NER records had no matching survey record; "
        f"their survey fields are None (first ids: {unmatched_ids[:5]})"
    )

## Save results

In [None]:
# Persist the classification results as indented JSON; ensure_ascii=False keeps
# non-ASCII characters in the reviews unescaped in the output file.
OUTPUT_PATH = DATA_DIR / "sentiment_classification_results.json"

with OUTPUT_PATH.open("w", encoding="utf-8") as out_file:
    json.dump(results, out_file, indent=2, ensure_ascii=False)

print(f"Saved {len(results)} records to {OUTPUT_PATH}")

In [None]:

# Reload previously saved results so the classification step above can be skipped.
# The file name is spelled out here (rather than reusing a variable from the save
# cell) so this cell also works when the save cell has not been run.
cached_results_path = DATA_DIR / "sentiment_classification_results.json"
results = json.loads(cached_results_path.read_text(encoding="utf-8"))

print(f"Loaded {len(results)} results from sentiment_classification_results.json")

## Quick stats

In [None]:
from collections import Counter

# Label and score distributions over all classified reviews.
survey_counts = Counter(r["survey_sentiment"] for r in results)
predicted_counts = Counter(r["predicted_sentiment"] for r in results)
score_dist = Counter(r["predicted_sentiment_score"] for r in results)

# Number of reviews where the survey label and the predicted label agree.
match = sum(1 for r in results if r["survey_sentiment"] == r["predicted_sentiment"])
total = len(results)

print("Survey sentiment distribution (3-class):")
for label, count in survey_counts.most_common():
    print(f"  {label}: {count}")

print("\nPredicted sentiment distribution (3-class):")
for label, count in predicted_counts.most_common():
    print(f"  {label}: {count}")

print("\nPredicted score distribution (1-5):")
for score in sorted(score_dist):
    print(f"  {score}: {score_dist[score]}")

# Guard the percentage: with an empty `results` list the division would raise
# ZeroDivisionError and hide the rest of the summary.
match_pct = f"{match / total * 100:.1f}%" if total else "n/a"
print(f"\nExact match (survey == predicted): {match}/{total} ({match_pct})")
print("\nBoth survey_sentiment and predicted_sentiment now use 3 classes:")
print("  - negative (score 1-2)")
print("  - neutral (score 3)")
print("  - positive (score 4-5)")

## Evaluation (3-class classification)

The model predicts sentiment on a 1-5 scale, which is converted to 3 classes (negative/neutral/positive).
Because the survey data uses the same 3-class labels, predictions can be compared directly with `survey_sentiment` for every review in the results (1000 in this dataset).

In [None]:
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

# Ground truth is the survey label; the prediction is the label derived from the model score.
y_true, y_pred = [], []
for row in results:
    y_true.append(row["survey_sentiment"])
    y_pred.append(row["predicted_sentiment"])

print(f"Samples: {len(y_true)} (all reviews included)\n")

# Fraction of reviews whose predicted label equals the survey label.
accuracy = accuracy_score(y_true, y_pred)
print(f"Overall Accuracy: {accuracy:.3f}")

# Macro averaging gives every class the same weight regardless of its size;
# zero_division=0 sets the value used when a metric's denominator is zero.
macro_options = {"average": "macro", "zero_division": 0}
precision_macro = precision_score(y_true, y_pred, **macro_options)
recall_macro = recall_score(y_true, y_pred, **macro_options)
f1_macro = f1_score(y_true, y_pred, **macro_options)

print(f"Macro Precision: {precision_macro:.3f}")
print(f"Macro Recall:    {recall_macro:.3f}")
print(f"Macro F1-score:  {f1_macro:.3f}")

print(f"\nFull classification report (3-class):")
report_text = classification_report(y_true, y_pred, digits=3, zero_division=0)
print(report_text)

## Confusion Matrix (3-class)

In [None]:
import plotly.figure_factory as ff

# Fix the class order explicitly so the matrix rows/columns line up with the axis labels.
labels = ["negative", "neutral", "positive"]
cm = confusion_matrix(y_true, y_pred, labels=labels)

# Cell annotations: the raw counts rendered as strings.
cm_text = [list(map(str, row)) for row in cm]

predicted_axis = [f"pred: {label}" for label in labels]
true_axis = [f"true: {label}" for label in labels]

fig_cm = ff.create_annotated_heatmap(
    z=cm,
    x=predicted_axis,
    y=true_axis,
    annotation_text=cm_text,
    colorscale="Blues",
    showscale=True,
)

layout_options = {
    "title": "Confusion Matrix (3-class: negative/neutral/positive)",
    "xaxis_title": "Predicted",
    "yaxis_title": "Actual (Survey)",
    "height": 500,
    "width": 600,
}
fig_cm.update_layout(**layout_options)
fig_cm.show()