# Sentiment Embeddings Model Evaluation

This notebook loads the archived sentiment model and evaluates it on the test set. Metrics and visualizations are saved to the evaluation directory.

In [None]:
# Setup: imports and paths
import sys
from pathlib import Path
import json
import numpy as np
import matplotlib.pyplot as plt

# Make the shared `utils` directory importable so `paths` can be found.
# The path is relative, so it resolves against the kernel's current working
# directory — run the notebook from its own folder.
sys.path.insert(0, str(Path('../../utils').resolve()))
from paths import CACHE_PATH, ROOT_PATH

# Locations used by the cells below, all derived from the project-level paths.
# EVAL_DIR: holds the helper modules imported below; metrics.json and
#           confusion_matrix.png are written here.
# ARCHIVE_PATH / CACHE_DIR: the two arguments handed to unpack_model.
# SPLITS_PATH: the file handed to load_test_dataset.
EVAL_DIR = ROOT_PATH / 'evaluation/sentiment_embeddings'
ARCHIVE_PATH = ROOT_PATH / 'models/sentiment_embeddings/sentiment_distilbert_best/sentiment_distilbert/best'
CACHE_DIR = CACHE_PATH / 'sentiment_embeddings'
SPLITS_PATH = ROOT_PATH / 'outputs/sentiment_embeddings/preprocessing/splits.json'

# EVAL_DIR must be on sys.path before the helper imports that follow;
# presumably eval_utils, model_utils and unpack_model live in that
# directory — confirm if the imports fail.
sys.path.insert(0, str(EVAL_DIR))
from eval_utils import compute_metrics
from model_utils import load_model_and_tokenizer, load_test_dataset
from unpack_model import unpack_model

In [None]:
# Unpack model to cache dir (if needed)
# NOTE(review): whether an already-populated CACHE_DIR is skipped is decided
# inside unpack_model — confirm there before relying on "if needed".
unpack_model(ARCHIVE_PATH, CACHE_DIR)
# Load test split and labels
# `test_df` supplies the 'text' and 'label' columns read during inference;
# `labels` is later passed to compute_metrics and used as the tick labels of
# the confusion-matrix plot.
test_df, labels = load_test_dataset(SPLITS_PATH)

In [None]:
# Load model and tokenizer
# CACHE_DIR is the same directory that was given to unpack_model as its target.
# `device` is where the tokenized inputs are moved before each forward pass;
# presumably the model itself is already placed on it by the helper — TODO confirm.
model, tokenizer, device = load_model_and_tokenizer(CACHE_DIR)

In [None]:
# Run inference over the test split in mini-batches.
# Produces two aligned 1-D arrays: `all_preds` (argmax class index per row)
# and `all_labels` (the 'label' column of test_df).

# `torch` is imported here because no earlier cell imports it; without this
# the `torch.no_grad()` context below raises NameError.
import torch

BATCH_SIZE = 32  # rows per forward pass; lower this if memory is tight

# Put the model in evaluation mode (assumes it is a torch.nn.Module, which
# the `model(**inputs).logits` call pattern suggests).
model.eval()

texts = test_df['text'].tolist()
all_preds = []
with torch.no_grad():  # gradients are not needed for evaluation
    for start in range(0, len(texts), BATCH_SIZE):
        batch_texts = texts[start:start + BATCH_SIZE]
        # padding=True pads every text in the batch to the longest one so the
        # batch can be stacked into a single tensor.
        inputs = tokenizer(batch_texts, return_tensors='pt', truncation=True, padding=True).to(device)
        logits = model(**inputs).logits.cpu().numpy()
        # One predicted class index per row of the batch.
        all_preds.extend(np.argmax(logits, axis=-1))
all_preds, all_labels = np.array(all_preds), np.array(test_df['label'].tolist())

In [None]:
# Score the predictions against the true labels and persist the result
metrics = compute_metrics(all_labels, all_preds, labels)
metrics_file = EVAL_DIR / 'metrics.json'
metrics_file.write_text(json.dumps(metrics, indent=2), encoding='utf-8')
# Trailing bare expression so the notebook renders the metrics as cell output
metrics

In [None]:
# Draw the confusion matrix as a heatmap and save it to the evaluation directory
confusion = np.array(metrics['confusion_matrix'])
tick_positions = np.arange(len(labels))

fig, ax = plt.subplots(figsize=(8,8))
heatmap = ax.imshow(confusion, cmap='Blues')

# One tick per class on both axes; x labels are rotated so long names fit
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(labels, rotation=45, ha='right')
ax.set_yticklabels(labels)

ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')

fig.colorbar(heatmap)
fig.tight_layout()
fig.savefig(EVAL_DIR / 'confusion_matrix.png')
plt.show()

## Results
- Metrics are saved to `metrics.json` in the evaluation directory.
- Confusion matrix is saved as `confusion_matrix.png`.
- See the classification report in the metrics output above.