# CLIP Multimodal Model Evaluation

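This notebook loads a pretrained CLIP model and evaluates it on the test split. Metrics and visualizations are saved to the evaluation directory (`evaluation/clip_multimodal`). The test split is read from `preprocessing/splits.json` under the CLIP outputs directory, so that file must exist before the notebook is run.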

In [None]:
# Setup: imports and paths
import sys
from pathlib import Path
import json
import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt

# Put the shared `utils` directory (resolved relative to the current working
# directory) at the front of sys.path so the project path constants can be imported.
# NOTE(review): the import below targets the `utils` package, which normally
# requires the *parent* of `utils` to be on sys.path — confirm that this insert
# is what makes the import work, and that the notebook is always launched from
# its own folder.
sys.path.insert(0, str(Path('../../utils').resolve()))
from utils.paths import ROOT_PATH, CACHE_PATH, CLIP_OUTPUTS_PATH

# Evaluation directory: added to sys.path below, and the destination for
# metrics.json and confusion_matrix.png written by later cells.
EVAL_DIR = ROOT_PATH / 'evaluation/clip_multimodal'
MODEL_NAME = 'openai/clip-vit-base-patch16'  # or your custom path if fine-tuned
# JSON file read by the next cell; it must provide 'labels' and 'splits' -> 'test'.
SPLITS_PATH = CLIP_OUTPUTS_PATH / 'preprocessing/splits.json'

# Make the evaluation helper modules importable (presumably they live in
# EVAL_DIR — verify). The insert has to happen before the two imports below.
sys.path.insert(0, str(EVAL_DIR))
from eval_utils import compute_metrics
from model_utils import load_model_and_processor, load_test_dataset

In [5]:
# Load test split and labels
# splits.json must provide a 'labels' list and the test records under
# 'splits' -> 'test'; each record needs 'image', 'text' and 'label' keys.
try:
    with open(SPLITS_PATH, 'r', encoding='utf-8') as f:
        splits = json.load(f)
except FileNotFoundError as err:
    # Same exception type as before, but with a message that says what to do.
    raise FileNotFoundError(
        f"Split file not found: {SPLITS_PATH}. "
        "Generate the preprocessing outputs before running this notebook."
    ) from err
labels = splits['labels']
test_records = splits['splits']['test']

# Build the name -> index lookup once instead of scanning `labels` for every
# record. `setdefault` keeps the first position of a repeated name, which is
# what `labels.index(...)` would return.
label_to_index = {}
for idx, name in enumerate(labels):
    label_to_index.setdefault(name, idx)

# Fail early, naming the offending labels, rather than on the first bad record.
unknown_labels = {r['label'] for r in test_records} - label_to_index.keys()
if unknown_labels:
    raise ValueError(
        f"Test records use labels missing from splits['labels']: {sorted(unknown_labels, key=str)}"
    )

# One row per test record; 'label' holds the integer class index.
# Passing `columns` keeps the expected columns even when the test split is empty.
test_df = pd.DataFrame(
    [
        {'image': r['image'], 'text': r['text'], 'label': label_to_index[r['label']]}
        for r in test_records
    ],
    columns=['image', 'text', 'label'],
)

FileNotFoundError: [Errno 2] No such file or directory: '/home/mateusz/dev/pjatk_zum/outputs/clip_multimodal/preprocessing/splits.json'

In [None]:
# Load CLIP model and processor
# `load_model_and_processor` comes from the project's model_utils module and
# returns three values. `device` is what the inference cell passes to `.to(...)`
# on the processor output before calling the model.
model, processor, device = load_model_and_processor(MODEL_NAME)

In [None]:
# Run inference
# Zero-shot classification: each image is scored against every class name and
# the highest-scoring class index becomes the prediction.
#
# The previous version passed only the sample's own text to the processor, so
# `logits_per_image[0]` held a single score and argmax was always 0.
# NOTE(review): the per-sample text is therefore not part of the scoring here;
# if it should contribute, combine a text-to-class similarity with the image one.
class_texts = [str(name) for name in labels]

all_preds, all_labels = [], []
with torch.no_grad():
    # itertuples keeps column dtypes and avoids building a Series per row.
    for row in test_df.itertuples(index=False):
        # NOTE(review): `row.image` comes straight from splits.json, so it is
        # probably a file path rather than a decoded image — confirm the
        # processor accepts it, or load the image first (load_test_dataset from
        # model_utils may be meant for that).
        inputs = processor(
            text=class_texts,
            images=row.image,
            return_tensors='pt',
            padding=True,
            truncation=True,
        ).to(device)
        outputs = model(**inputs)
        # Image-vs-text scores for the single image in this batch: one per class text.
        class_scores = outputs.logits_per_image.cpu().numpy()[0]
        all_preds.append(int(np.argmax(class_scores)))
        all_labels.append(int(row.label))
all_preds, all_labels = np.array(all_preds), np.array(all_labels)

In [None]:
# Compute and save metrics
# `compute_metrics` receives the true indices, the predicted indices and the
# class names; its result is written as indented JSON into the evaluation
# directory.
metrics = compute_metrics(all_labels, all_preds, labels)

metrics_path = EVAL_DIR / 'metrics.json'
with open(metrics_path, mode='w', encoding='utf-8') as out_file:
    json.dump(metrics, out_file, indent=2)

# Bare expression on the last line so the notebook displays the metrics.
metrics

In [None]:
# Visualize confusion matrix
# Render metrics['confusion_matrix'] as a heatmap with one tick per class name,
# save the figure into the evaluation directory, then display it.
cm = np.array(metrics['confusion_matrix'])
tick_positions = np.arange(len(labels))

fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(cm, cmap='Blues')

# Both axes use the same class order; x tick labels are rotated for readability.
ax.set_xticks(tick_positions)
ax.set_xticklabels(labels, rotation=45, ha='right')
ax.set_yticks(tick_positions)
ax.set_yticklabels(labels)

# The x axis (matrix columns) is labelled 'Predicted', the y axis (rows) 'True'.
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
fig.colorbar(im)

fig.tight_layout()
fig.savefig(EVAL_DIR / 'confusion_matrix.png')
plt.show()

## Results
- Metrics are saved to `metrics.json` in the evaluation directory.
- Confusion matrix is saved as `confusion_matrix.png`.
- See the classification report in the metrics output above.