In [None]:
import os
import pandas as pd
import numpy as np
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score
from tqdm import tqdm
import sys
import torch
from PIL import Image
import matplotlib.pyplot as plt

# Make the "CLIP" directory under the current working directory importable;
# this has to happen before `import clip` so the import can resolve from there.
sys.path.append(os.path.join(os.getcwd(), 'CLIP'))
import clip

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Shared NLTK BLEU smoothing function (SmoothingFunction.method4), passed to
# the sentence_bleu calls in compute_bleu_scores.
smoother = SmoothingFunction().method4

def compute_bleu_scores(references, hypothesis):
    """Return ``(BLEU-1, BLEU-2)`` for one hypothesis sentence.

    Args:
        references: Iterable of reference sentences (strings).
        hypothesis: Candidate sentence (string).

    Returns:
        A 2-tuple of sentence-level BLEU scores: the first weights unigrams
        only, the second weights unigrams and bigrams equally. Both use the
        module-level ``smoother``.
    """
    # Whitespace tokenisation for both sides.
    ref_tokens = [sentence.split() for sentence in references]
    hyp_tokens = hypothesis.split()

    ngram_weights = (
        (1, 0, 0, 0),      # BLEU-1
        (0.5, 0.5, 0, 0),  # BLEU-2
    )
    return tuple(
        sentence_bleu(
            ref_tokens,
            hyp_tokens,
            weights=weights,
            smoothing_function=smoother,
        )
        for weights in ngram_weights
    )

def compute_meteor_score_nltk(references, hypothesis):
    """Return the NLTK METEOR score for one hypothesis, capped at 1.0.

    Args:
        references: Iterable of reference sentences (strings).
        hypothesis: Candidate sentence (string).

    Returns:
        The score from ``meteor_score`` on whitespace-split tokens, or 1.0
        when that score is not below 1.0.
    """
    tokenized_refs = [sentence.split() for sentence in references]
    tokenized_hyp = hypothesis.split()
    raw_score = meteor_score(tokenized_refs, tokenized_hyp)
    # Upper-bound the result at 1.0.
    return raw_score if raw_score < 1.0 else 1.0

def evaluate(filepath):
    """Score the predicted captions of one CSV with BLEU-1, BLEU-2 and METEOR.

    Each row's ``predicted_caption`` is compared against its
    ``training_caption`` (used as the single reference); both are converted
    to ``str`` and lower-cased first. The per-row scores are added as the
    columns ``bleu1_pred``, ``bleu2_pred`` and ``meteor_pred``, the result is
    written to ``<input stem>_with_scores.csv``, and the mean of each metric
    is printed.

    Args:
        filepath: Path of a CSV file containing ``training_caption`` and
            ``predicted_caption`` columns.

    Returns:
        None.
    """
    df = pd.read_csv(filepath)
    bleu1_scores = []
    bleu2_scores = []
    meteor_scores = []

    for _, row in tqdm(df.iterrows(), total=len(df), desc=f"Evaluating {os.path.basename(filepath)}"):
        references = [str(row['training_caption']).lower()]
        hypothesis = str(row['predicted_caption']).lower()

        bleu1, bleu2 = compute_bleu_scores(references, hypothesis)
        meteor = compute_meteor_score_nltk(references, hypothesis)

        bleu1_scores.append(bleu1)
        bleu2_scores.append(bleu2)
        meteor_scores.append(meteor)

    df['bleu1_pred'] = bleu1_scores
    df['bleu2_pred'] = bleu2_scores
    df['meteor_pred'] = meteor_scores

    # Derive the output name from the path stem instead of str.replace():
    # replace() rewrites every ".csv" occurrence in the path, and when the
    # path contains no ".csv" it returns the path unchanged, which would make
    # to_csv() overwrite the input file.
    stem, _ = os.path.splitext(filepath)
    output_file = f"{stem}_with_scores.csv"
    df.to_csv(output_file, index=False)

    print(f"\n{os.path.basename(output_file)}")
    print(f"BLEU-1:   {np.mean(bleu1_scores):.4f}")
    print(f"BLEU-2:   {np.mean(bleu2_scores):.4f}")
    print(f"METEOR:   {np.mean(meteor_scores):.4f}")

# Score every input CSV in the working directory. Files generated by either
# scoring pass ("*_with_scores.csv" from evaluate, "*_with_clip_scores.csv"
# from evaluate_clip) are skipped, so re-running this cell does not score
# previously generated outputs and pile up derived files.
for file in os.listdir("."):
    if file.endswith(".csv") and not file.endswith(("_with_scores.csv", "_with_clip_scores.csv")):
        evaluate(file)

In [None]:
# Load the CLIP "ViT-B/32" model together with the image preprocessing
# callable returned alongside it.
model, preprocess = clip.load("ViT-B/32", device=device)
# NOTE(review): clip.load was already given `device`, so this move looks
# redundant — confirm before removing.
model = model.to(device)

# Directory that each CSV row's "image" value is joined onto in evaluate_clip.
image_folder = "RISCM/resized/"

def compute_clip_score(image, caption):
    """Return the CLIP cosine similarity between an image and a caption.

    Args:
        image: Image accepted by the module-level ``preprocess`` callable.
        caption: Caption text to compare against the image.

    Returns:
        A Python float: the dot product of the L2-normalised image and text
        embeddings produced by the module-level ``model``.
    """
    # NOTE(review): clip.tokenize may reject captions longer than the model's
    # context length — confirm whether truncation is wanted here.
    pixels = preprocess(image).unsqueeze(0).to(device)
    tokens = clip.tokenize([caption]).to(device)

    with torch.no_grad():
        img_emb = model.encode_image(pixels)
        txt_emb = model.encode_text(tokens)

        # Scale both embeddings to unit length so the dot product below is a
        # cosine similarity.
        img_emb = img_emb / img_emb.norm(dim=-1, keepdim=True)
        txt_emb = txt_emb / txt_emb.norm(dim=-1, keepdim=True)

        cosine = img_emb @ txt_emb.T

    return cosine.item()

def normalize_score(s, min_val=0.05, max_val=0.40):
    """Linearly rescale ``s`` from ``[min_val, max_val]`` to ``[0, 1]``, clamped.

    Args:
        s: Raw score, or ``None`` for a missing score.
        min_val: Raw value that maps to 0.0.
        max_val: Raw value that maps to 1.0.

    Returns:
        ``None`` when ``s`` is ``None``; NaN when ``s`` is NaN; otherwise
        ``(s - min_val) / (max_val - min_val)`` clamped to ``[0.0, 1.0]``.
    """
    if s is None:
        return None
    # Propagate NaN explicitly: every ordering comparison with NaN is False,
    # so the clamp below would otherwise turn an undefined score into 1.0.
    if np.isnan(s):
        return s
    norm = (s - min_val) / (max_val - min_val)
    return max(0.0, min(1.0, norm))

def evaluate_clip(filepath):
    """Add CLIP image-caption scores to one CSV and print their averages.

    For every row, the image named in the ``image`` column is loaded from
    ``image_folder`` and scored against the row's ``predicted_caption`` with
    ``compute_clip_score``. Rows whose image cannot be opened get ``None``.
    The raw scores are stored in ``clip_score`` and their rescaled values
    (see ``normalize_score``) in ``clip_score_normalized``. The result is
    written to ``<input stem>_with_clip_scores.csv``.

    If no image could be scored, a message is printed and nothing is written.

    Args:
        filepath: Path of a CSV file containing ``image`` and
            ``predicted_caption`` columns.

    Returns:
        None.
    """
    df = pd.read_csv(filepath)

    clip_scores = []
    for _, row in tqdm(df.iterrows(), total=len(df), desc=f"Scoring {os.path.basename(filepath)}"):
        image_path = os.path.join(image_folder, row["image"])

        try:
            # The context manager closes the underlying file as soon as the
            # pixels have been copied by convert(), instead of leaving the
            # handle open until garbage collection.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
        except Exception as e:
            # Best effort: report the unreadable image and keep going so one
            # bad file does not abort the whole run.
            print(f"Failed to open image at {image_path}: {e}")
            clip_scores.append(None)
            continue

        caption = str(row["predicted_caption"])
        score = compute_clip_score(image, caption)
        clip_scores.append(score)

    df["clip_score"] = clip_scores

    valid_scores = [s for s in clip_scores if s is not None]
    if not valid_scores:
        print(f"No valid CLIP scores for {filepath}")
        return

    # Fixed bounds used to rescale raw CLIP similarities to [0, 1].
    min_score = 0.05
    max_score = 0.40

    normalized_scores = [normalize_score(s, min_score, max_score) for s in clip_scores]
    df["clip_score_normalized"] = normalized_scores

    # Derive the output name from the path stem instead of str.replace():
    # replace() rewrites every ".csv" occurrence in the path, and when the
    # path contains no ".csv" it returns the path unchanged, which would make
    # to_csv() overwrite the input file.
    stem, _ = os.path.splitext(filepath)
    output_file = f"{stem}_with_clip_scores.csv"
    df.to_csv(output_file, index=False)

    # Averages are taken over the rows that actually received a score.
    overall_clip_avg = sum(valid_scores) / len(valid_scores)
    overall_norm_avg = sum(s for s in normalized_scores if s is not None) / len(valid_scores)

    print(f"\n{os.path.basename(output_file)}")
    print(f"  CLIP Avg:         {overall_clip_avg:.4f}")
    print(f"  Normalized Avg:   {overall_norm_avg:.4f}")
    print(f"  Normalization bounds: Min = {min_score:.4f}, Max = {max_score:.4f}")

# Run CLIP scoring on every CSV in the working directory except files that
# already carry the "_with_clip_scores.csv" suffix. Other CSVs, including
# "*_with_scores.csv" files, are scored as well.
for file in os.listdir("."):
    if not file.endswith(".csv") or file.endswith("_with_clip_scores.csv"):
        continue
    evaluate_clip(file)