## Imports

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from vidore_benchmark.evaluation.eval_manager import EvalManager
from vidore_benchmark.utils.constants import OUTPUT_DIR

# Directory where every figure produced by this notebook is written.
# NOTE(review): if OUTPUT_DIR is a relative path, the chdir below changes where
# it resolves for the later `savefig` calls — confirm it is absolute.
RESULTS_DIR = OUTPUT_DIR / "token_pooling"
RESULTS_DIR.mkdir(exist_ok=True, parents=True)

sns.set_style("whitegrid")

# Walk up from the starting directory until the working directory is the one
# containing "experiments", so that the relative "./experiments/..." glob used
# to load the metrics resolves.
# At the filesystem root `os.chdir("..")` is a no-op, so without the guard the
# loop would spin forever when no ancestor contains "experiments".
_start_dir = Path.cwd()
while "experiments" not in os.listdir():
    _current_dir = Path.cwd()
    if _current_dir.parent == _current_dir:
        # Leave the process where it started before reporting the failure.
        os.chdir(_start_dir)
        raise FileNotFoundError(f"No 'experiments' directory found in '{_start_dir}' or any of its parents.")
    os.chdir("..")

## Load data

In [None]:
# Collect every "metrics" directory written by the pool-factor experiment.
# Sorting makes the row order of `df` reproducible: glob yields paths in
# filesystem order, which differs between machines.
metrics_paths = sorted(
    Path.cwd().glob("./experiments/2024-08-06_impact_of_pool_factor_on_retrieval/outputs/**/metrics/")
)
if not metrics_paths:
    # Fail here with an actionable message rather than letting `pd.concat`
    # raise "No objects to concatenate" on an empty list.
    raise FileNotFoundError(
        "No 'metrics' directory found under "
        f"'{Path.cwd() / 'experiments/2024-08-06_impact_of_pool_factor_on_retrieval/outputs'}'."
    )

# One EvalManager per metrics directory; each contributes its `.melted` table.
eval_managers = [EvalManager.from_dir(str(path)) for path in metrics_paths]
list_df = [eval_manager.melted for eval_manager in eval_managers]

# Stack all runs into a single frame with a fresh 0..n-1 index.
df = pd.concat(list_df, ignore_index=True)
df

## Data preprocessing

In [None]:
# The pool factor is encoded in the model name as "pool_factor_<N>": parse <N>
# as an integer, then overwrite the per-run names with a single model label.
df["pool_factor"] = df["model"].str.extract(r"pool_factor_(\d+)", expand=False).astype(int)
df["model"] = "vidore/colpali"

# Restrict the frame to the single metric studied in this notebook.
METRIC_OF_INTEREST = "ndcg_at_5"
df = df.loc[df["metric"].eq(METRIC_OF_INTEREST)].copy()

# Relative performance: each score divided by the best score obtained for the
# same (model, dataset) pair across all pool factors, as a ratio and in percent.
df["relative_performance"] = df["score"] / df.groupby(["model", "dataset"])["score"].transform("max")
df["relative_performance_percent"] = df["relative_performance"] * 100

# Relative storage: the inverse of the pool factor, as a ratio and in percent.
df["relative_storage"] = 1 / df["pool_factor"]
df["relative_storage_percent"] = df["relative_storage"] * 100

## Sanitize DataFrame

In [None]:
# Display labels for the columns that appear on plot axes and legends.
column_mapping = {
    # Identifiers
    "model": "Model",
    "dataset": "Dataset",
    "pool_factor": "Pool Factor",
    # Retrieval quality
    "score": "NDCG@5",
    "relative_performance": "Relative NDCG@5",
    "relative_performance_percent": "Relative NDCG@5 (%)",
    # Storage footprint
    "relative_storage": "Relative Storage",
    "relative_storage_percent": "Relative Storage (%)",
}

# `rename` returns a new frame, so `df` keeps its original column names.
df_sanitized = df.rename(column_mapping, axis="columns")

## Plots

### Figure 1

In [None]:
# NDCG@5 against the pool factor, one line per dataset.
savepath = RESULTS_DIR / f"pool_factor_vs_{METRIC_OF_INTEREST}.png"

fig, ax = plt.subplots(figsize=(8, 5))
sns.lineplot(
    data=df_sanitized,
    x="Pool Factor",
    y="NDCG@5",
    hue="Dataset",
    ax=ax,  # the axes just created, which is also the implicit current one
)
fig.tight_layout()
fig.savefig(str(savepath))

### Figure 1bis

In [None]:
# Mean relative NDCG@5 over all rows sharing a pool factor, labelled as an
# "Average" series. It is kept in its own frame (not concatenated into
# `df_sanitized`) so it can be drawn as a separate, highlighted line.
df_with_mean = (
    df_sanitized.groupby("Pool Factor")[["Relative NDCG@5 (%)"]]
    .mean()
    .reset_index()
    .assign(Model="vidore/colpali", Dataset="Average")
)

df_with_mean

In [None]:
# Relative NDCG@5 against the pool factor: faint per-dataset lines with the
# cross-dataset average overlaid as a thick red line.
savepath = RESULTS_DIR / f"pool_factor_vs_relative_{METRIC_OF_INTEREST}.png"

fig, ax = plt.subplots(figsize=(8, 5))

# Per-dataset curves, drawn semi-transparent so the average stands out.
sns.lineplot(
    data=df_sanitized,
    x="Pool Factor",
    y="Relative NDCG@5 (%)",
    hue="Dataset",
    linewidth=1.5,
    alpha=0.5,
    ax=ax,
)
# Average curve on the same axes.
sns.lineplot(
    data=df_with_mean,
    x="Pool Factor",
    y="Relative NDCG@5 (%)",
    color="red",
    linewidth=3.0,
    alpha=1,
    label="Average",
    ax=ax,
)

fig.tight_layout()
fig.savefig(str(savepath))

### Figure 2

In [None]:
# Average relative performance and relative storage per (model, pool factor).
df_sanitized_agg = df_sanitized.groupby(["Model", "Pool Factor"])[
    ["Relative NDCG@5 (%)", "Relative Storage (%)"]
].mean()
df_sanitized_agg = df_sanitized_agg.reset_index()

# Pool factor 1 corresponds to 100 % relative storage (the reference point), so
# its relative performance is pinned to 100 regardless of the computed mean.
df_sanitized_agg.loc[df_sanitized_agg["Pool Factor"].eq(1), "Relative NDCG@5 (%)"] = 100

df_sanitized_agg

In [None]:
# Storage / performance trade-off: one point per pool factor.
fig, ax = plt.subplots(figsize=(6, 6))
sns.scatterplot(
    data=df_sanitized_agg,
    x="Relative Storage (%)",
    y="Relative NDCG@5 (%)",
    ax=ax,
)
fig.tight_layout()

# Dashed red guides marking 100 % storage and 100 % relative performance.
ax.axvline(x=100, color="red", linestyle="--")
ax.axhline(y=100, color="red", linestyle="--")

savepath = RESULTS_DIR / f"storage_vs_{METRIC_OF_INTEREST}.png"
fig.savefig(str(savepath))