In [65]:
from wandb_data_loader import WandbDataLoader

In [66]:
wandb_data_loader = WandbDataLoader(
    entity="dduka-max-planck-society",
)

In [67]:
baseline_df = wandb_data_loader.download_group_runs(
    project="Thesis",
    group_name="Master Seminar",
    metric_names=["test_vis_ndcg", "test_txt_ndcg", "test_vis_map", "test_txt_map", "test_avg_map", "test_charades_ego_mAP", "test_egtea_top_1_accuracy", "test_egtea_mean_class_accuracy"],
    name_filter="DUAL_ENCODER_PRE_BASELINE_DAIS",
)

baseline_history = wandb_data_loader.download_run_history(
    project="Thesis",
    run_id=baseline_df.iloc[0]['run_id'],
    metric_names=["test_vis_ndcg", "test_txt_ndcg", "test_vis_map", "test_txt_map", "test_avg_map", "test_charades_ego_mAP", "test_egtea_top_1_accuracy", "test_egtea_mean_class_accuracy"],
)

# Get the index of the max test_avg_map
max_index = baseline_history["test_avg_map"].idxmax()

baseline_stats = {
    "test_vis_ndcg": baseline_history["test_vis_ndcg"].values[max_index] * 100,
    "test_txt_ndcg": baseline_history["test_txt_ndcg"].values[max_index] * 100,
    "test_vis_map": baseline_history["test_vis_map"].values[max_index] * 100,
    "test_txt_map": baseline_history["test_txt_map"].values[max_index] * 100,
    # "test_avg_map": baseline_history["test_avg_map"].values[max_index] * 100,
    "test_charades_ego_mAP": baseline_history["test_charades_ego_mAP"].values[1] * 100,
    "test_egtea_top_1_accuracy": baseline_history["test_egtea_top_1_accuracy"].values[max_index],
    "test_egtea_mean_class_accuracy": baseline_history["test_egtea_mean_class_accuracy"].values[max_index],
}


Found 1 runs in group 'Master Seminar'
Downloaded data shape: (1, 82)


In [68]:
nlq_baseline_df = wandb_data_loader.download_group_runs(
    project="Thesis",
    group_name="Master Seminar",
    metric_names=["test_vis_ndcg", "test_txt_ndcg", "test_vis_map", "test_txt_map", "test_avg_map", "test_charades_ego_mAP", "test_egtea_top_1_accuracy", "test_egtea_mean_class_accuracy"],
    name_filter="DUAL_ENCODER_NLQ_DAIS",
)

nlq_baseline_history = wandb_data_loader.download_run_history(
    project="Thesis",
    run_id=nlq_baseline_df.iloc[0]['run_id'],
    metric_names=["test_vis_ndcg", "test_txt_ndcg", "test_vis_map", "test_txt_map", "test_avg_map", "test_charades_ego_mAP", "test_egtea_top_1_accuracy", "test_egtea_mean_class_accuracy"],
)

# Get the index of the max test_avg_map
max_index = nlq_baseline_history["test_avg_map"].idxmax()

nlq_baseline_stats = {
    "test_vis_ndcg": nlq_baseline_history["test_vis_ndcg"].values[max_index] * 100,
    "test_txt_ndcg": nlq_baseline_history["test_txt_ndcg"].values[max_index] * 100,
    "test_vis_map": nlq_baseline_history["test_vis_map"].values[max_index] * 100,
    "test_txt_map": nlq_baseline_history["test_txt_map"].values[max_index] * 100,
    # "test_avg_map": nlq_baseline_history["test_avg_map"].values[max_index] * 100,
    "test_charades_ego_mAP": nlq_baseline_history["test_charades_ego_mAP"].values[1] * 100,
    "test_egtea_top_1_accuracy": nlq_baseline_history["test_egtea_top_1_accuracy"].values[max_index],
    "test_egtea_mean_class_accuracy": nlq_baseline_history["test_egtea_mean_class_accuracy"].values[max_index],
}

Found 1 runs in group 'Master Seminar'
Downloaded data shape: (1, 83)


In [69]:
df = wandb_data_loader.download_group_runs(
    project="Thesis",
    group_name="Master Seminar",
    metric_names=["test_vis_ndcg", "test_txt_ndcg", "test_vis_map", "test_txt_map", "test_avg_map", "test_charades_ego_mAP", "test_egtea_top_1_accuracy", "test_egtea_mean_class_accuracy"],
    name_filter="DUAL_ENCODER_SCALED_OFFSET_",
)

Found 25 runs in group 'Master Seminar'
Downloaded data shape: (25, 83)


In [70]:
results = {}
for i in range(len(df)):
    run_id = df.iloc[i]['run_id']

    run_history = wandb_data_loader.download_run_history(
        project="Thesis",
        run_id=df.iloc[i]['run_id'],
        metric_names=["test_vis_ndcg", "test_txt_ndcg", "test_vis_map", "test_txt_map", "test_avg_map", "test_charades_ego_mAP", "test_egtea_top_1_accuracy", "test_egtea_mean_class_accuracy"],
    )

    max_index = run_history["test_avg_map"].idxmax()

    results[run_id] = {
        "test_vis_ndcg": run_history["test_vis_ndcg"].values[max_index] * 100,
        "test_txt_ndcg": run_history["test_txt_ndcg"].values[max_index] * 100,
        "test_vis_map": run_history["test_vis_map"].values[max_index] * 100,
        "test_txt_map": run_history["test_txt_map"].values[max_index] * 100,
        # "test_avg_map": run_history["test_avg_map"].values[max_index] * 100,
        "test_charades_ego_mAP": run_history["test_charades_ego_mAP"].values[1] * 100,
        "test_egtea_top_1_accuracy": run_history["test_egtea_top_1_accuracy"].values[max_index],
        "test_egtea_mean_class_accuracy": run_history["test_egtea_mean_class_accuracy"].values[max_index],
    }

results["baseline"] = baseline_stats
results["nlq_baseline"] = nlq_baseline_stats

In [71]:
print(results)

{'DUAL_ENCODER_SCALED_OFFSET_1.1_DAIS': {'test_vis_ndcg': np.float64(31.887617550275156), 'test_txt_ndcg': np.float64(28.722805284863423), 'test_vis_map': np.float64(32.650101277749805), 'test_txt_map': np.float64(24.794368477083445), 'test_charades_ego_mAP': np.float64(22.044353089283646), 'test_egtea_top_1_accuracy': np.float64(34.041501976284586), 'test_egtea_mean_class_accuracy': np.float64(28.263231642961067)}, 'DUAL_ENCODER_SCALED_OFFSET_1.6_DAIS': {'test_vis_ndcg': np.float64(31.237688245216273), 'test_txt_ndcg': np.float64(28.40034398711701), 'test_vis_map': np.float64(32.031513177264664), 'test_txt_map': np.float64(24.72693516295791), 'test_charades_ego_mAP': np.float64(22.340604134632855), 'test_egtea_top_1_accuracy': np.float64(30.533596837944664), 'test_egtea_mean_class_accuracy': np.float64(26.55304721053752)}, 'DUAL_ENCODER_SCALED_OFFSET_1.7_DAIS': {'test_vis_ndcg': np.float64(31.535205266986367), 'test_txt_ndcg': np.float64(28.768952891297545), 'test_vis_map': np.float64

In [72]:
import pandas as pd
df = pd.DataFrame.from_dict(results, orient='index')
df.to_csv("fixed_scale_results.csv")

In [73]:
sum_baseline = sum(baseline_stats.values())
print(f"Baseline total sum: {sum_baseline}")

other_sums = {}
for key, stats in results.items():
    other_sums[key] = sum(stats.values())

Baseline total sum: 189.8249096461666


In [74]:
for key, total in other_sums.items():
    print(f"{key}: {total} (Difference from baseline: {(total - sum_baseline) / 7})")

DUAL_ENCODER_SCALED_OFFSET_1.1_DAIS: 202.40397929850116 (Difference from baseline: 1.7970099503335095)
DUAL_ENCODER_SCALED_OFFSET_1.6_DAIS: 195.8237287556709 (Difference from baseline: 0.8569741585006179)
DUAL_ENCODER_SCALED_OFFSET_1.7_DAIS: 198.54441646897908 (Difference from baseline: 1.2456438318303558)
DUAL_ENCODER_SCALED_OFFSET_1.4_DAIS: 195.9801171565113 (Difference from baseline: 0.8793153586206748)
DUAL_ENCODER_SCALED_OFFSET_1.5_DAIS: 198.56830047939442 (Difference from baseline: 1.2490558333182622)
DUAL_ENCODER_SCALED_OFFSET_1.2_DAIS: 200.20767174112046 (Difference from baseline: 1.4832517278505537)
DUAL_ENCODER_SCALED_OFFSET_1.3_DAIS: 197.14324952521392 (Difference from baseline: 1.0454771255781898)
DUAL_ENCODER_SCALED_OFFSET_1.8_DAIS: 195.92169434372235 (Difference from baseline: 0.870969242507966)
DUAL_ENCODER_SCALED_OFFSET_2.1_DAIS: 204.24966663575205 (Difference from baseline: 2.0606795699407803)
DUAL_ENCODER_SCALED_OFFSET_1.9_DAIS: 197.50112069069618 (Difference from bas

In [75]:
nlq_baseline_sum = sum(nlq_baseline_stats.values())
print(f"NLQ Baseline total sum: {nlq_baseline_sum} (Difference from baseline: {(nlq_baseline_sum - sum_baseline) / 7})")

NLQ Baseline total sum: 194.61396564113477 (Difference from baseline: 0.6841508564240257)


In [76]:
print(nlq_baseline_sum)

194.61396564113477
