In [None]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import seaborn as sns

# Apply the seaborn theme FIRST. sns.set_theme() rewrites several font-related
# rcParams (font.family, font.sans-serif, font.size, ...), so calling it after
# the manual settings below would silently discard part of them.
sns.set_theme()

# Configure Matplotlib to use LaTeX for text rendering
plt.rcParams['text.usetex'] = True

# Set fonts for Matplotlib
plt.rcParams['font.serif'] = 'Palatino'
plt.rcParams['font.sans-serif'] = 'Helvetica'
plt.rcParams['font.monospace'] = 'Courier'
plt.rcParams['font.size'] = 11

# Default figure size: specified in centimetres, converted to inches because
# Matplotlib's `figsize` is expressed in inches.
width_cm = 15
height_cm = 6
width_in = width_cm / 2.54
height_in = height_cm / 2.54
figsize = (width_in, height_in)


import numpy as np
import pandas as pd 
import wandb
from metrics.plotting_utils import save_plot, get_history_from_project, save_csv, get_dataset_name_mapping
from ood_detection.config import Config
from datasets.config import DATASETS_DICT



# Lookup table used below to translate the raw `name` values of each result
# frame into display names (it is indexed as `dataset_mapping[x]`).
dataset_mapping = get_dataset_name_mapping()
# Weights & Biases API client.
api = wandb.Api()

### Classification improvements:
1. Compare the zero-shot baseline to TIP and TIP-f. Maybe add a linear baseline.


In [None]:
# Load the results of the TIP-adapter project (16 shots, temperature 0.01 per
# the project name).
# NOTE(review): presumably a DataFrame with a 'name' column plus one column per
# accuracy metric -- the cells below rely on that; confirm against the helper.
orig = get_history_from_project('thesis-tip-adapters-16_shots-temp_0.01', set_name_index=False)

In [None]:
# Accuracy column names mapped to the short labels used in plot legends.
metric_converter = {'ZEROSHOT': 'ZSA', 'TIP ADAPTER': 'TIP', 'TIP-F ADAPTER': 'TIP-f'}

# The three accuracy columns, in the order zero-shot, TIP, TIP-f.
metrics = list(metric_converter)
zsa, tip, tipf = metrics

# Names of the difference columns: one '<a>-<b>' entry per unordered pair of
# metrics, keeping the order of `metrics`.
diffs = [
    f"{first}-{second}"
    for position, first in enumerate(metrics)
    for second in metrics[position + 1:]
]

In [None]:
# Working copy of the raw results, ordered by the raw dataset identifier and
# re-indexed from 0.
classification_df = (
    orig.copy()
    .sort_values('name')
    .reset_index(drop=True)
)
# Replace raw dataset identifiers with their display names.
classification_df['name'] = classification_df['name'].apply(
    lambda raw_name: dataset_mapping[raw_name]
)
# `means` is only an alias of the same frame here (no copy is made).
means = classification_df

### Compute pairwise differences between metrics

In [None]:
# Add one difference column per ordered pair of distinct metrics.
# Column '<met1>-<met2>' holds met2 - met1, i.e. the gain of met2 over met1.
for met1 in metrics:
    for met2 in metrics:
        if met1 == met2:
            continue
        # Plain column arithmetic: same values as the former row-wise
        # `apply(..., axis=1)`, but vectorised and well-defined on empty frames.
        classification_df[f'{met1}-{met2}'] = classification_df[met2] - classification_df[met1]

## Means & stds overall

In [None]:
classification_df[metrics].mean().round(2)

In [None]:
classification_df[metrics].std()

In [None]:
classification_df[diffs].mean()

In [None]:
#classification_df[diffs].std()

In [None]:
#for name, group in classification_df.groupby('name'):
    #print(name)
    #print('mean')
    #print(group[metrics].mean())
   # print('std')
    #print(group[metrics].std())
    #print('\n')

In [None]:
#classification_df[diffs].std()

In [None]:
#for name, group in classification_df.groupby('name'):
#    print(name)
#    print('\t\tmean')
#    print(group[metrics].mean())
#    print('\t\tSTD')
#    print(group[metrics].std())
#    print('\n')

### Plot errorbars over all datasets for appendix

In [None]:
def plot_errorbar(means, stds, metric, ax=None):
    """Plot the per-dataset mean of `metric` with a shaded +/- half-std band.

    Parameters
    ----------
    means, stds : DataFrame
        Per-dataset means and standard deviations; both are indexed
        identically and contain a column named `metric`.
    metric : str
        Column to plot; its legend label is looked up in `metric_converter`.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current axes (`plt.gca()`), which is
        the axes just created by `plt.subplots()` in the calling cell.
    """
    if ax is None:
        ax = plt.gca()

    # The band spans half a standard deviation on each side of the mean.
    error_corr = .5 * stds[metric]
    lower = means[metric] - error_corr
    upper = means[metric] + error_corr

    (mean_line,) = ax.plot(means.index, means[metric], label=metric_converter[metric])
    # Reuse the colour of the mean line so that bounds and band belong visibly
    # to this metric (the bounds used to be 'tab:blue' for every metric).
    color = mean_line.get_color()
    ax.plot(means.index, lower, color=color, alpha=.1)
    ax.plot(means.index, upper, color=color, alpha=.1)
    ax.fill_between(range(len(means)), lower, upper, color=color, alpha=.3)

In [None]:
classification_df.sort_values('name')

In [None]:
 classification_df['name'].unique()

In [None]:
# Relative gain over zero-shot: divide the two 'ZEROSHOT-*' difference columns
# by the zero-shot accuracy and store the result as '<diff>-percent'.
for diff_column in diffs[:2]:
    relative_column = diff_column + '-percent'
    classification_df[relative_column] = classification_df[diff_column] / classification_df['ZEROSHOT']

In [None]:
classification_df.columns

In [None]:
# Names of the relative-gain columns, one per adapter variant.
percents = [
    f'ZEROSHOT-{adapter}-percent'
    for adapter in ('TIP ADAPTER', 'TIP-F ADAPTER')
]

In [None]:
# Per-dataset mean and standard deviation of every accuracy metric.
means = classification_df[metrics + ['name']].groupby('name').mean()
stds = classification_df[metrics + ['name']].groupby('name').std()

# Column that holds the standard deviation belonging to each metric.
std_columns = {'ZEROSHOT': 'z-std', 'TIP ADAPTER': 'TIP-std', 'TIP-F ADAPTER': 'TIP-F-std'}
mean_std = pd.concat([means, stds.rename(columns=std_columns)], axis=1).round(2)

# Turn every metric column into a "mean \pm std" string for the LaTeX table.
# The raw string keeps the backslash literal; "\p" in a normal string is an
# invalid escape sequence (a warning today, an error in future Python versions).
for metric, std_column in std_columns.items():
    mean_std[metric] = mean_std[metric].astype(str) + r" \pm " + mean_std[std_column].astype(str)

save_csv(mean_std[metrics], '6', 'classification_adapter_plusminus')

In [None]:
means

In [None]:
mean_std

In [None]:
# Per-dataset means of the accuracies and relative gains, ordered by the TIP-f gain.
# Columns are selected before averaging so that unrelated (possibly
# non-numeric) columns are never aggregated.
classification_df.groupby('name')[metrics + percents].mean().abs().sort_values(percents[1])


##### classification_df.groupby('name').mean()[metrics + diffs].sort_values(diffs[0], ascending=False)

In [None]:
# Per-dataset means of the accuracies and pairwise differences, ordered by
# diffs[2]. Columns are selected before averaging so that unrelated (possibly
# non-numeric) columns are never aggregated.
classification_df.groupby('name')[metrics + diffs].mean().sort_values(diffs[2])

In [None]:
# Absolute per-dataset means of the accuracies and pairwise differences,
# ascending by diffs[2]. Columns are selected before averaging so that
# unrelated (possibly non-numeric) columns are never aggregated.
meaned_cl_df = (
    classification_df.groupby('name')[metrics + diffs]
    .mean()
    .abs()
    .sort_values(diffs[2], ascending=True)
)

In [None]:
means.sort_values(metrics[1])

In [None]:
stds

In [None]:
# Accuracy columns used by the classification plot below.
metrics = ['ZEROSHOT', 'TIP ADAPTER', 'TIP-F ADAPTER']
# Same columns plus the grouping key.
metrics_and_name = [*metrics, 'name']
# Matching '<metric>_std' column names.
std_metrics = [f'{metric}_std' for metric in metrics]
# Display labels of the OOD score columns.
tip_mappings = dict(tip='T-MCM', tipf='T-MCM-f', clip='MCM')

In [None]:
fig, ax = plt.subplots(figsize=figsize)

# Per-dataset mean and standard deviation of every accuracy metric.
means = classification_df[metrics_and_name].groupby('name').mean()
stds = classification_df[metrics_and_name].groupby('name').std()

# One mean line with a shaded band per metric, drawn on the axes created above.
for metric in metrics:
    plot_errorbar(means, stds, metric)

# Vertical guide from the x axis up to the best metric of each dataset.
positions = list(range(len(means)))
ax.vlines(positions, ymin=0, ymax=means.max(axis=1), color='darkgrey', alpha=0.6)

ax.legend(fontsize=7, loc='lower left')
# Label the ticks from the plotted index itself. The groupby index is sorted by
# display name, whereas `classification_df['name'].unique()` follows the row
# order of the frame, so using the latter could attach labels to the wrong
# datasets. Tick positions are pinned first so the labels cannot drift.
ax.set_xticks(positions)
ax.set_xticklabels(means.index, rotation=45, ha='right')
ax.set_ylabel('Accuracy %')
ax.set_xlabel('Dataset')
plt.tight_layout()

save_plot(fig, 'accuracies_domain_adaption', '6', False)

In [None]:
classification_df[metrics_and_name].groupby('name').mean()

### MCM 

In [None]:
# Raw OOD-detection results of the hyper-parameter search project.
adapter_df = get_history_from_project('thesis-toc-ood-test-hyperparam-search-10-runs', False)
# Independent working frame whose raw dataset identifiers are replaced by
# their display names; `adapter_df` itself stays untouched.
working_adapter = adapter_df.copy()
working_adapter['name'] = working_adapter['name'].apply(
    lambda raw_name: dataset_mapping[raw_name]
)

In [None]:
metrics_and_name

In [None]:
# From here on `metrics` refers to the OOD score columns, not the accuracies.
metrics = ['tip', 'tipf', 'clip']
# Same columns plus the grouping key.
metrics_and_name = [*metrics, 'name']
# Matching '<metric>_std' column names.
std_metrics = [f'{metric}_std' for metric in metrics]
# Display labels of the OOD score columns.
tip_mappings = dict(tip='T-MCM', tipf='T-MCM-f', clip='MCM')

In [None]:
diff_bigger = 10
columns_of_relevance = 'ZEROSHOT-TIP ADAPTER'
# Get the names of the datasets where adaptation improves the mean accuracy by
# more than `diff_bigger` points.
# numeric_only=True restricts the aggregation to numeric columns; without it
# pandas >= 2.0 raises a TypeError as soon as the frame has a text column.
full_mean = classification_df.groupby('name').mean(numeric_only=True)
names = full_mean[full_mean[columns_of_relevance] > diff_bigger].index.values


In [None]:
# Table of OOD scores per dataset, rounded to three decimals.
to_safe = working_adapter[metrics + ['name'] + std_metrics].set_index('name').round(3)
# Merge every score with its standard deviation into a "mean \pm std" string.
# The raw string keeps the backslash literal; "\p" in a normal string is an
# invalid escape sequence (a warning today, an error in future Python versions).
for metric, std_metric in zip(metrics, std_metrics):
    to_safe[metric] = to_safe[metric].astype(str) + r" \pm " + to_safe[std_metric].astype(str)
# The standalone std columns are no longer needed once merged.
to_safe = to_safe.drop(std_metrics, axis=1).sort_index()


In [None]:
# Export with the OOD display labels (T-MCM, T-MCM-f, MCM). `to_safe` has the
# columns 'tip' / 'tipf' / 'clip', so renaming through `metric_converter`
# (keyed by the classification metric names) silently renamed nothing.
save_csv(to_safe.rename(columns=tip_mappings).sort_index(), "6", "mcm_tip_tipf")

In [None]:
# Show the table with the OOD display labels; `metric_converter` has no keys
# matching the 'tip' / 'tipf' / 'clip' columns, so it renamed nothing.
to_safe.rename(columns=tip_mappings)

In [None]:
working_adapter[metrics].mean().round(3)

In [None]:
working_adapter[std_metrics].mean().round(3)

## Use only the well-adapted datasets

In [None]:
diff_bigger = 10
columns_of_relevance = 'ZEROSHOT-TIP-F ADAPTER'
# Get the names of the datasets where adaptation improves the mean accuracy by
# more than `diff_bigger` points.
# numeric_only=True restricts the aggregation to numeric columns; without it
# pandas >= 2.0 raises a TypeError as soon as the frame has a text column.
full_mean = classification_df.groupby('name').mean(numeric_only=True)
names = full_mean[full_mean[columns_of_relevance] > diff_bigger].index.values
print(f"Acc improvement over {diff_bigger}: {names}")

In [None]:
working_adapter.set_index('name').loc[names][metrics].mean().round(3)

In [None]:
# Mean standard deviation of the OOD scores over the selected datasets.
# (This cell used to repeat the previous one verbatim and report `metrics` again.)
working_adapter.set_index('name').loc[names, std_metrics].mean().round(3)

In [None]:
diff_bigger = 10
columns_of_relevance = 'ZEROSHOT-TIP ADAPTER'
# Get the names of the datasets where adaptation improves the mean accuracy by
# more than `diff_bigger` points.
# numeric_only=True restricts the aggregation to numeric columns; without it
# pandas >= 2.0 raises a TypeError as soon as the frame has a text column.
full_mean = classification_df.groupby('name').mean(numeric_only=True)
names = full_mean[full_mean[columns_of_relevance] > diff_bigger].index.values
print(f"Acc improvement over {diff_bigger}: {names}")

In [None]:
working_adapter.set_index('name').loc[names][metrics].mean().round(3)

In [None]:
working_adapter.set_index('name').loc[names][std_metrics].mean().round(3)

In [None]:
working_adapter[metrics].plot(kind='bar')

## ZOC

In [None]:
# Rebuild the working frame from the raw results, this time indexed by the
# dataset display name (the cells below select rows with `.loc[names]`).
working_adapter = adapter_df.copy()
working_adapter['name'] = working_adapter['name'].apply(lambda x: dataset_mapping[x])
working_adapter = working_adapter.set_index('name')

# ZOC-family score columns and their '<metric>_std' counterparts.
metrics_z = ['zoc', 'toc', 'tocf']
std_metrics_z = [metric + '_std' for metric in metrics_z]
# Display labels; the key must be 'tocf' (not 'toc-f') to match the column name.
tip_mappings = {'zoc': 'ZOC', 'toc': 'T-ZOC', 'tocf': 'T-ZOC-f'}

# Mean score of each ZOC-family method over all datasets.
working_adapter[metrics_z].mean().round(3)

In [None]:
working_adapter[std_metrics_z].mean().round(3)

In [None]:
names

In [None]:
working_adapter.loc[names][metrics_z].mean().round(3)

In [None]:
working_adapter.loc[names][std_metrics_z].mean().round(3)

In [None]:
# Raw score column -> label used in figures and exported tables.
renamer_for_plot = dict(
    zoc='ZOC',
    toc='T-ZOC',
    tip='T-MCM',
    tocf='T-ZOC-f',
    tipf='T-MCM-f',
    clip='MCM',
)

In [None]:
# Scores of the three plotted methods, restricted to the selected datasets.
labelled_scores = working_adapter.rename(columns=renamer_for_plot)
barplot_df = labelled_scores.loc[names, ['MCM', 'T-MCM', 'T-ZOC']]

In [None]:
barplot_df

In [None]:
figsize

In [None]:
# Grouped bar chart of the AUROC per selected dataset: MCM vs. T-MCM vs. T-ZOC.
bar_width = 0.23
fig, ax = plt.subplots(figsize=(figsize[0], 3))

# x positions of the three bars of each group, each shifted by one bar width.
pos1 = range(len(barplot_df))
pos2 = [left + bar_width for left in pos1]
pos3 = [middle + bar_width for middle in pos2]

for positions, column, label in (
    (pos1, 'MCM', 'MCM'),
    (pos2, 'T-MCM', 'T-MCM (Ours)'),
    (pos3, 'T-ZOC', 'T-ZOC (Ours)'),
):
    ax.bar(positions, barplot_df[column], width=bar_width, label=label)

# Dashed reference line at an AUROC of 0.5.
ax.hlines(0.5, xmin=-.5, xmax=[len(barplot_df)], colors='darkgrey', alpha=0.9, linestyles='--', lw=2, label='Uninformed Guesser')

# One tick per group, centred on the middle bar.
ax.set_xticks(pos2)
ax.set_xticklabels(barplot_df.index, rotation=45)
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=7)
ax.set_ylabel('AUROC')
ax.set_xlabel('DATASET')
plt.tight_layout()

save_plot(fig, 'best_bars','6', False)

# Correlations

In [None]:
classification_df.groupby('name').mean().columns

In [None]:
# Join, per dataset, the mean classification accuracies (and gains over
# zero-shot) with the OOD scores, so both can be correlated below.
accuracy_columns = [
    'ZEROSHOT', 'TIP ADAPTER', 'TIP-F ADAPTER',
    'ZEROSHOT-TIP ADAPTER', 'ZEROSHOT-TIP-F ADAPTER',
]
# Standard-deviation columns of the OOD frame are left out of the join.
ood_std_columns = [col for col in working_adapter.columns if "_std" in col]

class_and_ood = pd.concat(
    [
        # Select before averaging so that unrelated (possibly non-numeric)
        # columns are never aggregated.
        classification_df.groupby('name')[accuracy_columns].mean(),
        working_adapter.rename(columns=renamer_for_plot).drop(ood_std_columns, axis=1),
    ],
    axis=1,
)

In [None]:
# Display labels of the OOD score columns; used below to pick the columns of
# the correlation matrix.
auroc_metrics = ['T-ZOC-f', 'T-ZOC', 'MCM', 'T-MCM', 'T-MCM-f','ZOC']

In [None]:
class_and_ood.corr()[auroc_metrics]

In [None]:
# Per-dataset mean of selected pairwise accuracy differences, two decimals.
# Columns are selected before averaging so that unrelated (possibly
# non-numeric) columns are never aggregated.
classification_df.groupby('name')[['ZEROSHOT-TIP ADAPTER', 'ZEROSHOT-TIP-F ADAPTER',
       'TIP ADAPTER-ZEROSHOT', 'TIP ADAPTER-TIP-F ADAPTER',
       'TIP-F ADAPTER-ZEROSHOT']].mean().round(2)

## Benchmark comparison

In [None]:
# Load the results of the OOD benchmark project.
# NOTE(review): the cells below expect the columns 'name', 'AUC' and 'std' --
# confirm against the helper's output.
bench = get_history_from_project('thesis-ood-benchmark-logistic', False)


In [None]:
# Independent copy of the benchmark results with display names in 'name'.
bench_df = bench.copy()
bench_df['name'] = bench_df['name'].apply(
    lambda raw_name: dataset_mapping[raw_name]
)

In [None]:
# Index by dataset name and expose the benchmark score as 'Benchmark'.
bench_df = (
    bench_df
    .set_index('name')
    .rename(columns={'AUC': 'Benchmark'})
)

In [None]:
# All six OOD methods side by side with the benchmark columns, per dataset.
all_ood_columns = ['MCM', 'T-MCM','T-MCM-f', 'ZOC', 'T-ZOC', 'T-ZOC-f']
all_ood_scores = working_adapter.rename(columns=renamer_for_plot)[all_ood_columns]
benchmark_without_std = bench_df.drop('std', axis=1)
final_full = pd.concat([all_ood_scores, benchmark_without_std], axis=1)

In [None]:
save_csv(final_full.round(3), folder='6', name='full_oodd_with_benchmark')

In [None]:
# The three plotted OOD methods side by side with the benchmark columns.
plotted_ood_columns = ['MCM', 'T-MCM', 'T-ZOC']
plotted_ood_scores = working_adapter.rename(columns=renamer_for_plot)[plotted_ood_columns]
final = pd.concat([plotted_ood_scores, bench_df.drop('std', axis=1)], axis=1)

In [None]:
final.mean()

In [None]:
# Gap between the fine-tuned benchmark and T-MCM on the selected datasets.
# The cell used to subtract 'T-MCM' from itself, which is always zero.
# NOTE(review): 'Benchmark' is assumed to be the intended first operand, given
# the section topic -- confirm.
final.loc[names, 'Benchmark'] - final.loc[names, 'T-MCM']

In [None]:
# Grouped bar chart of the AUROC per selected dataset: T-MCM vs. T-ZOC vs.
# the fine-tuned benchmark.
selected_scores = final.loc[names]
bar_width = 0.2
fig, ax = plt.subplots(figsize=(figsize[0], 3))

# x positions of the three bars of each group, each shifted by one bar width.
pos1 = range(len(selected_scores))
pos2 = [left + bar_width for left in pos1]
pos3 = [middle + bar_width for middle in pos2]

for positions, column, label in (
    (pos1, 'T-MCM', 'T-MCM (Ours)'),
    (pos2, 'T-ZOC', 'T-ZOC (Ours)'),
    (pos3, 'Benchmark', 'Fine-tuned'),
):
    ax.bar(positions, selected_scores[column], width=bar_width, label=label)

# Dashed reference line at an AUROC of 0.5.
ax.hlines(0.5, xmin=-.5, xmax=[len(selected_scores)], colors='darkgrey', alpha=0.9, linestyles='--', lw=2, label='Uninformed Guesser')

# One tick per group, centred on the middle bar.
ax.set_xticks(pos2)
ax.set_xticklabels(selected_scores.index, rotation=45)
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=7)
ax.set_ylabel('AUROC')
ax.set_xlabel('DATASET')
plt.tight_layout()
save_plot(fig, 'benchmark bars','6', False)

In [None]:
final.mean()

## All classification results

In [None]:
# Load the logistic-regression classification results of two projects.
# NOTE(review): per the column labels assigned in the next cells, the "large"
# project presumably uses ViT-L/16@336px and the other ViT-B/32 -- confirm.
bench_class_df = get_history_from_project('thesis-classification-logistic_head-large', False)
base_class_df = get_history_from_project('thesis-classification-logistic', False) 

In [None]:
# Large-model logistic results: display names as index, labelled accuracy column.
bench_class = bench_class_df.copy()
bench_class['name'] = bench_class['name'].apply(
    lambda raw_name: dataset_mapping[raw_name]
)
bench_class = (
    bench_class
    .set_index('name')
    .rename(columns={'test accuracy': 'Logistic - ViT-L/16@336px'})
)

In [None]:
# Base-model logistic results: display names as index, labelled accuracy column.
base_class = base_class_df.copy()
base_class['name'] = base_class['name'].apply(
    lambda raw_name: dataset_mapping[raw_name]
)
base_class = (
    base_class
    .set_index('name')
    .rename(columns={'Acc': 'Logistic - ViT-B/32'})
)

In [None]:
bench_class

In [None]:
# Accuracy columns of `classification_df` that are added to the combined
# classification table below.
class_metrics = ['TIP ADAPTER', 'TIP-F ADAPTER', 'ZEROSHOT']

In [None]:
# Combine the logistic results (multiplied by 100 to match the accuracy scale
# used in `classification_df`) with the per-dataset mean adapter accuracies.
# The adapter columns are selected before averaging so that unrelated
# (possibly non-numeric) columns are never aggregated.
full_reg = pd.concat(
    [
        base_class * 100,
        bench_class * 100,
        classification_df.groupby('name')[class_metrics].mean(),
    ],
    axis=1,
)

In [None]:
# Order the rows alphabetically by dataset name.
full_reg = full_reg.sort_index()

In [None]:
# Line plot with one line per classifier and one x position per dataset.
fig, ax = plt.subplots(figsize=figsize)
for col in full_reg.columns:
    ax.plot(full_reg.index, full_reg[col], label=col)
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=3)

# Pin the tick positions before assigning labels: set_xticklabels() on its own
# installs a fixed list of labels without fixing the ticks, which Matplotlib
# warns about and which can mislabel ticks. String x values are placed at
# 0..n-1 in order of appearance, i.e. in index order.
ax.set_xticks(list(range(len(full_reg.index))))
ax.set_xticklabels(full_reg.index, rotation=45, ha='right')

ax.set_ylabel('Accuracy %')
ax.set_xlabel('Dataset')
plt.tight_layout()

save_plot(fig, 'full_classification_line','6', False)
save_csv(full_reg.round(3),folder='6', name="full classification results")