In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
def load_data(file_paths):
    """Read every CSV in ``file_paths`` into its own DataFrame.

    Each file is parsed with its first column used as the row index.

    Args:
        file_paths: Iterable of CSV paths accepted by ``pd.read_csv``.

    Returns:
        A list of DataFrames, one per path, in the same order as the input.
    """
    return [pd.read_csv(csv_path, index_col=0) for csv_path in file_paths]

In [None]:
def prepare_data(data_frames, model_names):
    """Stack per-model tables and reshape them into one long-format table.

    The frames are concatenated with ``model_names`` as an outer index level
    named ``Model``; each frame's own index becomes the ``Label`` level. Both
    levels are then turned into ordinary columns and every remaining column
    is unpivoted into a (``Pooling Method``, ``Accuracy``) pair.

    Args:
        data_frames: Sequence of DataFrames, one per model.
        model_names: Keys passed to ``pd.concat`` to tag each frame's rows.

    Returns:
        DataFrame with columns ``Model``, ``Label``, ``Pooling Method`` and
        ``Accuracy``.
    """
    stacked = pd.concat(data_frames, keys=model_names, names=['Model', 'Label'])
    flat = stacked.reset_index()
    return flat.melt(
        id_vars=['Model', 'Label'],
        var_name='Pooling Method',
        value_name='Accuracy',
    )

In [None]:
def plot_accuracy(data):
    """Draw one grouped bar chart of accuracy per label for each pooling method.

    For every distinct value in ``data['Pooling Method']`` (except the two
    ``average_embedding_*`` methods, which are skipped) a separate figure is
    created with labels on the x-axis, accuracy on the y-axis and one bar
    colour per model. All figures share the same y-axis limits.

    Args:
        data: Long-format DataFrame with the columns ``Model``, ``Label``,
            ``Pooling Method`` and ``Accuracy``.

    Returns:
        None. Figures are shown via ``plt.show()``.
    """
    # Pooling methods that are intentionally not plotted.
    skipped_methods = {
        "average_embedding_max_accuracy",
        "average_embedding_mean_accuracy",
    }
    # Human-readable titles for the known pooling-method column names.
    display_names = {
        "average_cosine_similarity_max_accuracy": "Avg Similarity/Study Max",
        "average_cosine_similarity_mean_accuracy": "Avg Similarity/Study Avg",
        "max_cosine_similarity_max_accuracy": "Max Similarity/Study Max",
        "max_cosine_similarity_mean_accuracy": "Max Similarity/Study Avg",
    }

    # Shared y-axis limits so the figures are directly comparable. They are
    # computed over every row of `data`, including rows of skipped methods.
    y_min = data['Accuracy'].min()
    y_max = data['Accuracy'].max()

    for pooling_method in data['Pooling Method'].unique():
        if pooling_method in skipped_methods:
            continue

        plot_data = data[data['Pooling Method'] == pooling_method].copy()
        # Shorten the longest label so the rotated tick text stays compact.
        plot_data['Label'] = plot_data['Label'].replace('Enlarged Cardiomediastinum', 'Enl Card')

        # Fall back to the raw column name for methods without a display name.
        # Previously such a method either raised UnboundLocalError or silently
        # reused the title of the figure drawn before it.
        pooling_label = display_names.get(pooling_method, pooling_method)

        plt.figure(figsize=(12, 8))
        sns.barplot(x='Label', y='Accuracy', hue='Model', data=plot_data, errorbar=None)
        plt.ylim(y_min, y_max)
        plt.title(f'Model Accuracy for Pooling Method: {pooling_label}')
        plt.ylabel('Accuracy')
        plt.xlabel('Label')
        plt.xticks(rotation=90)
        # Place the legend outside the axes so it does not cover any bars.
        plt.legend(title='Model', bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout()
        plt.show()

In [None]:
# Each entry pairs the model name shown in the plot legend with the CSV file
# that holds that model's accuracy results. Keeping name and path side by side
# makes the pairing explicit instead of relying on two lists staying aligned.
model_runs = [
    ("BioVIL",
     "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_validate_biovil_top_n_accuracy.csv"),
    ("BioVIL-T",
     "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_validate_biovilt_top_n_accuracy.csv"),
    ("True, IG_TG",
     "/opt/gpudata/imadejski/search-model/ctds-search-model/data/param_search_v3_biovilt/model_run_3_True_ig_tg/model_top_n_accuracy_results.csv"),
    ("True, IGL_TG",
     "/opt/gpudata/imadejski/search-model/ctds-search-model/data/param_search_v3_biovilt/model_run_1_True_igl_tg/model_top_n_accuracy_results.csv"),
    ("True, IG_TGL",
     "/opt/gpudata/imadejski/search-model/ctds-search-model/data/param_search_v3_biovilt/model_run_2_True_ig_tgl/model_top_n_accuracy_results.csv"),
    ("True, IGL_TGL",
     "/opt/gpudata/imadejski/search-model/ctds-search-model/data/param_search_v3_biovilt/model_run_0_True_igl_tgl/model_top_n_accuracy_results.csv"),
    ("False, IG_TG",
     "/opt/gpudata/imadejski/search-model/ctds-search-model/data/param_search_v3_biovilt/model_run_7_False_ig_tg/model_top_n_accuracy_results.csv"),
    ("False, IGL_TG",
     "/opt/gpudata/imadejski/search-model/ctds-search-model/data/param_search_v3_biovilt/model_run_5_False_igl_tg/model_top_n_accuracy_results.csv"),
    ("False, IG_TGL",
     "/opt/gpudata/imadejski/search-model/ctds-search-model/data/param_search_v3_biovilt/model_run_6_False_ig_tgl/model_top_n_accuracy_results.csv"),
    ("False, IGL_TGL",
     "/opt/gpudata/imadejski/search-model/ctds-search-model/data/param_search_v3_biovilt/model_run_4_False_igl_tgl/model_top_n_accuracy_results.csv"),
]
model_names = [run_name for run_name, _csv in model_runs]
csv_paths = [run_csv for _name, run_csv in model_runs]

# Read every CSV, then reshape the per-model tables into one long-format table.
data_frames = load_data(csv_paths)
data = prepare_data(data_frames, model_names)

# Draw the accuracy figures from the combined table.
plot_accuracy(data)