In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from scipy.stats import ttest_rel

In [None]:
# MIMIC-CXR split assignment table (one `split` value per row).
split_path = "/opt/gpudata/mimic-cxr/mimic-cxr-2.0.0-split.csv"
split_df = pd.read_csv(split_path)

# Collect the distinct study_id values of the test split so the printed number
# counts studies rather than rows (the previous version kept every test row
# under this name, so a study listed on several rows was counted repeatedly).
# NOTE(review): assumes the split file exposes a `study_id` column — confirm
# against the CSV header.
unique_study_ids = split_df.loc[split_df["split"] == "test", "study_id"].unique()
print(len(unique_study_ids))

In [None]:
# Cosine-similarity table stored under the biovilt_base_model output directory.
cosine_similarity_biovilt_path = (
    "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/"
    "biovilt_base_model/cosine_similarity.csv"
)
cosine_similarity_biovilt_df = pd.read_csv(cosine_similarity_biovilt_path)

# Sanity check: total row count, then the number of distinct study_id values.
print(cosine_similarity_biovilt_df.shape[0])
print(cosine_similarity_biovilt_df["study_id"].unique().size)

In [None]:
# Resampling summary for the base model; later cells read its `label`,
# `embedding_type`, `mean`, `ci_lower` and `ci_upper` columns.
biovilt_resampling_path = (
    "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/"
    "biovilt_base_model/top_n_accuracy_resampling.csv"
)
biovilt_resampling_df = pd.read_csv(filepath_or_buffer=biovilt_resampling_path)

In [None]:
# Grouped bar chart: mean accuracy per label, one bar per pooling type, with
# asymmetric error bars taken from the ci_lower / ci_upper columns.

# Abbreviate the longest label so the rotated tick text stays compact.
biovilt_resampling_df['label'] = biovilt_resampling_df['label'].replace({
        "Enlarged Cardiomediastinum": "Enl Card"
    })

# Map the raw embedding_type identifiers to the display names used in the legend.
biovilt_resampling_df['embedding_type'] = biovilt_resampling_df['embedding_type'].replace({
        "average_cosine_similarity_max_accuracy": "Study Max, Query Avg",
        "average_cosine_similarity_mean_accuracy": "Study Avg, Query Avg",
        "max_cosine_similarity_max_accuracy": "Study Max, Query Max",
        "max_cosine_similarity_mean_accuracy": "Study Avg, Query Max",
    })

# Order of the bars inside every label group (and of the legend entries).
embedding_order = [
    "Study Avg, Query Avg",
    "Study Avg, Query Max",
    "Study Max, Query Avg",
    "Study Max, Query Max",
]

# An ordered categorical makes `.cat.categories` below follow embedding_order.
biovilt_resampling_df['embedding_type'] = pd.Categorical(
    biovilt_resampling_df['embedding_type'],
    categories=embedding_order,
    ordered=True
)

palette = sns.color_palette("Set2", 8)

# Fixed colour per pooling type.
colors = {
    "Study Avg, Query Avg": palette[0],
    "Study Avg, Query Max": palette[1],
    "Study Max, Query Avg": palette[6],
    "Study Max, Query Max": palette[2]
}

plt.figure(figsize=(12, 8))

# Get unique labels and embedding types
labels = biovilt_resampling_df['label'].unique()
embedding_types = biovilt_resampling_df['embedding_type'].cat.categories
x = range(len(labels))

# Width of each bar
bar_width = 0.2
# Offset for each embedding type
offsets = [bar_width * i for i in range(len(embedding_types))]
# Bars are centred on `p + offset`, so the middle of a group lies half-way
# between its first and last bar; the tick labels are placed there.
group_center = bar_width * (len(embedding_types) - 1) / 2

# Plot bars for each embedding type
for i, emb_type in enumerate(embedding_types):
    # NOTE(review): bars are matched to `labels` by position — assumes every
    # embedding type lists its rows in the same label order; confirm.
    data_for_emb = biovilt_resampling_df[biovilt_resampling_df['embedding_type'] == emb_type]
    means = data_for_emb['mean']
    # matplotlib expects error-bar lengths relative to the bar height.
    ci_lower = data_for_emb['mean'] - data_for_emb['ci_lower']
    ci_upper = data_for_emb['ci_upper'] - data_for_emb['mean']

    plt.bar(
        [p + offsets[i] for p in x],
        means,
        width=bar_width,
        label=emb_type,
        color=colors[emb_type],
        yerr=[ci_lower, ci_upper],
        capsize=5,
    )

# Set labels, title, and legend
plt.xlabel('Labels')
plt.ylabel('Mean Accuracy')
#plt.title('BioViL-T Base Model Accuracy by Label and Pooling Type with Resampling, Test Set with CheXpert Labels')
plt.xticks([p + group_center for p in x], labels, rotation=90)
plt.legend(title='Pooling Types')

plt.ylim(0, 0.8)

# Save the plot
plt.tight_layout()
plt.savefig('/opt/gpudata/imadejski/search-model/ctds-search-model/data/figures/biovilt_base_model_accuracy_resampling_mimic_test_chexpert.png', dpi=300)
plt.show()

In [None]:
# Per-resample results for the base model; later cells read its `label`,
# `embedding_type` and `value` columns.
biovilt_all_resampling_path = (
    "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/"
    "biovilt_base_model/top_n_accuracy_all_resampling.csv"
)
biovilt_all_resampling_df = pd.read_csv(filepath_or_buffer=biovilt_all_resampling_path)

In [None]:
# Paired t-tests between pooling types, run separately for every label, with a
# Bonferroni adjustment over the pairwise comparisons made within a label.
results = []

# Get unique labels and embedding types
labels = biovilt_all_resampling_df['label'].unique()
embedding_types = biovilt_all_resampling_df['embedding_type'].unique()

# The six unordered pairs of the four pooling strategies.
combinations =  (("max_cosine_similarity_max_accuracy", "average_cosine_similarity_max_accuracy"),
                ("max_cosine_similarity_max_accuracy", "max_cosine_similarity_mean_accuracy"),
                ("max_cosine_similarity_max_accuracy", "average_cosine_similarity_mean_accuracy"),
                ("average_cosine_similarity_max_accuracy", "max_cosine_similarity_mean_accuracy"),
                ("average_cosine_similarity_max_accuracy", "average_cosine_similarity_mean_accuracy"),
                ("max_cosine_similarity_mean_accuracy", "average_cosine_similarity_mean_accuracy")
)

# Size of the Bonferroni family: one test per pair within a label.
n_comparisons = len(combinations)

# Perform t-tests for each label
for label in labels:
    # Filter the data for the current label
    label_data = biovilt_all_resampling_df[biovilt_all_resampling_df['label'] == label]

    for emb_type1, emb_type2 in combinations:
        # Filter the data for each embedding type
        data1 = label_data[label_data['embedding_type'] == emb_type1]['value']
        data2 = label_data[label_data['embedding_type'] == emb_type2]['value']

        # ttest_rel pairs the observations by position.
        # NOTE(review): assumes both series list the resamples in the same
        # order — confirm against how the CSV was written.
        t_stat, p_value = ttest_rel(data1, data2, nan_policy='raise')

        # Store the result
        results.append({
            'Label': label,
            'Embedding Type 1': emb_type1,
            'Embedding Type 2': emb_type2,
            'T-Statistic': t_stat,
            'P-Value': p_value,
            # Bonferroni adjusts a p-value by multiplying it by the number of
            # tests (capped at 1). Dividing, as was done before, shrinks the
            # p-value and makes every comparison look more significant.
            'Bonferroni Corrected P-Value': min(p_value * n_comparisons, 1.0)
        })

# Convert the results to a DataFrame
results_df = pd.DataFrame(results)
results_df.to_csv('/opt/gpudata/imadejski/search-model/ctds-search-model/data/figures/biovilt_base_pooling_type_t_test_chexpert.csv')

In [None]:
def determine_winner(row):
    """Return the name of the significantly better side of one t-test row.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            'Bonferroni Corrected P-Value', 'T-Statistic' and the two name
            columns, either 'Embedding Type 1'/'Embedding Type 2' or
            'Model Type 1'/'Model Type 2'.

    Returns:
        The first name when the corrected p-value is below 0.05 and the
        t-statistic is positive, the second name when it is negative, and
        None otherwise (not significant, or a zero/NaN statistic).

    Raises:
        KeyError: if a required column is missing from ``row``.
    """
    if not row['Bonferroni Corrected P-Value'] < 0.05:
        return None

    # The notebook's t-test tables differ only in the prefix of the two name
    # columns; accepting both keeps this helper usable on either table no
    # matter which definition of the function was executed last.
    prefix = 'Embedding Type' if 'Embedding Type 1' in row else 'Model Type'

    t_stat = row['T-Statistic']
    if t_stat > 0:
        return row[prefix + ' 1']
    if t_stat < 0:
        return row[prefix + ' 2']
    return None

# Tag every comparison row with its winner (None where there is none).
results_df['Winner'] = results_df.apply(func=determine_winner, axis=1)

# Tally how often each embedding type won; rows without a winner are left out.
win_counts = results_df['Winner'].value_counts(dropna=True)

# Show the tally.
print(win_counts)

In [None]:
# Resampling summaries of the four param_search_v3_biovilt runs, one CSV per
# run directory (model_run_0 ... model_run_3).
biovilt_true_igl_tgl_resampling_path = (
    "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/"
    "param_search_v3_biovilt/model_run_0_True_igl_tgl/"
    "model_top_n_accuracy_results_resampling.csv"
)
biovilt_true_igl_tgl_resampling_df = pd.read_csv(filepath_or_buffer=biovilt_true_igl_tgl_resampling_path)

biovilt_true_igl_tg_resampling_path = (
    "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/"
    "param_search_v3_biovilt/model_run_1_True_igl_tg/"
    "model_top_n_accuracy_results_resampling.csv"
)
biovilt_true_igl_tg_resampling_df = pd.read_csv(filepath_or_buffer=biovilt_true_igl_tg_resampling_path)

biovilt_true_ig_tgl_resampling_path = (
    "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/"
    "param_search_v3_biovilt/model_run_2_True_ig_tgl/"
    "model_top_n_accuracy_results_resampling.csv"
)
biovilt_true_ig_tgl_resampling_df = pd.read_csv(filepath_or_buffer=biovilt_true_ig_tgl_resampling_path)

biovilt_true_ig_tg_resampling_path = (
    "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/"
    "param_search_v3_biovilt/model_run_3_True_ig_tg/"
    "model_top_n_accuracy_results_resampling.csv"
)
biovilt_true_ig_tg_resampling_df = pd.read_csv(filepath_or_buffer=biovilt_true_ig_tg_resampling_path)

In [None]:
# Grouped bar chart: mean accuracy per label, one bar per model, restricted to
# the 'average_cosine_similarity_mean_accuracy' embedding type.

# Reload the base-model summary so this cell starts from the raw file contents.
biovilt_resampling_path = "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/biovilt_base_model/top_n_accuracy_resampling.csv"
biovilt_resampling_df = pd.read_csv(biovilt_resampling_path)

# Concatenate the dataframes into one, adding a column to indicate the model
biovilt_resampling_df['model'] = 'Base'
biovilt_true_ig_tg_resampling_df['model'] = 'IG_TG'
biovilt_true_igl_tg_resampling_df['model'] = 'IGL_TG'
biovilt_true_ig_tgl_resampling_df['model'] = 'IG_TGL'
biovilt_true_igl_tgl_resampling_df['model'] = 'IGL_TGL'

models_df = pd.concat([biovilt_resampling_df,
                biovilt_true_ig_tg_resampling_df,
                biovilt_true_igl_tg_resampling_df,
                biovilt_true_ig_tgl_resampling_df,
                biovilt_true_igl_tgl_resampling_df],
                ignore_index=True)

palette = sns.color_palette("Set2", 8)

# Fixed colour per model.
colors = {
    "Base": palette[0],
    "IG_TG": palette[4],
    "IGL_TG": palette[3],
    "IG_TGL": palette[7],
    "IGL_TGL": palette[5],
}

# Filter for rows where embedding_type is 'average_cosine_similarity_mean_accuracy'.
# .copy() gives an independent frame, so the column assignment below is not
# made on a slice of models_df (which triggers SettingWithCopyWarning).
filtered_df = models_df[models_df['embedding_type'] == 'average_cosine_similarity_mean_accuracy'].copy()

# Abbreviate the longest label so the rotated tick text stays compact.
filtered_df['label'] = filtered_df['label'].replace({
        "Enlarged Cardiomediastinum": "Enl Card"
    })

labels = filtered_df['label'].unique()
models = filtered_df['model'].unique()

# Create the bar plot
plt.figure(figsize=(12, 8))
bar_width = 0.15
positions = range(len(labels))
# Bars are centred on `p + bar_width * idx`, so the middle of a group lies
# half-way between its first and last bar; the tick labels are placed there.
group_center = bar_width * (len(models) - 1) / 2

# Plot bars for each model within each label group
for idx, model in enumerate(models):
    # NOTE(review): bars are matched to `labels` by position — assumes every
    # model lists its rows in the same label order; confirm.
    model_data = filtered_df[filtered_df['model'] == model]
    means = model_data['mean']
    # matplotlib expects error-bar lengths relative to the bar height.
    ci_lower = model_data['mean'] - model_data['ci_lower']
    ci_upper = model_data['ci_upper'] - model_data['mean']
    plt.bar(
        [p + bar_width * idx for p in positions],
        means,
        width=bar_width,
        color=colors[model],
        yerr=[ci_lower, ci_upper],
        capsize=3.75,
        label=model
    )

# Add labels and title
plt.xlabel('Labels')
plt.ylabel('Mean Accuracy')
#plt.title('Mean Accuracy for Fine-Tuned Models by Label with Resampling, Test Set with CheXpert Labels')
# Tick positions come from this cell's own `positions`; the earlier version
# read `x`, a variable that only exists if a previous plotting cell has run.
plt.xticks([p + group_center for p in positions], labels, rotation=90)
plt.ylim(0, 0.8)

# Add legend
plt.legend(title='Models', bbox_to_anchor=(0, 1), loc='upper left')

# Adjust layout to make room for rotated x labels
plt.tight_layout()

# Save, then show the plot
plt.savefig('/opt/gpudata/imadejski/search-model/ctds-search-model/data/figures/mean_accuracy_fine_tuned_models_resampling_mimic_test_chexpert.png', dpi=300)
plt.show()


In [None]:
# Per-resample results of the four param_search_v3_biovilt runs, one CSV per
# run directory (model_run_0 ... model_run_3).
biovilt_true_igl_tgl_all_resampling_path = (
    "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/"
    "param_search_v3_biovilt/model_run_0_True_igl_tgl/"
    "model_top_n_accuracy_results_all_resampling.csv"
)
biovilt_true_igl_tgl_all_resampling_df = pd.read_csv(filepath_or_buffer=biovilt_true_igl_tgl_all_resampling_path)

biovilt_true_igl_tg_all_resampling_path = (
    "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/"
    "param_search_v3_biovilt/model_run_1_True_igl_tg/"
    "model_top_n_accuracy_results_all_resampling.csv"
)
biovilt_true_igl_tg_all_resampling_df = pd.read_csv(filepath_or_buffer=biovilt_true_igl_tg_all_resampling_path)

biovilt_true_ig_tgl_all_resampling_path = (
    "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/"
    "param_search_v3_biovilt/model_run_2_True_ig_tgl/"
    "model_top_n_accuracy_results_all_resampling.csv"
)
biovilt_true_ig_tgl_all_resampling_df = pd.read_csv(filepath_or_buffer=biovilt_true_ig_tgl_all_resampling_path)

biovilt_true_ig_tg_all_resampling_path = (
    "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/"
    "param_search_v3_biovilt/model_run_3_True_ig_tg/"
    "model_top_n_accuracy_results_all_resampling.csv"
)
biovilt_true_ig_tg_all_resampling_df = pd.read_csv(filepath_or_buffer=biovilt_true_ig_tg_all_resampling_path)

In [None]:
# Paired t-tests between every pair of models, run separately for every label
# on the 'average_cosine_similarity_mean_accuracy' rows, with a Bonferroni
# adjustment over the pairwise comparisons made within a label.
results = []

# Reload the base-model per-resample results so this cell starts from the raw file.
biovilt_all_resampling_path = "/opt/gpudata/imadejski/search-model/ctds-search-model/data/mimic_test/biovilt_base_model/top_n_accuracy_all_resampling.csv"
biovilt_all_resampling_df = pd.read_csv(biovilt_all_resampling_path)

# Labels to iterate over, taken from the base-model table.
labels = biovilt_all_resampling_df['label'].unique()

# Tag every table with its model name before stacking them.
biovilt_all_resampling_df['model'] = 'Base'
biovilt_true_ig_tg_all_resampling_df['model'] = 'IG_TG'
biovilt_true_igl_tg_all_resampling_df['model'] = 'IGL_TG'
biovilt_true_ig_tgl_all_resampling_df['model'] = 'IG_TGL'
biovilt_true_igl_tgl_all_resampling_df['model'] = 'IGL_TGL'

# The ten unordered pairs of the five models.
combinations = (('Base', 'IG_TG'),
    ('Base', 'IGL_TG'),
    ('Base', 'IG_TGL'),
    ('Base', 'IGL_TGL'),
    ('IG_TG', 'IGL_TG'),
    ('IG_TG', 'IG_TGL'),
    ('IG_TG', 'IGL_TGL'),
    ('IGL_TG', 'IG_TGL'),
    ('IGL_TG', 'IGL_TGL'),
    ('IG_TGL', 'IGL_TGL')
)

# Size of the Bonferroni family: one test per pair within a label.
n_comparisons = len(combinations)

models_all_df = pd.concat([biovilt_all_resampling_df,
                biovilt_true_ig_tg_all_resampling_df,
                biovilt_true_igl_tg_all_resampling_df,
                biovilt_true_ig_tgl_all_resampling_df,
                biovilt_true_igl_tgl_all_resampling_df],
                ignore_index=True)

# Filter for rows where embedding_type is 'average_cosine_similarity_mean_accuracy'
filtered_all_df = models_all_df[models_all_df['embedding_type'] == 'average_cosine_similarity_mean_accuracy']


# Perform t-tests for each label
for label in labels:
    # Filter the data for the current label
    label_data = filtered_all_df[filtered_all_df['label'] == label]

    for model1, model2 in combinations:
        # Filter the data for each model
        data1 = label_data[label_data['model'] == model1]['value']
        data2 = label_data[label_data['model'] == model2]['value']

        # ttest_rel pairs the observations by position.
        # NOTE(review): assumes both models list the resamples in the same
        # order — confirm against how the CSVs were written.
        t_stat, p_value = ttest_rel(data1, data2, nan_policy='raise')

        # Store the result
        results.append({
            'Label': label,
            'Model Type 1': model1,
            'Model Type 2': model2,
            'T-Statistic': t_stat,
            'P-Value': p_value,
            # Bonferroni adjusts a p-value by multiplying it by the number of
            # tests (capped at 1). Dividing, as was done before, shrinks the
            # p-value and makes every comparison look more significant.
            'Bonferroni Corrected P-Value': min(p_value * n_comparisons, 1.0)
        })

# Convert the results to a DataFrame
results_df = pd.DataFrame(results)
results_df.to_csv('/opt/gpudata/imadejski/search-model/ctds-search-model/data/figures/fine_tuned_model_type_t_test_chexpert.csv')

In [None]:
def determine_winner(row):
    """Return the name of the significantly better side of one t-test row.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            'Bonferroni Corrected P-Value', 'T-Statistic' and the two name
            columns, either 'Model Type 1'/'Model Type 2' or
            'Embedding Type 1'/'Embedding Type 2'.

    Returns:
        The first name when the corrected p-value is below 0.05 and the
        t-statistic is positive, the second name when it is negative, and
        None otherwise (not significant, or a zero/NaN statistic).

    Raises:
        KeyError: if a required column is missing from ``row``.
    """
    if not row['Bonferroni Corrected P-Value'] < 0.05:
        return None

    # The notebook's t-test tables differ only in the prefix of the two name
    # columns; accepting both means this definition does not break the
    # embedding-type table when it replaces the earlier definition.
    prefix = 'Model Type' if 'Model Type 1' in row else 'Embedding Type'

    t_stat = row['T-Statistic']
    if t_stat > 0:
        return row[prefix + ' 1']
    if t_stat < 0:
        return row[prefix + ' 2']
    return None

# Tag every comparison row with its winner (None where there is none).
results_df['Winner'] = results_df.apply(func=determine_winner, axis=1)

# Tally how often each model won; rows without a winner are left out.
win_counts = results_df['Winner'].value_counts(dropna=True)

# Show the tally.
print(win_counts)

In [None]:
# Paired t-tests of every other model against IG_TG, run separately for every
# label, with a Bonferroni adjustment over the comparisons made within a label.
results = []

filtered_all_df = models_all_df[models_all_df['embedding_type'] == 'average_cosine_similarity_mean_accuracy']

# Each pair is (other model, IG_TG), so a positive t-statistic favours the
# other model and a negative one favours IG_TG.
comparisons = [('Base', 'IG_TG'),
               ('IGL_TG', 'IG_TG'),
               ('IG_TGL', 'IG_TG'),
               ('IGL_TGL', 'IG_TG')]

# Size of the Bonferroni family: one test per pair within a label.
n_comparisons = len(comparisons)

for label in labels:
    label_data = filtered_all_df[filtered_all_df['label'] == label]

    for model1, model2 in comparisons:
        data1 = label_data[label_data['model'] == model1]['value']
        data2 = label_data[label_data['model'] == model2]['value']

        # ttest_rel pairs the observations by position.
        # NOTE(review): assumes both models list the resamples in the same
        # order — confirm against how the CSVs were written.
        t_stat, p_value = ttest_rel(data1, data2, nan_policy='raise')

        results.append({
            'Label': label,
            'Model Type 1': model1,
            'Model Type 2': model2,
            'T-Statistic': t_stat,
            'P-Value': p_value,
            # Bonferroni adjusts a p-value by multiplying it by the number of
            # tests (capped at 1). Dividing, as was done before, shrinks the
            # p-value and makes every comparison look more significant.
            'Bonferroni Corrected P-Value': min(p_value * n_comparisons, 1.0)
        })

results_df = pd.DataFrame(results)

In [None]:
# Tag every comparison against IG_TG with its winner (None where there is none),
# then tally and show the wins; rows without a winner are left out of the tally.
results_df['Winner'] = results_df.apply(func=determine_winner, axis=1)
win_counts = results_df['Winner'].value_counts(dropna=True)
print(win_counts)

In [None]:
# Wide table of mean accuracy: one row per label, one column per model.
pivot_df = filtered_df.pivot(index='label', columns='model', values='mean')

# Mean accuracy of each other model minus that of IG_TG, per label
# (negative values mean IG_TG scored higher).
pivot_df['Base_vs_IG_TG'] = pivot_df['Base'].sub(pivot_df['IG_TG'])
pivot_df['IGL_TG_vs_IG_TG'] = pivot_df['IGL_TG'].sub(pivot_df['IG_TG'])
pivot_df['IG_TGL_vs_IG_TG'] = pivot_df['IG_TGL'].sub(pivot_df['IG_TG'])
pivot_df['IGL_TGL_vs_IG_TG'] = pivot_df['IGL_TGL'].sub(pivot_df['IG_TG'])

# Keep only the four difference columns and write them out.
diff_df = pivot_df.loc[
    :,
    ['Base_vs_IG_TG', 'IGL_TG_vs_IG_TG', 'IG_TGL_vs_IG_TG', 'IGL_TGL_vs_IG_TG'],
]
diff_df.to_csv('/opt/gpudata/imadejski/search-model/ctds-search-model/data/figures/fine_tuned_model_ig_tg_mean_diff_chexpert.csv')

In [None]:
# Show the per-label accuracy differences relative to IG_TG as plain text.
print(diff_df)

In [None]:
# Heatmap of diff_df: one row per label, one column per comparison with IG_TG.

# Diverging colour map running from hue 20 (negative side) to hue 220 (positive side).
cmap = sns.diverging_palette(h_neg=20, h_pos=220, as_cmap=True)

# Symmetric colour limits so that zero falls in the middle of the scale.
vmin = -0.275
vmax = 0.275

# Draw the annotated heatmap on a fresh figure.
plt.figure(figsize=(10, 8))
ax = sns.heatmap(
    diff_df,
    vmin=vmin,
    vmax=vmax,
    center=0,
    cmap=cmap,
    annot=True,
    fmt=".3f",
    linewidths=.5,
    cbar_kws={'label': 'Accuracy Difference'},
)

# The colour bar hangs off the first artist collection of the heatmap axes.
cbar = ax.collections[0].colorbar

# Colour-bar ticks in steps of 0.05 starting at -0.25 (stop value 0.30 is excluded).
ticks = np.arange(-0.25, 0.30, 0.05)
cbar.set_ticks(ticks)

# Axis captions (title intentionally left disabled).
#plt.title('Model Accuracy Differences Relative to IG_TG Fine-Tuned Model with Resampling, \n Test Set with CheXpert Labels')
ax.set_xlabel('Comparison')
ax.set_ylabel('Labels')

# Tighten the layout, write the PNG, then display the figure.
plt.tight_layout()
plt.savefig('/opt/gpudata/imadejski/search-model/ctds-search-model/data/figures/accuracy_dff_vs_igtg_resampling_mimic_test_chexpert.png', dpi=300)
plt.show()

In [None]:
# Transposed heatmap of diff_df: one row per comparison with IG_TG, one column
# per label.

# Diverging colour map running from hue 20 (negative side) to hue 220 (positive side).
cmap = sns.diverging_palette(20, 220, as_cmap=True)

# Define the min and max for the color bar
vmin, vmax = -0.275, 0.275

# After transposing, comparisons are the rows and labels are the columns.
diff_df_t = diff_df.T

# Create the heatmap
plt.figure(figsize=(10, 8))
ax = sns.heatmap(diff_df_t, cmap=cmap, center=0, annot=True, fmt=".3f", linewidths=.5,
                 vmin=vmin, vmax=vmax, cbar_kws={'label': 'Accuracy Difference'})

# Customize the color bar
cbar = ax.collections[0].colorbar  # Access the color bar

# Set ticks to be every 0.05 between -0.25 and 0.25
ticks = np.arange(-0.25, 0.30, 0.05)
cbar.set_ticks(ticks)

# Add labels and title. The axes are swapped relative to the untransposed
# heatmap: labels run along x and comparisons along y, so the captions must be
# swapped too (they were previously copied over unchanged).
#plt.title('Model Accuracy Differences Relative to IG_TG Fine-Tuned Model with Resampling, \n Test Set with CheXpert Labels')
plt.xlabel('Labels')
plt.ylabel('Comparison')

# Save, then show the heatmap
plt.tight_layout()
plt.savefig('/opt/gpudata/imadejski/search-model/ctds-search-model/data/figures/accuracy_dff_vs_igtg_resampling_mimic_test_chexpert_flipped.png', dpi=300)
plt.show()