In [18]:
import pandas as pd
from tabulate import tabulate


def format_mean_std(row, metric):
    """Render one metric of an aggregated row as ``"<mean> ± <std>"``.

    ``row`` is indexed with the keys ``(metric, 'mean')`` and
    ``(metric, 'std')``; both values are written with three decimals.
    """
    stats = (row[(metric, 'mean')], row[(metric, 'std')])
    return "{:.3f} ± {:.3f}".format(*stats)


def get_stat_df(filename, verbose=True):
    """Summarise per-model metrics from a results CSV as "mean ± std" strings.

    Rows are grouped by ``model_key``. For each of ``mcc``, ``micro_auprc``,
    ``sensitivity`` and ``precision`` the group mean and sample standard
    deviation are rendered with three decimals.

    Parameters
    ----------
    filename : str, path object or file-like
        Passed unchanged to ``pd.read_csv``. The data must contain a
        ``model_key`` column and one column per metric listed above.
    verbose : bool, default True
        If true, print the summary (without ``record_count``) as a
        pipe-format markdown table.

    Returns
    -------
    pandas.DataFrame
        One row per model with the columns ``model_key`` (ordered
        categorical), the four metric columns (formatted strings) and
        ``record_count`` (number of CSV rows for that model). Rows follow the
        preferred model order below; models that are not part of that list
        keep their name and are placed after the listed ones.
    """
    result_df = pd.read_csv(filename)

    metrics = ['mcc', 'micro_auprc', 'sensitivity', 'precision']
    by_model = result_df.groupby('model_key')
    grouped = by_model.agg({k: ['mean', 'std'] for k in metrics})

    # Build the formatted table column by column; the text is the same
    # "<mean> ± <std>" rendering that format_mean_std produces for a row.
    formatted_df = pd.DataFrame(index=grouped.index)
    for metric in metrics:
        formatted_df[metric] = [
            f"{mean_val:.3f} ± {std_val:.3f}"
            for mean_val, std_val in zip(grouped[(metric, 'mean')],
                                         grouped[(metric, 'std')])
        ]

    # Series assignment aligns on model_key, so the counts cannot end up on
    # the wrong row.
    formatted_df['record_count'] = by_model.size()

    order_model_keys = [
        'esm-t33',
        'esm-t33-pretrained',
        'esm-t33-gearnet',
        'esm-t33-gearnet-pretrained',
        'esm-t33-gearnet-adaboost-r10',
        'esm-t33-gearnet-pretrained-adaboost-r10',
        'esm-t33-gearnet-resiboost-r10',
        'esm-t33-gearnet-pretrained-resiboost-r10',
    ]

    formatted_df = formatted_df.reset_index()

    # pd.Categorical replaces values that are missing from `categories` with
    # NaN, which would erase the name of any model not listed above. Append
    # such models (in their groupby order) after the preferred ones instead.
    listed = set(order_model_keys)
    unlisted_keys = [
        key for key in formatted_df['model_key'] if key not in listed]
    formatted_df['model_key'] = pd.Categorical(
        formatted_df['model_key'],
        categories=order_model_keys + unlisted_keys,
        ordered=True)
    formatted_df = formatted_df.sort_values('model_key')

    if verbose:
        markdown_table = tabulate(
            formatted_df[['model_key'] + metrics],
            headers='keys', tablefmt='pipe', showindex=False)
        print(markdown_table)

    return formatted_df


# Summarise imatinib_stats.csv: prints the markdown table and shows the
# returned frame as the cell output.
get_stat_df('imatinib_stats.csv')

| model_key                                | mcc           | micro_auprc   | sensitivity   | precision     |
|:-----------------------------------------|:--------------|:--------------|:--------------|:--------------|
| esm-t33                                  | 0.612 ± 0.026 | 0.511 ± 0.021 | 0.594 ± 0.032 | 0.678 ± 0.052 |
| esm-t33-pretrained                       | 0.579 ± 0.033 | 0.502 ± 0.017 | 0.548 ± 0.068 | 0.663 ± 0.026 |
| esm-t33-gearnet                          | 0.548 ± 0.077 | 0.581 ± 0.027 | 0.513 ± 0.046 | 0.656 ± 0.172 |
| esm-t33-gearnet-pretrained               | 0.627 ± 0.020 | 0.602 ± 0.036 | 0.533 ± 0.035 | 0.782 ± 0.063 |
| esm-t33-gearnet-adaboost-r10             | 0.589 ± 0.018 | 0.577 ± 0.022 | 0.478 ± 0.023 | 0.773 ± 0.036 |
| esm-t33-gearnet-pretrained-adaboost-r10  | 0.609 ± 0.060 | 0.612 ± 0.040 | 0.507 ± 0.120 | 0.789 ± 0.068 |
| esm-t33-gearnet-resiboost-r10            | 0.589 ± 0.045 | 0.622 ± 0.021 | 0.600 ± 0.045 | 0.634 ± 0.109 |
| esm-t33-gearnet-p

Unnamed: 0,model_key,mcc,micro_auprc,sensitivity,precision,record_count
0,esm-t33,0.612 ± 0.026,0.511 ± 0.021,0.594 ± 0.032,0.678 ± 0.052,5
7,esm-t33-pretrained,0.579 ± 0.033,0.502 ± 0.017,0.548 ± 0.068,0.663 ± 0.026,5
1,esm-t33-gearnet,0.548 ± 0.077,0.581 ± 0.027,0.513 ± 0.046,0.656 ± 0.172,5
3,esm-t33-gearnet-pretrained,0.627 ± 0.020,0.602 ± 0.036,0.533 ± 0.035,0.782 ± 0.063,5
2,esm-t33-gearnet-adaboost-r10,0.589 ± 0.018,0.577 ± 0.022,0.478 ± 0.023,0.773 ± 0.036,5
4,esm-t33-gearnet-pretrained-adaboost-r10,0.609 ± 0.060,0.612 ± 0.040,0.507 ± 0.120,0.789 ± 0.068,5
6,esm-t33-gearnet-resiboost-r10,0.589 ± 0.045,0.622 ± 0.021,0.600 ± 0.045,0.634 ± 0.109,5
5,esm-t33-gearnet-pretrained-resiboost-r10,0.622 ± 0.013,0.643 ± 0.024,0.600 ± 0.019,0.690 ± 0.038,5


In [19]:
# Same per-model summary, this time for dasatinib_stats.csv.
get_stat_df('dasatinib_stats.csv')

| model_key                                | mcc           | micro_auprc   | sensitivity   | precision     |
|:-----------------------------------------|:--------------|:--------------|:--------------|:--------------|
| esm-t33                                  | 0.561 ± 0.030 | 0.624 ± 0.050 | 0.515 ± 0.074 | 0.669 ± 0.070 |
| esm-t33-pretrained                       | 0.584 ± 0.028 | 0.617 ± 0.035 | 0.576 ± 0.056 | 0.647 ± 0.090 |
| esm-t33-gearnet                          | 0.588 ± 0.076 | 0.612 ± 0.093 | 0.508 ± 0.083 | 0.730 ± 0.068 |
| esm-t33-gearnet-pretrained               | 0.562 ± 0.076 | 0.628 ± 0.045 | 0.393 ± 0.119 | 0.876 ± 0.093 |
| esm-t33-gearnet-adaboost-r10             | 0.554 ± 0.065 | 0.607 ± 0.072 | 0.420 ± 0.065 | 0.786 ± 0.112 |
| esm-t33-gearnet-pretrained-adaboost-r10  | 0.586 ± 0.087 | 0.655 ± 0.012 | 0.444 ± 0.152 | 0.854 ± 0.095 |
| esm-t33-gearnet-resiboost-r10            | 0.605 ± 0.043 | 0.674 ± 0.052 | 0.627 ± 0.077 | 0.644 ± 0.121 |
| esm-t33-gearnet-p

Unnamed: 0,model_key,mcc,micro_auprc,sensitivity,precision,record_count
0,esm-t33,0.561 ± 0.030,0.624 ± 0.050,0.515 ± 0.074,0.669 ± 0.070,5
7,esm-t33-pretrained,0.584 ± 0.028,0.617 ± 0.035,0.576 ± 0.056,0.647 ± 0.090,5
1,esm-t33-gearnet,0.588 ± 0.076,0.612 ± 0.093,0.508 ± 0.083,0.730 ± 0.068,5
3,esm-t33-gearnet-pretrained,0.562 ± 0.076,0.628 ± 0.045,0.393 ± 0.119,0.876 ± 0.093,5
2,esm-t33-gearnet-adaboost-r10,0.554 ± 0.065,0.607 ± 0.072,0.420 ± 0.065,0.786 ± 0.112,5
4,esm-t33-gearnet-pretrained-adaboost-r10,0.586 ± 0.087,0.655 ± 0.012,0.444 ± 0.152,0.854 ± 0.095,5
6,esm-t33-gearnet-resiboost-r10,0.605 ± 0.043,0.674 ± 0.052,0.627 ± 0.077,0.644 ± 0.121,5
5,esm-t33-gearnet-pretrained-resiboost-r10,0.618 ± 0.036,0.709 ± 0.030,0.647 ± 0.066,0.645 ± 0.108,5


In [20]:
# Same per-model summary, this time for bosutinib_stats.csv.
get_stat_df('bosutinib_stats.csv')

| model_key                                | mcc           | micro_auprc   | sensitivity   | precision     |
|:-----------------------------------------|:--------------|:--------------|:--------------|:--------------|
| esm-t33                                  | 0.745 ± 0.030 | 0.852 ± 0.020 | 0.667 ± 0.059 | 0.871 ± 0.059 |
| esm-t33-pretrained                       | 0.754 ± 0.047 | 0.832 ± 0.040 | 0.660 ± 0.084 | 0.897 ± 0.014 |
| esm-t33-gearnet                          | 0.717 ± 0.027 | 0.811 ± 0.034 | 0.710 ± 0.043 | 0.768 ± 0.082 |
| esm-t33-gearnet-pretrained               | 0.735 ± 0.019 | 0.830 ± 0.010 | 0.677 ± 0.058 | 0.840 ± 0.062 |
| esm-t33-gearnet-adaboost-r10             | 0.643 ± 0.150 | 0.694 ± 0.159 | 0.563 ± 0.167 | 0.787 ± 0.105 |
| esm-t33-gearnet-pretrained-adaboost-r10  | 0.729 ± 0.043 | 0.793 ± 0.031 | 0.647 ± 0.077 | 0.863 ± 0.052 |
| esm-t33-gearnet-resiboost-r10            | 0.730 ± 0.039 | 0.818 ± 0.031 | 0.817 ± 0.066 | 0.695 ± 0.082 |
| esm-t33-gearnet-p

Unnamed: 0,model_key,mcc,micro_auprc,sensitivity,precision,record_count
0,esm-t33,0.745 ± 0.030,0.852 ± 0.020,0.667 ± 0.059,0.871 ± 0.059,5
7,esm-t33-pretrained,0.754 ± 0.047,0.832 ± 0.040,0.660 ± 0.084,0.897 ± 0.014,5
1,esm-t33-gearnet,0.717 ± 0.027,0.811 ± 0.034,0.710 ± 0.043,0.768 ± 0.082,5
3,esm-t33-gearnet-pretrained,0.735 ± 0.019,0.830 ± 0.010,0.677 ± 0.058,0.840 ± 0.062,5
2,esm-t33-gearnet-adaboost-r10,0.643 ± 0.150,0.694 ± 0.159,0.563 ± 0.167,0.787 ± 0.105,5
4,esm-t33-gearnet-pretrained-adaboost-r10,0.729 ± 0.043,0.793 ± 0.031,0.647 ± 0.077,0.863 ± 0.052,5
6,esm-t33-gearnet-resiboost-r10,0.730 ± 0.039,0.818 ± 0.031,0.817 ± 0.066,0.695 ± 0.082,5
5,esm-t33-gearnet-pretrained-resiboost-r10,0.777 ± 0.025,0.854 ± 0.019,0.777 ± 0.073,0.815 ± 0.091,5


In [21]:
def get_stat_df_combined(metrics=('mcc',), verbose=True,
                         drugs=('imatinib', 'dasatinib', 'bosutinib')):
    """Place the per-drug "mean ± std" summaries side by side.

    For every drug the file ``<drug>_stats.csv`` is summarised with
    ``get_stat_df`` and the requested metric columns are collected into one
    frame indexed by ``model_key``.

    Parameters
    ----------
    metrics : sequence of str, default ('mcc',)
        Metric columns to keep for every drug; each must be a column of the
        frame returned by ``get_stat_df``.
    verbose : bool, default True
        If true, print the combined table in pipe-format markdown.
    drugs : sequence of str, default ('imatinib', 'dasatinib', 'bosutinib')
        Drug names, in column order. Each name selects the CSV
        ``<drug>_stats.csv`` relative to the working directory.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``model_key`` with two-level ``(drug, metric)`` columns
        ordered drug-major.
    """
    drugs = list(drugs)
    metrics = list(metrics)

    per_drug = [
        get_stat_df(f'{drug}_stats.csv', verbose=False).set_index('model_key')
        for drug in drugs
    ]

    df_combined = pd.concat(per_drug, axis=1, keys=drugs)
    df_combined = df_combined[
        [(drug, met) for drug in drugs for met in metrics]]

    if verbose:
        # tabulate prints tuple column labels as their Python repr, e.g.
        # "('imatinib', 'mcc')". Print a copy with flat "drug metric" headers;
        # the returned frame keeps its two-level columns.
        printable = df_combined.copy()
        printable.columns = [
            f'{drug} {met}' for drug, met in df_combined.columns]
        markdown_table = tabulate(
            printable, headers='keys', tablefmt='pipe', showindex=True)
        print(markdown_table)

    return df_combined


# Side-by-side MCC and micro-AUPRC summaries for the three drug CSVs.
get_stat_df_combined(metrics=['mcc', 'micro_auprc'])

| model_key                                | ('imatinib', 'mcc')   | ('imatinib', 'micro_auprc')   | ('dasatinib', 'mcc')   | ('dasatinib', 'micro_auprc')   | ('bosutinib', 'mcc')   | ('bosutinib', 'micro_auprc')   |
|:-----------------------------------------|:----------------------|:------------------------------|:-----------------------|:-------------------------------|:-----------------------|:-------------------------------|
| esm-t33                                  | 0.612 ± 0.026         | 0.511 ± 0.021                 | 0.561 ± 0.030          | 0.624 ± 0.050                  | 0.745 ± 0.030          | 0.852 ± 0.020                  |
| esm-t33-pretrained                       | 0.579 ± 0.033         | 0.502 ± 0.017                 | 0.584 ± 0.028          | 0.617 ± 0.035                  | 0.754 ± 0.047          | 0.832 ± 0.040                  |
| esm-t33-gearnet                          | 0.548 ± 0.077         | 0.581 ± 0.027                 | 0.588 ± 0.076          | 0.612 

Unnamed: 0_level_0,imatinib,imatinib,dasatinib,dasatinib,bosutinib,bosutinib
Unnamed: 0_level_1,mcc,micro_auprc,mcc,micro_auprc,mcc,micro_auprc
model_key,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
esm-t33,0.612 ± 0.026,0.511 ± 0.021,0.561 ± 0.030,0.624 ± 0.050,0.745 ± 0.030,0.852 ± 0.020
esm-t33-pretrained,0.579 ± 0.033,0.502 ± 0.017,0.584 ± 0.028,0.617 ± 0.035,0.754 ± 0.047,0.832 ± 0.040
esm-t33-gearnet,0.548 ± 0.077,0.581 ± 0.027,0.588 ± 0.076,0.612 ± 0.093,0.717 ± 0.027,0.811 ± 0.034
esm-t33-gearnet-pretrained,0.627 ± 0.020,0.602 ± 0.036,0.562 ± 0.076,0.628 ± 0.045,0.735 ± 0.019,0.830 ± 0.010
esm-t33-gearnet-adaboost-r10,0.589 ± 0.018,0.577 ± 0.022,0.554 ± 0.065,0.607 ± 0.072,0.643 ± 0.150,0.694 ± 0.159
esm-t33-gearnet-pretrained-adaboost-r10,0.609 ± 0.060,0.612 ± 0.040,0.586 ± 0.087,0.655 ± 0.012,0.729 ± 0.043,0.793 ± 0.031
esm-t33-gearnet-resiboost-r10,0.589 ± 0.045,0.622 ± 0.021,0.605 ± 0.043,0.674 ± 0.052,0.730 ± 0.039,0.818 ± 0.031
esm-t33-gearnet-pretrained-resiboost-r10,0.622 ± 0.013,0.643 ± 0.024,0.618 ± 0.036,0.709 ± 0.030,0.777 ± 0.025,0.854 ± 0.019
