In [1]:
import pandas as pd
from tabulate import tabulate
def format_mean_std(row, metric):
    """Render one metric of an aggregated row as ``"<mean> ± <std>"``.

    Args:
        row: Mapping-like object indexed by ``(metric, stat)`` tuples, where
            ``stat`` is ``'mean'`` or ``'std'``.
        metric: Name of the metric whose two statistics are looked up.

    Returns:
        The mean and the standard deviation, each with three decimals,
        joined by `` ± ``.
    """
    # Look up the mean first, then the std, both under the same metric key.
    mean_val, std_val = (row[(metric, stat)] for stat in ('mean', 'std'))
    return f"{mean_val:.3f} ± {std_val:.3f}"


def get_stat_df(result_df, verbose=True, metrics=None):
    """Summarise per-run metrics as one ``"mean ± std"`` row per model.

    Rows of ``result_df`` are grouped by ``model_key``. For every requested
    metric the group ``mean`` and ``std`` aggregations are computed and
    rendered through ``format_mean_std``. The result is ordered by the
    numeric mean of the first metric, ascending; models with an equal mean
    keep their ``model_key`` order.

    Args:
        result_df: DataFrame with a ``model_key`` column and one numeric
            column per entry of ``metrics``.
        verbose: When true, print the summary (without ``record_count``) as
            a pipe-style Markdown table using ``tabulate``.
        metrics: Sequence of metric column names. ``None`` selects
            ``['mcc', 'micro_auprc', 'micro_auroc', 'sensitivity',
            'precision']``.

    Returns:
        DataFrame with the columns ``model_key``, one formatted string
        column per metric, and ``record_count`` (rows per model).

    Raises:
        ValueError: If ``metrics`` is empty.
    """
    if metrics is None:
        metrics = ['mcc', 'micro_auprc', 'micro_auroc',
                   'sensitivity', 'precision']
    # Accept any sequence (tuple, Index, ...) and never alias the caller's list.
    metrics = list(metrics)
    if not metrics:
        raise ValueError("metrics must contain at least one column name")

    by_model = result_df.groupby('model_key')
    grouped = by_model.agg({k: ['mean', 'std'] for k in metrics})

    # Sort on the numeric mean *before* formatting. Sorting the formatted
    # strings is lexicographic, which misorders negative values and breaks
    # ties on the std text. mergesort is stable, so ties stay deterministic.
    grouped = grouped.sort_values(
        (metrics[0], 'mean'), ascending=True, kind='mergesort')

    formatted_df = pd.DataFrame(index=grouped.index)
    for metric in metrics:
        formatted_df[metric] = [
            format_mean_std(row, metric) for _, row in grouped.iterrows()
        ]

    # Assigning the Series aligns on model_key, so the counts stay attached
    # to the right model regardless of the row order chosen above.
    formatted_df['record_count'] = by_model.size()

    formatted_df = formatted_df.reset_index()

    if verbose:
        markdown_table = tabulate(
            formatted_df[['model_key'] + metrics],
            headers='keys', tablefmt='pipe', showindex=False)
        print(markdown_table)

    return formatted_df

In [2]:
import pandas as pd

# Load the per-run evaluation records.
df = pd.read_csv('atpbind3d_stats.csv')

# Keep the base 'esm-t33-gearnet' model together with every variant whose
# key starts with that prefix.
df_gn = df.loc[df['model_key'].str.startswith('esm-t33-gearnet')]

get_stat_df(
    df_gn,
    metrics=['mcc', 'micro_auprc', 'micro_auroc', 'sensitivity', 'precision'],
)

| model_key                           | mcc           | micro_auprc   | micro_auroc   | sensitivity   | precision     |
|:------------------------------------|:--------------|:--------------|:--------------|:--------------|:--------------|
| esm-t33-gearnet-640-2               | 0.671 ± 0.009 | 0.683 ± 0.013 | 0.923 ± 0.008 | 0.622 ± 0.023 | 0.753 ± 0.021 |
| esm-t33-gearnet                     | 0.671 ± 0.011 | 0.690 ± 0.009 | 0.925 ± 0.007 | 0.628 ± 0.022 | 0.748 ± 0.015 |
| esm-t33-gearnet-320-4               | 0.672 ± 0.008 | 0.683 ± 0.004 | 0.920 ± 0.006 | 0.623 ± 0.024 | 0.755 ± 0.015 |
| esm-t33-gearnet-800-4               | 0.672 ± 0.011 | 0.694 ± 0.018 | 0.932 ± 0.009 | 0.640 ± 0.033 | 0.736 ± 0.027 |
| esm-t33-gearnet-800-3               | 0.672 ± 0.016 | 0.685 ± 0.014 | 0.925 ± 0.008 | 0.623 ± 0.024 | 0.753 ± 0.022 |
| esm-t33-gearnet-960-2               | 0.676 ± 0.013 | 0.691 ± 0.015 | 0.926 ± 0.010 | 0.631 ± 0.014 | 0.753 ± 0.023 |
| esm-t33-gearnet-960-1               | 

Unnamed: 0,model_key,mcc,micro_auprc,micro_auroc,sensitivity,precision,record_count
0,esm-t33-gearnet-640-2,0.671 ± 0.009,0.683 ± 0.013,0.923 ± 0.008,0.622 ± 0.023,0.753 ± 0.021,10
1,esm-t33-gearnet,0.671 ± 0.011,0.690 ± 0.009,0.925 ± 0.007,0.628 ± 0.022,0.748 ± 0.015,5
2,esm-t33-gearnet-320-4,0.672 ± 0.008,0.683 ± 0.004,0.920 ± 0.006,0.623 ± 0.024,0.755 ± 0.015,5
3,esm-t33-gearnet-800-4,0.672 ± 0.011,0.694 ± 0.018,0.932 ± 0.009,0.640 ± 0.033,0.736 ± 0.027,5
4,esm-t33-gearnet-800-3,0.672 ± 0.016,0.685 ± 0.014,0.925 ± 0.008,0.623 ± 0.024,0.753 ± 0.022,15
5,esm-t33-gearnet-960-2,0.676 ± 0.013,0.691 ± 0.015,0.926 ± 0.010,0.631 ± 0.014,0.753 ± 0.023,10
6,esm-t33-gearnet-960-1,0.676 ± 0.016,0.684 ± 0.016,0.923 ± 0.006,0.617 ± 0.030,0.769 ± 0.029,10
7,esm-t33-gearnet-640,0.677 ± 0.009,0.685 ± 0.017,0.923 ± 0.008,0.640 ± 0.014,0.745 ± 0.015,15
8,esm-t33-gearnet-800-3-adaboost-r10,0.685 ± 0.005,0.705 ± 0.003,0.913 ± 0.005,0.639 ± 0.011,0.762 ± 0.014,5
9,esm-t33-gearnet-640-adaboost-r10,0.685 ± 0.014,0.691 ± 0.011,0.902 ± 0.008,0.655 ± 0.008,0.746 ± 0.020,5


In [3]:
import pandas as pd

# Reload the per-run records so this cell does not rely on earlier cells.
df = pd.read_csv('atpbind3d_stats.csv')

# Mean/std of mcc and micro_auprc per model, best micro_auprc mean first.
(
    df
    .groupby('model_key')
    .agg({metric: ['mean', 'std'] for metric in ('mcc', 'micro_auprc')})
    .sort_values(by=('micro_auprc', 'mean'), ascending=False)
)

Unnamed: 0_level_0,mcc,mcc,micro_auprc,micro_auprc
Unnamed: 0_level_1,mean,std,mean,std
model_key,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
esm-t33-gearnet-adaboost-r50,0.69442,0.005516,0.7255,0.011629
esm-t33-gearnet-resiboost-r90,0.69656,0.011627,0.7248,0.007234
esm-t33-gearnet-resiboost-r50,0.69716,0.009042,0.72448,0.008971
esm-t33-gearnet-ensemble,0.69238,0.00925,0.7238,0.009424
esm-t33-gearnet-adaboost-r80,0.69846,0.007369,0.72304,0.008454
esm-t33-gearnet-800-3-ensemble,0.69268,0.004945,0.72286,0.008355
esm-t33-gearnet-adaboost-r90,0.69486,0.006749,0.72234,0.008256
esm-t33-gearnet-resiboost-r80,0.69628,0.010193,0.72228,0.007619
esm-t33-gearnet-rus-r50,0.69486,0.015224,0.72128,0.008891
esm-t33-gearnet-rus-r90,0.69458,0.01046,0.7187,0.008048


In [4]:
# Final comparison set:
#   * every 'esm-t33-gearnet*' model except the -800/-320/-640/-960 variants
#     excluded below, and
#   * the four baseline models matched by exact key.
df_final = df[
    (
        df['model_key'].str.startswith('esm-t33-gearnet')
        & ~(
            df['model_key'].str.startswith('esm-t33-gearnet-800')
            | df['model_key'].str.startswith('esm-t33-gearnet-320')
            | df['model_key'].str.startswith('esm-t33-gearnet-640')
            | df['model_key'].str.startswith('esm-t33-gearnet-960')
        )
    )
    | df['model_key'].isin(['gearnet', 'bert', 'bert-gearnet', 'esm-t33'])
]

get_stat_df(df_final)

| model_key                     | mcc           | micro_auprc   | micro_auroc   | sensitivity   | precision     |
|:------------------------------|:--------------|:--------------|:--------------|:--------------|:--------------|
| gearnet                       | 0.470 ± 0.012 | 0.490 ± 0.012 | 0.879 ± 0.009 | 0.385 ± 0.057 | 0.627 ± 0.065 |
| bert                          | 0.481 ± 0.031 | 0.467 ± 0.014 | 0.871 ± 0.007 | 0.399 ± 0.014 | 0.626 ± 0.061 |
| bert-gearnet                  | 0.563 ± 0.006 | 0.576 ± 0.010 | 0.902 ± 0.004 | 0.504 ± 0.027 | 0.669 ± 0.028 |
| esm-t33                       | 0.662 ± 0.011 | 0.671 ± 0.011 | 0.928 ± 0.007 | 0.614 ± 0.019 | 0.746 ± 0.029 |
| esm-t33-gearnet               | 0.671 ± 0.011 | 0.690 ± 0.009 | 0.925 ± 0.007 | 0.628 ± 0.022 | 0.748 ± 0.015 |
| esm-t33-gearnet-adaboost-r10  | 0.689 ± 0.012 | 0.707 ± 0.011 | 0.911 ± 0.006 | 0.654 ± 0.018 | 0.753 ± 0.013 |
| esm-t33-gearnet-resiboost-r10 | 0.691 ± 0.010 | 0.713 ± 0.009 | 0.911 ± 0.007 | 0.662 

Unnamed: 0,model_key,mcc,micro_auprc,micro_auroc,sensitivity,precision,record_count
0,gearnet,0.470 ± 0.012,0.490 ± 0.012,0.879 ± 0.009,0.385 ± 0.057,0.627 ± 0.065,5
1,bert,0.481 ± 0.031,0.467 ± 0.014,0.871 ± 0.007,0.399 ± 0.014,0.626 ± 0.061,5
2,bert-gearnet,0.563 ± 0.006,0.576 ± 0.010,0.902 ± 0.004,0.504 ± 0.027,0.669 ± 0.028,5
3,esm-t33,0.662 ± 0.011,0.671 ± 0.011,0.928 ± 0.007,0.614 ± 0.019,0.746 ± 0.029,5
4,esm-t33-gearnet,0.671 ± 0.011,0.690 ± 0.009,0.925 ± 0.007,0.628 ± 0.022,0.748 ± 0.015,5
5,esm-t33-gearnet-adaboost-r10,0.689 ± 0.012,0.707 ± 0.011,0.911 ± 0.006,0.654 ± 0.018,0.753 ± 0.013,5
6,esm-t33-gearnet-resiboost-r10,0.691 ± 0.010,0.713 ± 0.009,0.911 ± 0.007,0.662 ± 0.018,0.750 ± 0.031,5
7,esm-t33-gearnet-ensemble,0.692 ± 0.009,0.724 ± 0.009,0.939 ± 0.004,0.635 ± 0.012,0.783 ± 0.019,5
8,esm-t33-gearnet-adaboost-r50,0.694 ± 0.006,0.726 ± 0.012,0.935 ± 0.007,0.650 ± 0.021,0.770 ± 0.016,5
9,esm-t33-gearnet-adaboost-r90,0.695 ± 0.007,0.722 ± 0.008,0.935 ± 0.009,0.657 ± 0.011,0.762 ± 0.002,5
