In [2]:
import pandas as pd

# Load the per-run results table.
df = pd.read_csv('atpbind3d_stats.csv')

# Per-model mean and standard deviation of MCC and micro-AUPRC,
# ordered from the highest to the lowest mean MCC.
(
    df.groupby('model_key')[['mcc', 'micro_auprc']]
    .agg(['mean', 'std'])
    .sort_values(('mcc', 'mean'), ascending=False)
)

Unnamed: 0_level_0,mcc,mcc,micro_auprc,micro_auprc
Unnamed: 0_level_1,mean,std,mean,std
model_key,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
esm-t33-gearnet-resiboost-r10,0.69114,0.009983,0.71308,0.009251
esm-t33-gearnet-adaboost-r10,0.6885,0.011599,0.70702,0.011393
esm-t33-gearnet,0.67136,0.010596,0.68956,0.008797
esm-t33,0.66246,0.011116,0.67136,0.011282


In [1]:
import pandas as pd

# Load the per-run results table.
df = pd.read_csv('atpbind3d_stats.csv')

# Per-model mean and standard deviation of MCC and micro-AUPRC,
# ordered from the highest to the lowest mean micro-AUPRC.
df.groupby('model_key').agg(
    {column: ['mean', 'std'] for column in ('mcc', 'micro_auprc')}
).sort_values(
    ('micro_auprc', 'mean'),
    ascending=False,
)

Unnamed: 0_level_0,mcc,mcc,micro_auprc,micro_auprc
Unnamed: 0_level_1,mean,std,mean,std
model_key,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
esm-t33-gearnet-resiboost-r10,0.69266,0.007966,0.71218,0.006018
esm-t33-gearnet-adaboost-r10,0.6873,0.009608,0.70158,0.011219
esm-t33,0.66022,0.018397,0.67628,0.017926


In [3]:
import pandas as pd
from tabulate import tabulate

def format_mean_std(row, metric):
    """Render one metric's aggregate statistics as a ``mean ± std`` string.

    Args:
        row: Object indexable by the tuples ``(metric, 'mean')`` and
            ``(metric, 'std')`` (for example a row of a frame with two-level
            column labels).
        metric: Name of the metric whose statistics are read from ``row``.

    Returns:
        The mean and the standard deviation, each formatted with three
        decimals and joined by `` ± ``.
    """
    average, spread = (row[(metric, stat)] for stat in ('mean', 'std'))
    return f"{average:.3f} ± {spread:.3f}"


def get_stat_df(filename, verbose=True):
    """Summarise per-model metrics from a results CSV as ``mean ± std`` strings.

    The CSV is grouped by ``model_key``. For each reported metric the group
    mean and standard deviation are computed and rendered with
    ``format_mean_std``. Rows are ordered by ascending *numeric* mean of the
    first metric (``micro_auprc``).

    Args:
        filename: Path or file-like object passed to ``pd.read_csv``. The data
            must provide a ``model_key`` column and one column per metric.
        verbose: When true, also print the table (without ``record_count``)
            as a pipe-style Markdown table.

    Returns:
        A DataFrame with a ``model_key`` column, one formatted string column
        per metric, and ``record_count`` (number of CSV rows per model),
        indexed 0..n-1.
    """
    result_df = pd.read_csv(filename)

    metrics = ['micro_auprc', 'mcc', 'sensitivity', 'precision', 'micro_auroc']
    per_model = result_df.groupby('model_key')
    stats = per_model.agg({k: ['mean', 'std'] for k in metrics})

    # Order on the numeric mean *before* formatting. Sorting the formatted
    # strings compares text, so models whose means round to the same three
    # decimals would be ordered by their std text instead of their real means.
    # A stable sort keeps tied models in model_key order.
    stats = stats.sort_values((metrics[0], 'mean'), ascending=True, kind='stable')

    formatted_df = pd.DataFrame(
        {
            metric: [format_mean_std(row, metric) for _, row in stats.iterrows()]
            for metric in metrics
        },
        index=stats.index,
    )

    # Assigning the Series (not its raw values) aligns the counts on model_key,
    # so they stay attached to the right model regardless of row order.
    formatted_df['record_count'] = per_model.size()

    formatted_df = formatted_df.reset_index()

    if verbose:
        markdown_table = tabulate(
            formatted_df[['model_key'] + metrics],
            headers='keys',
            tablefmt='pipe',
            showindex=False,
        )
        print(markdown_table)

    return formatted_df

        
# Summarise atpbind3d_stats.csv: with the default verbose=True the Markdown
# table is printed, and the returned frame is shown as the cell's result.
get_stat_df(filename='atpbind3d_stats.csv')

| model_key                     | micro_auprc   | mcc           | sensitivity   | precision     | micro_auroc   |
|:------------------------------|:--------------|:--------------|:--------------|:--------------|:--------------|
| esm-t33                       | 0.671 ± 0.011 | 0.662 ± 0.011 | 0.614 ± 0.019 | 0.746 ± 0.029 | 0.928 ± 0.007 |
| esm-t33-gearnet               | 0.690 ± 0.009 | 0.671 ± 0.011 | 0.628 ± 0.022 | 0.748 ± 0.015 | 0.925 ± 0.007 |
| esm-t33-gearnet-adaboost-r10  | 0.707 ± 0.011 | 0.689 ± 0.012 | 0.654 ± 0.018 | 0.753 ± 0.013 | 0.911 ± 0.006 |
| esm-t33-gearnet-resiboost-r10 | 0.713 ± 0.009 | 0.691 ± 0.010 | 0.662 ± 0.018 | 0.750 ± 0.031 | 0.911 ± 0.007 |


Unnamed: 0,model_key,micro_auprc,mcc,sensitivity,precision,micro_auroc,record_count
0,esm-t33,0.671 ± 0.011,0.662 ± 0.011,0.614 ± 0.019,0.746 ± 0.029,0.928 ± 0.007,5
1,esm-t33-gearnet,0.690 ± 0.009,0.671 ± 0.011,0.628 ± 0.022,0.748 ± 0.015,0.925 ± 0.007,5
2,esm-t33-gearnet-adaboost-r10,0.707 ± 0.011,0.689 ± 0.012,0.654 ± 0.018,0.753 ± 0.013,0.911 ± 0.006,5
3,esm-t33-gearnet-resiboost-r10,0.713 ± 0.009,0.691 ± 0.010,0.662 ± 0.018,0.750 ± 0.031,0.911 ± 0.007,5
