In [5]:
import pandas as pd

def format_mean_std(row, metric):
    """Render one metric of an aggregated row as ``"mean ± std"`` text.

    Args:
        row: Mapping-like object indexed by ``(metric, statistic)`` tuples;
            it must provide both ``(metric, 'mean')`` and ``(metric, 'std')``.
        metric: Name of the metric whose two statistics are formatted.

    Returns:
        A string with both values fixed to three decimal places, joined
        by " ± ".
    """
    center, spread = (row[(metric, stat)] for stat in ('mean', 'std'))
    return "{:.3f} ± {:.3f}".format(center, spread)

# Load the per-run evaluation records. The code below relies on a 'model_key'
# column plus one column for every name in `metrics`.
result_df = pd.read_csv('record.csv')

# Wider metric list kept for reference:
# metrics = ['mcc', 'sensitivity', 'specificity', 'accuracy', 'precision', 'micro_auroc']
metrics = ['mcc', 'sensitivity', 'precision', 'micro_auroc']

# Mean and standard deviation of every metric per model; the resulting columns
# form a (metric, statistic) MultiIndex.
grouped = result_df.groupby('model_key').agg({
    metric: ['mean', 'std'] for metric in metrics
})

# Number of records that went into each model's statistics.
record_counts = result_df.groupby('model_key').size()

for metric in metrics:
    # Bind `metric` as a default argument so the lambda never depends on a
    # later value of the loop variable.
    grouped[f'{metric}_formatted'] = grouped.apply(
        lambda row, metric=metric: format_mean_std(row, metric), axis=1
    )

# Add the record count column, aligned on model_key instead of by position.
grouped['record_count'] = record_counts.reindex(grouped.index).values

# Rank models by the numeric mean of the first metric. Sorting the formatted
# "mean ± std" strings would compare text, which misorders negative values
# (possible for MCC) and breaks ties on the std digits.
ranking = grouped[(metrics[0], 'mean')].sort_values(ascending=True).index

# Copy the selection so renaming its columns never touches `grouped`.
formatted_df = grouped[[k + '_formatted' for k in metrics] + ['record_count']].loc[ranking].copy()
formatted_df.columns = metrics + ['record_count']

# Interpretation notes. The original Korean text is kept verbatim in the string
# below; the numbers appear to be row positions in the displayed table, so
# re-check them after re-running. English translation:
#   2: esm-t33-pretrained
#   4: esm-33-gearnet-resiboost -> compared with #2, adding gearnet is better;
#      adding resiboost is also good.
#   9: esm-33-gearnet-pretrained-freezelm-ensemble -> compared with #4, it is
#      better because it is pretrained.
'''
ESM:
2: esm-t33-pretrained
4: esm-33-gearnet-resiboost	-> 2번하고 비교헀을 때: gearnet을 추가하는 게 더 낫다. resiboost를 추가하는 것도 좋다.
9: esm-33-gearnet-pretrained-freezelm-ensemble -> 4번하고 비교했을 때 pretrain된 거니까 좋다.


'''
# Turn model_key back into a regular column and display the table.
formatted_df = formatted_df.reset_index()
formatted_df



Unnamed: 0,model_key,mcc,sensitivity,precision,micro_auroc,record_count
0,esm-t33-pretrained-freezelm,0.535 ± 0.029,0.515 ± 0.095,0.632 ± 0.098,0.775 ± 0.008,10
1,esm-33-gearnet-pretrained-freezeall,0.655 ± 0.034,0.629 ± 0.026,0.731 ± 0.056,0.870 ± 0.017,10
2,esm-t33-pretrained,0.689 ± 0.015,0.705 ± 0.008,0.715 ± 0.021,0.936 ± 0.025,10
3,esm-t33,0.697 ± 0.010,0.719 ± 0.030,0.718 ± 0.031,0.929 ± 0.034,12
4,esm-33-gearnet-resiboost,0.699 ± 0.015,0.704 ± 0.005,0.735 ± 0.029,0.877 ± 0.007,5
5,esm-33-gearnet-pretrained-freezelm-resiboost,0.704 ± 0.011,0.700 ± 0.000,0.748 ± 0.021,0.875 ± 0.031,2
6,esm-33-gearnet-pretrained-ensemble,0.704 ± 0.022,0.720 ± 0.023,0.730 ± 0.054,0.897 ± 0.034,23
7,esm-33-gearnet-pretrained,0.705 ± 0.017,0.716 ± 0.019,0.735 ± 0.038,0.900 ± 0.033,10
8,esm-33-gearnet,0.711 ± 0.022,0.708 ± 0.017,0.754 ± 0.035,0.900 ± 0.017,10
9,esm-33-gearnet-pretrained-freezelm-ensemble,0.717 ± 0.014,0.700 ± 0.013,0.775 ± 0.031,0.912 ± 0.014,20


In [6]:
from tabulate import tabulate

def visualize(formatted_df, metrics):
    """Print the model summary as a pipe-format table.

    Args:
        formatted_df: DataFrame holding a 'model_key' column and one column
            for each name in ``metrics``.
        metrics: List of metric column names to show after 'model_key'.

    Only the selected columns are printed, with the column names as headers
    and without the row index.
    """
    shown_columns = ['model_key'] + metrics
    table_text = tabulate(
        formatted_df[shown_columns],
        headers='keys',
        tablefmt='pipe',
        showindex=False,
    )
    print(table_text)

# Print the summary for the currently selected metrics as a pipe-format table.
visualize(formatted_df=formatted_df, metrics=metrics)

| model_key                                    | mcc           | sensitivity   | precision     | micro_auroc   |
|:---------------------------------------------|:--------------|:--------------|:--------------|:--------------|
| esm-t33-pretrained-freezelm                  | 0.535 ± 0.029 | 0.515 ± 0.095 | 0.632 ± 0.098 | 0.775 ± 0.008 |
| esm-33-gearnet-pretrained-freezeall          | 0.655 ± 0.034 | 0.629 ± 0.026 | 0.731 ± 0.056 | 0.870 ± 0.017 |
| esm-t33-pretrained                           | 0.689 ± 0.015 | 0.705 ± 0.008 | 0.715 ± 0.021 | 0.936 ± 0.025 |
| esm-t33                                      | 0.697 ± 0.010 | 0.719 ± 0.030 | 0.718 ± 0.031 | 0.929 ± 0.034 |
| esm-33-gearnet-resiboost                     | 0.699 ± 0.015 | 0.704 ± 0.005 | 0.735 ± 0.029 | 0.877 ± 0.007 |
| esm-33-gearnet-pretrained-freezelm-resiboost | 0.704 ± 0.011 | 0.700 ± 0.000 | 0.748 ± 0.021 | 0.875 ± 0.031 |
| esm-33-gearnet-pretrained-ensemble           | 0.704 ± 0.022 | 0.720 ± 0.023 | 0.730 ± 0.054 |