In [1]:
import pandas as pd
from tabulate import tabulate


def format_mean_std(row, metric):
    """Render one metric of an aggregated row as ``"<mean> ± <std>"``.

    ``row`` is indexed with ``(metric, 'mean')`` and ``(metric, 'std')``;
    both values are printed with three decimal places.
    """
    mean_and_std = [row[(metric, stat)] for stat in ('mean', 'std')]
    return "{:.3f} ± {:.3f}".format(*mean_and_std)


def get_stat_df(result_df, verbose=True, metrics=['micro_auprc', 'micro_auroc',  'mcc',
                                                  'sensitivity', 'precision']):
    """Summarise metrics as one ``"mean ± std"`` row per ``model_key``.

    Parameters
    ----------
    result_df : DataFrame
        Must contain a ``model_key`` column and one numeric column for each
        entry of ``metrics``.
    verbose : bool
        When true, print the summary (without ``record_count``) as a
        pipe-format markdown table.
    metrics : list of str
        Columns to aggregate. Rows are sorted ascending by the mean of the
        first metric.

    Returns
    -------
    DataFrame
        Columns ``model_key``, one string column per metric formatted as
        ``"mean ± std"`` with three decimals, and ``record_count`` (number of
        rows of ``result_df`` in the group).
    """
    metrics = list(metrics)  # local copy; the default list is shared between calls

    by_model = result_df.groupby('model_key')
    stats = by_model.agg({k: ['mean', 'std'] for k in metrics})

    # Build the display frame from scratch instead of appending columns to the
    # MultiIndex-column aggregate and relabelling a slice of it afterwards.
    formatted_df = pd.DataFrame(index=stats.index)
    for metric in metrics:
        means = stats[(metric, 'mean')]
        stds = stats[(metric, 'std')]
        formatted_df[metric] = [f"{m:.3f} ± {s:.3f}" for m, s in zip(means, stds)]
    formatted_df['record_count'] = by_model.size()

    # Order by the numeric mean of the first metric. Sorting the formatted
    # strings is lexicographic and misorders negative values
    # ("-0.125 ..." sorts before "-0.500 ...").
    order = stats[(metrics[0], 'mean')].sort_values(ascending=True, kind='stable').index
    formatted_df = formatted_df.loc[order].rename_axis('model_key').reset_index()

    if verbose:
        markdown_table = tabulate(formatted_df[['model_key'] + metrics],
                                  headers='keys', tablefmt='pipe', showindex=False)
        print(markdown_table)

    return formatted_df

# Load the imatinib results table; it is filtered and summarised further down in this cell.
df_imatinib = pd.read_csv('imatinib_stats.csv')

def global_filter(df):
    """Keep only the rows of ``df`` whose ``model_key`` is in the comparison set.

    A row is kept when its ``model_key`` either

    * starts with ``esm-t33-gearnet-`` followed by a non-digit character and
      contains none of ``r50``, ``r80``, ``r10``, ``rus`` or ``ensemble``; or
    * is exactly ``esm-t33-gearnet``, ``esm-t33`` or ``esm-t33-pretrained``.
    """
    keys = df['model_key']

    # Start from the gearnet-variant prefix match, then knock out each excluded tag.
    keep = keys.str.contains(r'^esm-t33-gearnet-(?!\d)')
    for excluded_tag in (r'r50', r'r80', r'r10', r'rus', r'ensemble'):
        keep = keep & ~keys.str.contains(excluded_tag)

    # The baselines are always kept, matched by exact name.
    for baseline in ('esm-t33-gearnet', 'esm-t33', 'esm-t33-pretrained'):
        keep = keep | (keys == baseline)

    return df[keep]

# Summarise the filtered imatinib rows: prints the markdown table (verbose default)
# and the returned frame, which also carries record_count, is the cell output.
get_stat_df(global_filter(df_imatinib))

| model_key                                | micro_auprc   | micro_auroc   | mcc           | sensitivity   | precision     |
|:-----------------------------------------|:--------------|:--------------|:--------------|:--------------|:--------------|
| esm-t33-pretrained                       | 0.502 ± 0.017 | 0.857 ± 0.015 | 0.579 ± 0.033 | 0.548 ± 0.068 | 0.663 ± 0.026 |
| esm-t33                                  | 0.511 ± 0.021 | 0.864 ± 0.033 | 0.612 ± 0.026 | 0.594 ± 0.032 | 0.678 ± 0.052 |
| esm-t33-gearnet                          | 0.581 ± 0.027 | 0.846 ± 0.010 | 0.548 ± 0.077 | 0.513 ± 0.046 | 0.656 ± 0.172 |
| esm-t33-gearnet-pretrained               | 0.602 ± 0.036 | 0.820 ± 0.044 | 0.627 ± 0.020 | 0.533 ± 0.035 | 0.782 ± 0.063 |
| esm-t33-gearnet-resiboost-r90            | 0.638 ± 0.018 | 0.845 ± 0.010 | 0.610 ± 0.040 | 0.548 ± 0.068 | 0.741 ± 0.134 |
| esm-t33-gearnet-adaboost-r90             | 0.640 ± 0.017 | 0.847 ± 0.012 | 0.610 ± 0.058 | 0.555 ± 0.083 | 0.741 ± 0.170 |


Unnamed: 0,model_key,micro_auprc,micro_auroc,mcc,sensitivity,precision,record_count
0,esm-t33-pretrained,0.502 ± 0.017,0.857 ± 0.015,0.579 ± 0.033,0.548 ± 0.068,0.663 ± 0.026,5
1,esm-t33,0.511 ± 0.021,0.864 ± 0.033,0.612 ± 0.026,0.594 ± 0.032,0.678 ± 0.052,5
2,esm-t33-gearnet,0.581 ± 0.027,0.846 ± 0.010,0.548 ± 0.077,0.513 ± 0.046,0.656 ± 0.172,5
3,esm-t33-gearnet-pretrained,0.602 ± 0.036,0.820 ± 0.044,0.627 ± 0.020,0.533 ± 0.035,0.782 ± 0.063,5
4,esm-t33-gearnet-resiboost-r90,0.638 ± 0.018,0.845 ± 0.010,0.610 ± 0.040,0.548 ± 0.068,0.741 ± 0.134,10
5,esm-t33-gearnet-adaboost-r90,0.640 ± 0.017,0.847 ± 0.012,0.610 ± 0.058,0.555 ± 0.083,0.741 ± 0.170,10
6,esm-t33-gearnet-pretrained-adaboost-r90,0.649 ± 0.021,0.844 ± 0.030,0.615 ± 0.017,0.565 ± 0.075,0.724 ± 0.106,5
7,esm-t33-gearnet-pretrained-resiboost-r90,0.656 ± 0.027,0.853 ± 0.039,0.624 ± 0.014,0.554 ± 0.075,0.756 ± 0.100,5


In [2]:
# Same summary as for imatinib, applied to the dasatinib results table.
df_dasatinib = pd.read_csv('dasatinib_stats.csv')
get_stat_df(global_filter(df_dasatinib))

| model_key                                | micro_auprc   | micro_auroc   | mcc           | sensitivity   | precision     |
|:-----------------------------------------|:--------------|:--------------|:--------------|:--------------|:--------------|
| esm-t33-gearnet                          | 0.612 ± 0.093 | 0.905 ± 0.030 | 0.588 ± 0.076 | 0.508 ± 0.083 | 0.730 ± 0.068 |
| esm-t33-pretrained                       | 0.617 ± 0.035 | 0.907 ± 0.014 | 0.584 ± 0.028 | 0.576 ± 0.056 | 0.647 ± 0.090 |
| esm-t33                                  | 0.624 ± 0.050 | 0.900 ± 0.040 | 0.561 ± 0.030 | 0.515 ± 0.074 | 0.669 ± 0.070 |
| esm-t33-gearnet-pretrained               | 0.628 ± 0.045 | 0.904 ± 0.017 | 0.562 ± 0.076 | 0.393 ± 0.119 | 0.876 ± 0.093 |
| esm-t33-gearnet-adaboost-r90             | 0.694 ± 0.049 | 0.935 ± 0.016 | 0.567 ± 0.116 | 0.420 ± 0.168 | 0.843 ± 0.071 |
| esm-t33-gearnet-resiboost-r90            | 0.696 ± 0.059 | 0.936 ± 0.009 | 0.587 ± 0.080 | 0.485 ± 0.134 | 0.774 ± 0.072 |


Unnamed: 0,model_key,micro_auprc,micro_auroc,mcc,sensitivity,precision,record_count
0,esm-t33-gearnet,0.612 ± 0.093,0.905 ± 0.030,0.588 ± 0.076,0.508 ± 0.083,0.730 ± 0.068,5
1,esm-t33-pretrained,0.617 ± 0.035,0.907 ± 0.014,0.584 ± 0.028,0.576 ± 0.056,0.647 ± 0.090,5
2,esm-t33,0.624 ± 0.050,0.900 ± 0.040,0.561 ± 0.030,0.515 ± 0.074,0.669 ± 0.070,5
3,esm-t33-gearnet-pretrained,0.628 ± 0.045,0.904 ± 0.017,0.562 ± 0.076,0.393 ± 0.119,0.876 ± 0.093,5
4,esm-t33-gearnet-adaboost-r90,0.694 ± 0.049,0.935 ± 0.016,0.567 ± 0.116,0.420 ± 0.168,0.843 ± 0.071,5
5,esm-t33-gearnet-resiboost-r90,0.696 ± 0.059,0.936 ± 0.009,0.587 ± 0.080,0.485 ± 0.134,0.774 ± 0.072,5
6,esm-t33-gearnet-pretrained-adaboost-r90,0.712 ± 0.038,0.941 ± 0.012,0.590 ± 0.050,0.488 ± 0.145,0.787 ± 0.111,5
7,esm-t33-gearnet-pretrained-resiboost-r90,0.715 ± 0.035,0.941 ± 0.012,0.606 ± 0.053,0.475 ± 0.097,0.828 ± 0.058,5


In [3]:
# Same summary as for imatinib, applied to the bosutinib results table.
df_bosutinib = pd.read_csv('bosutinib_stats.csv')
get_stat_df(global_filter(df_bosutinib))

| model_key                                | micro_auprc   | micro_auroc   | mcc           | sensitivity   | precision     |
|:-----------------------------------------|:--------------|:--------------|:--------------|:--------------|:--------------|
| esm-t33-gearnet-resiboost-r90            | 0.806 ± 0.042 | 0.942 ± 0.021 | 0.731 ± 0.047 | 0.697 ± 0.063 | 0.814 ± 0.130 |
| esm-t33-gearnet-adaboost-r90             | 0.809 ± 0.039 | 0.947 ± 0.020 | 0.739 ± 0.034 | 0.697 ± 0.092 | 0.831 ± 0.107 |
| esm-t33-gearnet                          | 0.811 ± 0.034 | 0.953 ± 0.007 | 0.717 ± 0.027 | 0.710 ± 0.043 | 0.768 ± 0.082 |
| esm-t33-gearnet-pretrained               | 0.830 ± 0.010 | 0.957 ± 0.012 | 0.735 ± 0.019 | 0.677 ± 0.058 | 0.840 ± 0.062 |
| esm-t33-pretrained                       | 0.832 ± 0.040 | 0.936 ± 0.035 | 0.754 ± 0.047 | 0.660 ± 0.084 | 0.897 ± 0.014 |
| esm-t33                                  | 0.852 ± 0.020 | 0.952 ± 0.018 | 0.745 ± 0.030 | 0.667 ± 0.059 | 0.871 ± 0.059 |


Unnamed: 0,model_key,micro_auprc,micro_auroc,mcc,sensitivity,precision,record_count
0,esm-t33-gearnet-resiboost-r90,0.806 ± 0.042,0.942 ± 0.021,0.731 ± 0.047,0.697 ± 0.063,0.814 ± 0.130,5
1,esm-t33-gearnet-adaboost-r90,0.809 ± 0.039,0.947 ± 0.020,0.739 ± 0.034,0.697 ± 0.092,0.831 ± 0.107,5
2,esm-t33-gearnet,0.811 ± 0.034,0.953 ± 0.007,0.717 ± 0.027,0.710 ± 0.043,0.768 ± 0.082,5
3,esm-t33-gearnet-pretrained,0.830 ± 0.010,0.957 ± 0.012,0.735 ± 0.019,0.677 ± 0.058,0.840 ± 0.062,5
4,esm-t33-pretrained,0.832 ± 0.040,0.936 ± 0.035,0.754 ± 0.047,0.660 ± 0.084,0.897 ± 0.014,5
5,esm-t33,0.852 ± 0.020,0.952 ± 0.018,0.745 ± 0.030,0.667 ± 0.059,0.871 ± 0.059,5
6,esm-t33-gearnet-pretrained-resiboost-r90,0.872 ± 0.023,0.968 ± 0.009,0.766 ± 0.024,0.730 ± 0.110,0.850 ± 0.116,5
7,esm-t33-gearnet-pretrained-adaboost-r90,0.875 ± 0.026,0.969 ± 0.010,0.765 ± 0.025,0.720 ± 0.114,0.860 ± 0.119,5


In [4]:
def get_stat_df_combined(metrics=['mcc'], verbose=True):
    """Build one table with the per-model summaries of all three drugs side by side.

    Each drug's CSV is read, passed through ``global_filter`` and
    ``get_stat_df`` (silently), and indexed by ``model_key``. The three
    summaries are concatenated column-wise under a top-level drug key, and
    only the requested ``metrics`` are kept, drug by drug.

    Parameters
    ----------
    metrics : list of str
        Metric columns to keep for every drug.
    verbose : bool
        When true, print the combined table in pipe (markdown) format.

    Returns
    -------
    DataFrame indexed by ``model_key`` with ``(drug, metric)`` columns.
    """
    sources = [
        ('imatinib', 'imatinib_stats.csv'),
        ('dasatinib', 'dasatinib_stats.csv'),
        ('bosutinib', 'bosutinib_stats.csv'),
    ]

    drug_names = []
    per_drug_tables = []
    for drug, csv_name in sources:
        summary = get_stat_df(global_filter(pd.read_csv(csv_name)), verbose=False)
        drug_names.append(drug)
        per_drug_tables.append(summary.set_index('model_key'))

    side_by_side = pd.concat(per_drug_tables, axis=1, keys=drug_names)
    wanted_columns = [(drug, met) for drug in drug_names for met in metrics]
    df_combined = side_by_side[wanted_columns]

    if verbose:
        print(tabulate(df_combined, headers='keys', tablefmt='pipe', showindex=True))

    return df_combined


# Side-by-side MCC and micro-AUPRC summaries for imatinib, dasatinib and bosutinib.
get_stat_df_combined(metrics=['mcc', 'micro_auprc'])

| model_key                                | ('imatinib', 'mcc')   | ('imatinib', 'micro_auprc')   | ('dasatinib', 'mcc')   | ('dasatinib', 'micro_auprc')   | ('bosutinib', 'mcc')   | ('bosutinib', 'micro_auprc')   |
|:-----------------------------------------|:----------------------|:------------------------------|:-----------------------|:-------------------------------|:-----------------------|:-------------------------------|
| esm-t33-pretrained                       | 0.579 ± 0.033         | 0.502 ± 0.017                 | 0.584 ± 0.028          | 0.617 ± 0.035                  | 0.754 ± 0.047          | 0.832 ± 0.040                  |
| esm-t33                                  | 0.612 ± 0.026         | 0.511 ± 0.021                 | 0.561 ± 0.030          | 0.624 ± 0.050                  | 0.745 ± 0.030          | 0.852 ± 0.020                  |
| esm-t33-gearnet                          | 0.548 ± 0.077         | 0.581 ± 0.027                 | 0.588 ± 0.076          | 0.612 

Unnamed: 0_level_0,imatinib,imatinib,dasatinib,dasatinib,bosutinib,bosutinib
Unnamed: 0_level_1,mcc,micro_auprc,mcc,micro_auprc,mcc,micro_auprc
model_key,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
esm-t33-pretrained,0.579 ± 0.033,0.502 ± 0.017,0.584 ± 0.028,0.617 ± 0.035,0.754 ± 0.047,0.832 ± 0.040
esm-t33,0.612 ± 0.026,0.511 ± 0.021,0.561 ± 0.030,0.624 ± 0.050,0.745 ± 0.030,0.852 ± 0.020
esm-t33-gearnet,0.548 ± 0.077,0.581 ± 0.027,0.588 ± 0.076,0.612 ± 0.093,0.717 ± 0.027,0.811 ± 0.034
esm-t33-gearnet-pretrained,0.627 ± 0.020,0.602 ± 0.036,0.562 ± 0.076,0.628 ± 0.045,0.735 ± 0.019,0.830 ± 0.010
esm-t33-gearnet-resiboost-r90,0.610 ± 0.040,0.638 ± 0.018,0.587 ± 0.080,0.696 ± 0.059,0.731 ± 0.047,0.806 ± 0.042
esm-t33-gearnet-adaboost-r90,0.610 ± 0.058,0.640 ± 0.017,0.567 ± 0.116,0.694 ± 0.049,0.739 ± 0.034,0.809 ± 0.039
esm-t33-gearnet-pretrained-adaboost-r90,0.615 ± 0.017,0.649 ± 0.021,0.590 ± 0.050,0.712 ± 0.038,0.765 ± 0.025,0.875 ± 0.026
esm-t33-gearnet-pretrained-resiboost-r90,0.624 ± 0.014,0.656 ± 0.027,0.606 ± 0.053,0.715 ± 0.035,0.766 ± 0.024,0.872 ± 0.023


# New: Imatinib

In [5]:
# Model variants whose per-model imatinib result files are combined below.
# The last entry used to be misspelled 'esm-t33-gearent-pretrained'; with the
# name fixed, the file list is derived from `models` rather than typed twice.
models = ['esm-t33', 'esm-t33-gearnet', 'esm-t33-pretrained', 'esm-t33-gearnet-pretrained']
concat_csv = pd.concat([
    pd.read_csv(f'imatinib_{model}_stats.csv') for model in models
])


get_stat_df(concat_csv, metrics=['mcc', 'micro_auprc'])


| model_key                  | mcc           | micro_auprc   |
|:---------------------------|:--------------|:--------------|
| esm-t33-gearnet-pretrained | 0.485 ± 0.102 | 0.485 ± 0.090 |
| esm-t33-pretrained         | 0.540 ± 0.067 | 0.563 ± 0.041 |
| esm-t33-gearnet            | 0.575 ± 0.024 | 0.554 ± 0.024 |
| esm-t33                    | 0.582 ± 0.023 | 0.587 ± 0.019 |


Unnamed: 0,model_key,mcc,micro_auprc,record_count
0,esm-t33-gearnet-pretrained,0.485 ± 0.102,0.485 ± 0.090,1939
1,esm-t33-pretrained,0.540 ± 0.067,0.563 ± 0.041,1020
2,esm-t33-gearnet,0.575 ± 0.024,0.554 ± 0.024,5
3,esm-t33,0.582 ± 0.023,0.587 ± 0.019,5


In [6]:
import os
# Model variants whose per-model imatinib result files are combined below.
# The last entry used to be misspelled 'esm-t33-gearent-pretrained'; with the
# name fixed, the file list is derived from `models` rather than typed twice.
models = ['esm-t33', 'esm-t33-gearnet',
          'esm-t33-pretrained', 'esm-t33-gearnet-pretrained']
files_to_read = [f'imatinib_{model}_stats.csv' for model in models]

# Files that do not exist are skipped silently, so the summary covers only the
# models whose result file is present. pd.concat raises if none exists.
csv_files = [pd.read_csv(file) for file in files_to_read if os.path.exists(file)]
concat_csv = pd.concat(csv_files)


get_stat_df(concat_csv, metrics=['mcc', 'micro_auprc'])

| model_key                  | mcc           | micro_auprc   |
|:---------------------------|:--------------|:--------------|
| esm-t33-gearnet-pretrained | 0.485 ± 0.102 | 0.485 ± 0.090 |
| esm-t33-pretrained         | 0.540 ± 0.067 | 0.563 ± 0.041 |
| esm-t33-gearnet            | 0.575 ± 0.024 | 0.554 ± 0.024 |
| esm-t33                    | 0.582 ± 0.023 | 0.587 ± 0.019 |


Unnamed: 0,model_key,mcc,micro_auprc,record_count
0,esm-t33-gearnet-pretrained,0.485 ± 0.102,0.485 ± 0.090,1939
1,esm-t33-pretrained,0.540 ± 0.067,0.563 ± 0.041,1020
2,esm-t33-gearnet,0.575 ± 0.024,0.554 ± 0.024,5
3,esm-t33,0.582 ± 0.023,0.587 ± 0.019,5


In [7]:
def analyze_metrics(csv_path, group_by_param, filter_conditions=None, sort_by=('mcc', 'mean'), ascending=False, limit=-1):
    """
    Analyze metrics by grouping on specified hyperparameters and filtering the DataFrame.

    Parameters:
    - csv_path: str, path of the CSV file to read.
    - group_by_param: str or list/tuple of str, the hyperparameter column(s) to group by.
    - filter_conditions: dict or None, maps a column name to the value(s) to keep.
      A list, tuple or set keeps rows whose value is any of its members; any
      other value is compared with ``==``. ``None`` or ``{}`` applies no filter.
    - sort_by: column label to sort the result by. Metric columns are
      ``(metric, 'mean')`` / ``(metric, 'std')`` tuples.
    - ascending: bool, sort direction.
    - limit: int, keep only the first ``limit`` rows after sorting when
      positive; zero or negative keeps every row.

    Returns:
    - grouped_df: DataFrame with one row per group: the grouping columns, the
      mean and std (rounded to 4 decimals) of mcc, micro_auprc, sensitivity
      and precision, and a ``count`` column with the group size.
    """
    import pandas as pd
    df = pd.read_csv(csv_path)

    # Apply filter conditions. Any plain collection means "one of these
    # values"; previously only `list` was recognised and a tuple or set fell
    # through to `==`.
    for column, value in (filter_conditions or {}).items():
        if isinstance(value, (list, tuple, set, frozenset)):
            df = df[df[column].isin(list(value))]
        else:
            df = df[df[column] == value]

    # Normalise the grouping keys to a list of column names
    if isinstance(group_by_param, (list, tuple)):
        hyperparameters = list(group_by_param)
    else:
        hyperparameters = [group_by_param]

    # Define the metrics to average
    metrics = ['mcc', 'micro_auprc', 'sensitivity', 'precision']

    # Mean and std of every metric per group, plus a single count column.
    # Both results come from the same groupby, so they share the group order
    # and the counts can be attached positionally after reset_index().
    groups = df.groupby(hyperparameters)
    grouped_df = groups[metrics].agg(['mean', 'std']).reset_index()
    grouped_df['count'] = groups.size().values

    # Round the final metrics to the fourth digit
    for metric in metrics:
        grouped_df[(metric, 'mean')] = grouped_df[(metric, 'mean')].round(4)
        grouped_df[(metric, 'std')] = grouped_df[(metric, 'std')].round(4)

    # Sort by the requested column (mean MCC, best first, by default)
    grouped_df = grouped_df.sort_values(sort_by, ascending=ascending)

    if limit > 0:
        grouped_df = grouped_df.head(limit)
    return grouped_df


def print_markdown_table(grouped_df, metrics=['mcc', 'micro_auprc', 'sensitivity', 'precision']):
    """
    Print the markdown formatted table from the grouped DataFrame.

    The input frame is left untouched; all reshaping happens on a copy.

    Parameters:
    - grouped_df: DataFrame, the grouped and averaged metrics. Its columns are
      expected to be a MultiIndex holding ``(metric, 'mean')`` and
      ``(metric, 'std')`` for every entry of ``metrics``, plus a ``count``
      column.
    - metrics: list, the metrics to include in the table.

    NOTE(review): any other column that has several sub-columns (for example a
    metric present in ``grouped_df`` but left out of ``metrics``) is treated as
    a key column, and the column relabelling then fails on a length mismatch.
    Pass every metric present in the frame.
    """
    from tabulate import tabulate

    # Work on a copy: the steps below add, drop and rename columns, which
    # previously happened in place on the caller's frame and made a second
    # call on the same frame fail.
    table_df = grouped_df.copy()

    # Format the metrics with mean and std
    for metric in metrics:
        table_df[f'{metric}_formatted'] = table_df.apply(
            lambda row: f"{row[(metric, 'mean')]:.3f} ± {row[(metric, 'std')]:.3f}", axis=1)
    # Drop the original metric columns
    for metric in metrics:
        table_df = table_df.drop(columns=[(metric, 'mean'), (metric, 'std')])

    # Rename the formatted metric columns to the original metric names
    for metric in metrics:
        table_df = table_df.rename(columns={f'{metric}_formatted': metric})

    # Select the columns to display, including non-metric keys.
    # levels[0] is sorted, so the key columns appear in alphabetical order.
    non_metric_keys = [col for col in table_df.columns.levels[0]
                       if col not in metrics and col != 'count' and col != 'index']
    formatted_df = table_df[non_metric_keys +
                            [f'{metric}' for metric in metrics] + ['count']]
    formatted_df.columns = non_metric_keys + metrics + ['record_count']
    # Print the markdown table
    markdown_table = tabulate(
        formatted_df, headers='keys', tablefmt='pipe', showindex=False)
    print(markdown_table)

# Top 15 hyperparameter combinations in the gearnet-pretrained results file,
# ranked by mean MCC (analyze_metrics' default sort, descending). No row filter.
analyze_metrics('imatinib_esm-t33-gearnet-pretrained_stats.csv',
                ['model_kwargs.lm_freeze_layer_count', 'base_lr',
                    'max_lr', 'cycle_size', 'pretrained_weight_path'],
                limit=15)

Unnamed: 0_level_0,model_kwargs.lm_freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
86,30.0,0.0003,0.003,6.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.5819,0.042,0.5512,0.0222,0.4352,0.0497,0.8373,0.1243,15
102,30.0,0.0003,0.007,4.0,weight/atpbind3d-1930_esm-t33-gearnet_4.pt,0.576,0.0292,0.5244,0.0512,0.4057,0.054,0.8711,0.0793,5
210,31.0,0.0003,0.007,4.0,weight/atpbind3d-1930_esm-t33-gearnet_4.pt,0.5758,0.0166,0.5366,0.03,0.4343,0.0329,0.8155,0.068,5
111,30.0,0.0003,0.01,4.0,weight/atpbind3d-1930_esm-t33-gearnet_4.pt,0.5722,0.0333,0.5251,0.0324,0.3971,0.0275,0.8734,0.073,5
37,30.0,0.0001,0.005,6.0,weight/atpbind3d-1930_esm-t33-gearnet_4.pt,0.5716,0.017,0.5448,0.0103,0.4486,0.0078,0.7787,0.0378,5
164,31.0,0.0001,0.005,6.0,empty,0.5702,0.0319,0.5302,0.0279,0.4228,0.0412,0.8198,0.0427,5
123,30.0,0.002,0.002,6.0,weight/atpbind3d-1930_esm-t33-gearnet_4.pt,0.5693,0.0321,0.5459,0.0278,0.4358,0.0101,0.7939,0.0621,2
43,30.0,0.0001,0.007,4.0,weight/atpbind3d-1930_esm-t33-gearnet_4.pt,0.5685,0.0488,0.5269,0.0275,0.4429,0.0639,0.7824,0.0256,5
91,30.0,0.0003,0.003,10.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.5669,0.0361,0.5499,0.0218,0.4352,0.0608,0.8047,0.1311,15
97,30.0,0.0003,0.005,6.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.5669,0.0428,0.5417,0.0201,0.46,0.0538,0.7597,0.1203,5


### Imatinib

#### ESM (No Pretrain)

In [8]:
# Aggregate the esm-t33 results file per model_key (mean/std of each metric and row count).
analyze_metrics('imatinib_esm-t33_stats.csv',
                ['model_key'],
                limit=10)

Unnamed: 0_level_0,model_key,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 10_level_1
0,esm-t33,0.5824,0.0232,0.5866,0.0191,0.4686,0.0445,0.781,0.108,5


In [9]:
# Rows of the esm-t33-pretrained results file whose pretrained_weight_path is 'empty',
# at base_lr=3e-4, max_lr=3e-3 and freeze_layer_count=30.
# With those values fixed, the groups differ only in cycle_size.
analyze_metrics('imatinib_esm-t33-pretrained_stats.csv',
                ['model_kwargs.freeze_layer_count', 'base_lr',
                    'max_lr', 'cycle_size', 'pretrained_weight_path'],
                {
                    'pretrained_weight_path': ['empty'],
                    'base_lr': [3e-4],
                    'max_lr': [3e-3],
                    'model_kwargs.freeze_layer_count': [30],
                    
                },
                limit=10)

Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
2,30.0,0.0003,0.003,8.0,empty,0.5956,0.0359,0.5858,0.0329,0.4629,0.0424,0.8165,0.073,5
0,30.0,0.0003,0.003,4.0,empty,0.569,0.0877,0.5982,0.0204,0.4905,0.103,0.7661,0.2323,15
1,30.0,0.0003,0.003,6.0,empty,0.5661,0.069,0.5898,0.0288,0.4714,0.0862,0.7722,0.2152,15
3,30.0,0.0003,0.003,10.0,empty,0.5613,0.0816,0.5885,0.027,0.4933,0.0778,0.73,0.2081,15


#### ESM (Pretrain)

In [10]:
# Same fixed settings as the 'empty'-weights cell above, but for runs whose
# pretrained_weight_path is weight/atpbind3d-1930_esm-t33_1.pt.
analyze_metrics('imatinib_esm-t33-pretrained_stats.csv',
                ['model_kwargs.freeze_layer_count', 'base_lr',
                    'max_lr', 'cycle_size', 'pretrained_weight_path'],
                {
                    'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33_1.pt'],
                    'base_lr': [3e-4],
                    'max_lr': [3e-3],
                    'model_kwargs.freeze_layer_count': [30],
                },
                limit=10)

Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
0,30.0,0.0003,0.003,4.0,weight/atpbind3d-1930_esm-t33_1.pt,0.5628,0.0205,0.5854,0.0138,0.4714,0.0337,0.7302,0.0912,15
1,30.0,0.0003,0.003,6.0,weight/atpbind3d-1930_esm-t33_1.pt,0.5513,0.0149,0.5658,0.0219,0.4771,0.0357,0.6952,0.0638,15
3,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33_1.pt,0.5011,0.0214,0.5375,0.0148,0.42,0.0534,0.6684,0.0982,15
2,30.0,0.0003,0.003,8.0,weight/atpbind3d-1930_esm-t33_1.pt,0.4944,0.0279,0.5495,0.0215,0.4229,0.0837,0.6598,0.1411,5


In [11]:
# Same fixed settings, for runs whose pretrained_weight_path is
# weight/atpbind3d-1930_esm-t33_1_rmmlp.pt.
analyze_metrics('imatinib_esm-t33-pretrained_stats.csv',
                ['model_kwargs.freeze_layer_count', 'base_lr',
                    'max_lr', 'cycle_size', 'pretrained_weight_path'],
                {
                    'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33_1_rmmlp.pt'],
                    'base_lr': [3e-4],
                    'max_lr': [3e-3],
                    'model_kwargs.freeze_layer_count': [30],
                },
                limit=10)

Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
0,30.0,0.0003,0.003,4.0,weight/atpbind3d-1930_esm-t33_1_rmmlp.pt,0.5699,0.0246,0.5847,0.0136,0.4647,0.048,0.7593,0.1009,15
2,30.0,0.0003,0.003,8.0,weight/atpbind3d-1930_esm-t33_1_rmmlp.pt,0.5377,0.0387,0.5612,0.0333,0.4857,0.0474,0.6632,0.1431,5
1,30.0,0.0003,0.003,6.0,weight/atpbind3d-1930_esm-t33_1_rmmlp.pt,0.5268,0.0364,0.5655,0.027,0.4838,0.0438,0.6382,0.0931,15
3,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33_1_rmmlp.pt,0.516,0.0407,0.5553,0.0248,0.4495,0.0894,0.6682,0.1139,15


In [12]:
# Same fixed settings, for runs whose pretrained_weight_path is
# weight/atpbind3d_esm-t33_1.pt.
analyze_metrics('imatinib_esm-t33-pretrained_stats.csv',
                ['model_kwargs.freeze_layer_count', 'base_lr',
                    'max_lr', 'cycle_size', 'pretrained_weight_path'],
                {
                    'pretrained_weight_path': ['weight/atpbind3d_esm-t33_1.pt'],
                    'base_lr': [3e-4],
                    'max_lr': [3e-3],
                    'model_kwargs.freeze_layer_count': [30],
                },
                limit=10)

Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
2,30.0,0.0003,0.003,8.0,weight/atpbind3d_esm-t33_1.pt,0.5921,0.0239,0.5553,0.0148,0.4486,0.0372,0.8334,0.0875,5
1,30.0,0.0003,0.003,6.0,weight/atpbind3d_esm-t33_1.pt,0.5699,0.0367,0.5471,0.0117,0.46,0.0412,0.7692,0.1346,15
0,30.0,0.0003,0.003,4.0,weight/atpbind3d_esm-t33_1.pt,0.5675,0.0475,0.5506,0.0133,0.4514,0.0632,0.7843,0.1548,15
3,30.0,0.0003,0.003,10.0,weight/atpbind3d_esm-t33_1.pt,0.5547,0.0293,0.5503,0.0289,0.4457,0.0287,0.7471,0.0843,15


#### Multiview (No Pretrain)

In [13]:

# Rows of the gearnet-pretrained results file whose pretrained_weight_path is 'empty',
# at lm_freeze_layer_count=30, base_lr=3e-4 and max_lr=3e-3.
# With those values fixed, the groups differ only in cycle_size.
analyze_metrics('imatinib_esm-t33-gearnet-pretrained_stats.csv',
                ['model_kwargs.lm_freeze_layer_count',
                    'base_lr', 'max_lr', 'cycle_size', 'pretrained_weight_path'],
                {
                    'pretrained_weight_path': ['empty'],
                    'model_kwargs.lm_freeze_layer_count': [30],
                    'base_lr': [3e-4],
                    'max_lr': [3e-3],
                },
                limit=10)

Unnamed: 0_level_0,model_kwargs.lm_freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
4,30.0,0.0003,0.003,10.0,empty,0.5578,0.0304,0.5543,0.02,0.4238,0.0558,0.7996,0.1243,15
3,30.0,0.0003,0.003,6.0,empty,0.5287,0.0398,0.5167,0.0296,0.4095,0.0525,0.7535,0.1413,15
2,30.0,0.0003,0.003,4.0,empty,0.2878,0.148,0.2997,0.1474,0.2571,0.1705,0.514,0.273,15
1,30.0,0.0003,0.003,3.0,empty,0.1774,0.1333,0.2293,0.1171,0.1467,0.1635,0.3976,0.2853,15
0,30.0,0.0003,0.003,2.0,empty,-0.0164,0.0291,0.055,0.0091,0.0219,0.0264,0.0573,0.1292,15


#### Multiview (Pretrain)

In [41]:

# Same fixed settings as the 'empty'-weights multiview cell, but for runs whose
# pretrained_weight_path is weight/atpbind3d-1930_esm-t33-gearnet_1.pt.
analyze_metrics('imatinib_esm-t33-gearnet-pretrained_stats.csv',
                ['model_kwargs.lm_freeze_layer_count',
                    'base_lr', 'max_lr', 'cycle_size', 'pretrained_weight_path'],
                {
                    'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33-gearnet_1.pt'],
                    'model_kwargs.lm_freeze_layer_count': [30.0],
                    'base_lr': [3e-4],
                    'max_lr': [3e-3],
                },
                limit=15)

Unnamed: 0_level_0,model_kwargs.lm_freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
1,30.0,0.0003,0.003,3.0,weight/atpbind3d-1930_esm-t33-gearnet_1.pt,0.5288,0.0247,0.5175,0.0088,0.4238,0.0257,0.7196,0.0796,15
3,30.0,0.0003,0.003,6.0,weight/atpbind3d-1930_esm-t33-gearnet_1.pt,0.5243,0.044,0.5034,0.0315,0.3905,0.0578,0.7761,0.1508,15
2,30.0,0.0003,0.003,4.0,weight/atpbind3d-1930_esm-t33-gearnet_1.pt,0.5093,0.0253,0.5025,0.0367,0.3933,0.0483,0.728,0.1074,15
4,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33-gearnet_1.pt,0.501,0.0357,0.5342,0.0318,0.4352,0.0836,0.6567,0.1174,15
0,30.0,0.0003,0.003,2.0,weight/atpbind3d-1930_esm-t33-gearnet_1.pt,0.4673,0.0585,0.4205,0.0507,0.4819,0.0339,0.5264,0.1129,15


In [37]:

# Same fixed settings, for runs whose pretrained_weight_path is
# weight/atpbind3d-1930_esm-t33-gearnet_1_rmmlp.pt.
# NOTE(review): the saved output under this cell is grouped by pretrained_weight_path
# only (a single row, count 75), which does not match the five grouping keys passed
# here. It looks stale; re-run the cell to refresh it.
analyze_metrics('imatinib_esm-t33-gearnet-pretrained_stats.csv',
                ['model_kwargs.lm_freeze_layer_count',
                    'base_lr', 'max_lr', 'cycle_size', 'pretrained_weight_path'],
                {
                    'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33-gearnet_1_rmmlp.pt'],
                    'model_kwargs.lm_freeze_layer_count': [30.0],
                    'base_lr': [3e-4],
                    'max_lr': [3e-3],
                },
                limit=10)

Unnamed: 0_level_0,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 10_level_1
0,weight/atpbind3d-1930_esm-t33-gearnet_1_rmmlp.pt,0.4938,0.0894,0.4881,0.0876,0.3937,0.1076,0.7142,0.1484,75


In [39]:

# Same fixed settings, for runs whose pretrained_weight_path is
# weight/atpbind3d_esm-t33-gearnet_1.pt.
analyze_metrics('imatinib_esm-t33-gearnet-pretrained_stats.csv',
                ['model_kwargs.lm_freeze_layer_count',
                    'base_lr', 'max_lr', 'cycle_size', 'pretrained_weight_path'],
                {
                    'pretrained_weight_path': ['weight/atpbind3d_esm-t33-gearnet_1.pt'],
                    'model_kwargs.lm_freeze_layer_count': [30.0],
                    'base_lr': [3e-4],
                    'max_lr': [3e-3],
                },
                limit=15)

Unnamed: 0_level_0,model_kwargs.lm_freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
3,30.0,0.0003,0.003,6.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.5819,0.042,0.5512,0.0222,0.4352,0.0497,0.8373,0.1243,15
4,30.0,0.0003,0.003,10.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.5669,0.0361,0.5499,0.0218,0.4352,0.0608,0.8047,0.1311,15
1,30.0,0.0003,0.003,3.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.5518,0.0426,0.5241,0.0364,0.4295,0.0721,0.7789,0.1252,15
2,30.0,0.0003,0.003,4.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.5068,0.0683,0.5141,0.0246,0.4076,0.0716,0.7193,0.1813,15
0,30.0,0.0003,0.003,2.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.3019,0.0724,0.305,0.0652,0.2715,0.1145,0.4348,0.0646,15


#### Adaboost (No Pretrain)

In [46]:

# Compare boost_negative_use_ratio settings in the gearnet-resiboost results file,
# restricted to rows with boost_mask_positive == True.
analyze_metrics('imatinib_esm-t33-gearnet-resiboost_stats.csv',
                ['boost_negative_use_ratio',
                    'boost_mask_positive'],
                {
                    'boost_mask_positive': [True], # Adaboost
                },
                limit=10)




Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
2,0.3,True,0.5867,0.0241,0.5811,0.0183,0.4029,0.0557,0.908,0.0853,5
3,0.5,True,0.5838,0.0202,0.5819,0.0097,0.4286,0.044,0.8494,0.0918,5
0,0.1,True,0.5745,0.02,0.5574,0.0112,0.4257,0.0433,0.829,0.0745,5
4,0.9,True,0.5708,0.0546,0.5863,0.0136,0.4514,0.0824,0.803,0.2073,5
1,0.2,True,0.5482,0.0643,0.5751,0.0118,0.38,0.1072,0.866,0.1063,5


#### Adaboost (Pretrain)

In [47]:
# Compare boost_negative_use_ratio settings in the gearnet-pretrained-resiboost
# results file, restricted to rows with boost_mask_positive == True.
analyze_metrics('imatinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
                ['boost_negative_use_ratio', 'boost_mask_positive'],
                {
                    'boost_mask_positive': [True],  # Adaboost
                },
                limit=10)

Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
3,0.5,True,0.5869,0.0283,0.5658,0.018,0.4453,0.0753,0.8358,0.1132,6
1,0.2,True,0.5742,0.0634,0.5544,0.0107,0.4071,0.0984,0.8768,0.0774,6
4,0.9,True,0.57,0.0517,0.5629,0.019,0.4314,0.0724,0.8348,0.2142,5
2,0.3,True,0.5632,0.0404,0.5608,0.0175,0.4143,0.0745,0.8405,0.1688,6
0,0.1,True,0.5147,0.1156,0.5075,0.0415,0.419,0.1083,0.7577,0.267,6


#### Resiboost (No Pretrain)

In [48]:

# Compare boost_negative_use_ratio settings in the gearnet-resiboost results file,
# restricted to rows with boost_mask_positive == False.
analyze_metrics('imatinib_esm-t33-gearnet-resiboost_stats.csv',
                ['boost_negative_use_ratio',
                    'boost_mask_positive'],
                {
                    'boost_mask_positive': [False],  # Resiboost
                },
                limit=10)

Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
1,0.2,False,0.5751,0.0148,0.5829,0.0158,0.4743,0.0583,0.7562,0.093,5
0,0.1,False,0.563,0.0635,0.5901,0.0141,0.4828,0.0488,0.7283,0.1748,5
3,0.5,False,0.5557,0.0478,0.5917,0.0071,0.4429,0.0875,0.7826,0.1994,5
2,0.3,False,0.5528,0.0725,0.5904,0.0143,0.4829,0.0695,0.7184,0.2115,5
4,0.9,False,0.5434,0.0743,0.585,0.0063,0.4429,0.0915,0.7628,0.2351,5


#### Resiboost (Pretrain)

In [49]:
# Compare boost_negative_use_ratio settings in the gearnet-pretrained-resiboost
# results file, restricted to rows with boost_mask_positive == False.
analyze_metrics('imatinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
                ['boost_negative_use_ratio', 'boost_mask_positive'],
                {
                    'boost_mask_positive': [False],  # Resiboost
                },
                limit=10)

Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
1,0.2,False,0.592,0.0157,0.5703,0.0177,0.4286,0.0394,0.8687,0.0778,6
3,0.5,False,0.5873,0.0175,0.5693,0.0212,0.419,0.0631,0.8828,0.1151,6
4,0.9,False,0.5793,0.0349,0.5659,0.0164,0.431,0.0629,0.8479,0.1668,6
0,0.1,False,0.5723,0.024,0.5716,0.0222,0.4143,0.0519,0.8472,0.0942,6
2,0.3,False,0.5608,0.0557,0.5718,0.021,0.4357,0.0916,0.805,0.1869,6


In [21]:
def pick_and_aggregate(model_lists, metrics=['mcc', 'micro_auprc', 'sensitivity', 'precision']):
    '''
    Build a per-model summary table from several per-run stats CSV files.

    model_lists: list of {'label': str, 'csv_path': str, 'filter_conditions': dict}
        - csv_path: file read with pd.read_csv; it must contain a 'model_key'
          column and every column named in `metrics`.
        - filter_conditions: {column: [allowed values]}; a row is kept only
          when it satisfies every condition (Series.isin per column).
        - label: written into the 'model_key' column of every kept row, so
          rows are grouped by label rather than by the original model key.
    metrics: columns to aggregate. The default list is never mutated.

    Returns a DataFrame with one row per label that has at least one kept
    row, ordered by the label's position in model_lists (for a repeated label,
    its last position). Columns are 'model_key', (metric, 'mean') and
    (metric, 'std') for each metric, and 'count' (number of aggregated rows).
    Numeric values are rounded to 3 decimal places.

    Raises ValueError if model_lists is empty.
    '''
    if not model_lists:
        # pd.concat would raise an opaque "No objects to concatenate" here.
        raise ValueError('model_lists must contain at least one model spec')

    metric_cols = list(metrics)
    frames = []

    for model in model_lists:
        df = pd.read_csv(model['csv_path'])

        # Conditions are ANDed: each one narrows the frame further.
        for col, values in model['filter_conditions'].items():
            df = df[df[col].isin(values)]

        # .assign returns a new frame, so relabelling never writes into a
        # filtered slice (which triggered SettingWithCopyWarning).
        frames.append(
            df[['model_key'] + metric_cols].assign(model_key=model['label'])
        )

    combined_df = pd.concat(frames, ignore_index=True)

    by_label = combined_df.groupby('model_key')
    result = by_label.agg({metric: ['mean', 'std'] for metric in metric_cols})
    result['count'] = by_label.size()
    result = result.reset_index()

    # Reorder rows positionally instead of adding and dropping a helper column:
    # dropping a column from these non-lexsorted MultiIndex columns emits a
    # pandas PerformanceWarning.
    label_rank = {model['label']: i for i, model in enumerate(model_lists)}
    positions = result['model_key'].map(label_rank).to_numpy().argsort()
    result = result.iloc[positions]

    return result.round(3)

# Imatinib summary: one row per model variant, aggregated over the runs in
# each stats CSV that match the variant's filter_conditions.
# Each filter column appears once per dict; a repeated key in a dict literal
# silently overrides the earlier one.
result = pick_and_aggregate([
    {
        'label': 'ESM (No Pretrain)',
        'csv_path': 'imatinib_esm-t33-pretrained_stats.csv',
        'filter_conditions': {
            'model_kwargs.freeze_layer_count': [30],
            'pretrained_weight_path': ['empty'],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [6],
        },
    },
    {
        'label': 'ESM (Pretrain)',
        'csv_path': 'imatinib_esm-t33-pretrained_stats.csv',
        'filter_conditions': {
            'model_kwargs.freeze_layer_count': [30],
            'pretrained_weight_path': ['weight/atpbind3d_esm-t33_1.pt'],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [6],
        },
    },
    {
        'label': 'Multiview (No Pretrain)',
        'csv_path': 'imatinib_esm-t33-gearnet-pretrained_stats.csv',
        'filter_conditions': {
            'pretrained_weight_path': ['empty'],
            'model_kwargs.lm_freeze_layer_count': [30],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [6],
        },
    },
    {
        'label': 'Multiview (Pretrain)',
        'csv_path': 'imatinib_esm-t33-gearnet-pretrained_stats.csv',
        'filter_conditions': {
            'pretrained_weight_path': ['weight/atpbind3d_esm-t33-gearnet_1.pt'],
            'model_kwargs.lm_freeze_layer_count': [30.0],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [6],
        },
    },
    {
        'label': 'Multiview (Adaboost) (No Pretrain)',
        'csv_path': 'imatinib_esm-t33-gearnet-resiboost_stats.csv',
        'filter_conditions': {
            'boost_negative_use_ratio': [0.2],
            'boost_mask_positive': [True],
        },
    },
    {
        'label': 'Multiview (Adaboost) (Pretrain)',
        'csv_path': 'imatinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
        'filter_conditions': {
            'boost_negative_use_ratio': [0.2],
            'boost_mask_positive': [True],
        },
    },
    {
        'label': 'Multiview (Resiboost) (No Pretrain)',
        'csv_path': 'imatinib_esm-t33-gearnet-resiboost_stats.csv',
        'filter_conditions': {
            'boost_negative_use_ratio': [0.2],
            'boost_mask_positive': [False],
        },
    },
    {
        'label': 'Multiview (Resiboost) (Pretrain)',
        'csv_path': 'imatinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
        'filter_conditions': {
            'boost_negative_use_ratio': [0.2],
            'boost_mask_positive': [False],
        },
    },
])

print_markdown_table(result)

| model_key                           | mcc           | micro_auprc   | sensitivity   | precision     |   record_count |
|:------------------------------------|:--------------|:--------------|:--------------|:--------------|---------------:|
| ESM (No Pretrain)                   | 0.566 ± 0.069 | 0.590 ± 0.029 | 0.471 ± 0.086 | 0.772 ± 0.215 |             15 |
| ESM (Pretrain)                      | 0.570 ± 0.037 | 0.547 ± 0.012 | 0.460 ± 0.041 | 0.769 ± 0.135 |             15 |
| Multiview (No Pretrain)             | 0.529 ± 0.040 | 0.517 ± 0.030 | 0.410 ± 0.053 | 0.753 ± 0.141 |             15 |
| Multiview (Pretrain)                | 0.582 ± 0.042 | 0.551 ± 0.022 | 0.435 ± 0.050 | 0.837 ± 0.124 |             15 |
| Multiview (Adaboost) (No Pretrain)  | 0.548 ± 0.064 | 0.575 ± 0.012 | 0.380 ± 0.107 | 0.866 ± 0.106 |              5 |
| Multiview (Adaboost) (Pretrain)     | 0.599 ± 0.022 | 0.553 ± 0.011 | 0.446 ± 0.031 | 0.852 ± 0.054 |              5 |
| Multiview (Resiboost) (No Pret

  result = result.sort_values('order').drop('order', axis=1)


### Dasatinib

#### ESM (No Pretrain)

In [23]:
# Dasatinib, ESM without pretrained weights ('empty' weight path), restricted
# to freeze_layer_count 30 and the 3e-4 / 3e-3 learning-rate pair.
analyze_metrics(
    'dasatinib_esm-t33-pretrained_stats.csv',
    [
        'model_kwargs.freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    {
        'pretrained_weight_path': ['empty'],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
        'model_kwargs.freeze_layer_count': [30],
    },
    limit=10,
)


Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
0,30,0.0003,0.003,3,empty,0.6112,0.044,0.6365,0.0489,0.6068,0.065,0.6645,0.0534,5
1,30,0.0003,0.003,4,empty,0.5818,0.0852,0.6451,0.0408,0.5119,0.1537,0.724,0.0617,5
2,30,0.0003,0.003,6,empty,0.5749,0.0506,0.6322,0.0489,0.5593,0.1351,0.666,0.1447,5
3,30,0.0003,0.003,10,empty,0.5444,0.0767,0.6021,0.0904,0.539,0.1081,0.6246,0.1524,5


#### ESM (Pretrain)

In [34]:
# Dasatinib, ESM with pretrained weights: three checkpoints, restricted to
# freeze_layer_count 30 and the 3e-4 / 3e-3 learning-rate pair.
analyze_metrics(
    'dasatinib_esm-t33-pretrained_stats.csv',
    [
        'model_kwargs.freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    {
        'pretrained_weight_path': [
            'weight/atpbind3d-1930_esm-t33_1.pt',
            'weight/atpbind3d-1930_esm-t33_1_rmmlp.pt',
            'weight/atpbind3d_esm-t33_1.pt',
        ],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
        'model_kwargs.freeze_layer_count': [30],
    },
    limit=10,
)

Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
2,30,0.0003,0.003,3,weight/atpbind3d_esm-t33_1.pt,0.6297,0.0275,0.6874,0.0169,0.6204,0.0827,0.689,0.0596,5
7,30,0.0003,0.003,6,weight/atpbind3d-1930_esm-t33_1_rmmlp.pt,0.5877,0.0588,0.6875,0.042,0.5729,0.1369,0.6743,0.1391,5
11,30,0.0003,0.003,10,weight/atpbind3d_esm-t33_1.pt,0.5858,0.0311,0.6169,0.035,0.5864,0.0652,0.6416,0.091,5
0,30,0.0003,0.003,3,weight/atpbind3d-1930_esm-t33_1.pt,0.5816,0.0316,0.6432,0.016,0.5458,0.0947,0.6765,0.0611,5
6,30,0.0003,0.003,6,weight/atpbind3d-1930_esm-t33_1.pt,0.5801,0.0414,0.6587,0.0373,0.5322,0.0754,0.6864,0.0597,5
5,30,0.0003,0.003,4,weight/atpbind3d_esm-t33_1.pt,0.5788,0.0489,0.6681,0.0062,0.5424,0.1356,0.6922,0.1335,5
3,30,0.0003,0.003,4,weight/atpbind3d-1930_esm-t33_1.pt,0.5756,0.0988,0.6442,0.0297,0.539,0.1675,0.682,0.0488,5
9,30,0.0003,0.003,10,weight/atpbind3d-1930_esm-t33_1.pt,0.5582,0.0704,0.6791,0.0332,0.4949,0.1513,0.7014,0.0902,5
4,30,0.0003,0.003,4,weight/atpbind3d-1930_esm-t33_1_rmmlp.pt,0.5495,0.0794,0.6834,0.057,0.5153,0.2493,0.7144,0.2051,5
1,30,0.0003,0.003,3,weight/atpbind3d-1930_esm-t33_1_rmmlp.pt,0.5468,0.0503,0.6953,0.013,0.5051,0.2018,0.7171,0.2307,5


#### Multiview (No Pretrain)

In [26]:

# Dasatinib, multiview model without pretrained weights ('empty' weight path),
# restricted to lm_freeze_layer_count 30 and the 3e-4 / 3e-3 learning-rate pair.
analyze_metrics(
    'dasatinib_esm-t33-gearnet-pretrained_stats.csv',
    [
        'model_kwargs.lm_freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    {
        'pretrained_weight_path': ['empty'],
        'model_kwargs.lm_freeze_layer_count': [30],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
    },
    limit=10,
)

Unnamed: 0_level_0,model_kwargs.lm_freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
4,30,0.0003,0.003,10,empty,0.5818,0.069,0.6278,0.0821,0.4881,0.0808,0.7431,0.0479,5
3,30,0.0003,0.003,6,empty,0.5035,0.1368,0.6043,0.1198,0.4034,0.1989,0.732,0.106,5
2,30,0.0003,0.003,4,empty,0.317,0.1354,0.3195,0.1237,0.1966,0.1109,0.6213,0.1858,5
1,30,0.0003,0.003,3,empty,0.0717,0.0786,0.0957,0.0478,0.0373,0.0455,0.3922,0.4287,5
0,30,0.0003,0.003,2,empty,-0.0115,0.0073,0.0436,0.0039,0.0,0.0,0.0,0.0,5


#### Multiview (Pretrain)

In [32]:

# Dasatinib, multiview model with pretrained weights: three checkpoints,
# restricted to lm_freeze_layer_count 30 and the 3e-4 / 3e-3 learning-rate pair.
# NOTE(review): unlike the sibling cells, the second argument lists only
# 'pretrained_weight_path' (no cycle_size / learning-rate columns), and the
# rendered output shows count 25 per row -- confirm this pooling is intended.
analyze_metrics(
    'dasatinib_esm-t33-gearnet-pretrained_stats.csv',
    ['pretrained_weight_path'],
    {
        'pretrained_weight_path': [
            'weight/atpbind3d-1930_esm-t33-gearnet_1.pt',
            'weight/atpbind3d-1930_esm-t33-gearnet_1_rmmlp.pt',
            'weight/atpbind3d_esm-t33-gearnet_1.pt',
        ],
        'model_kwargs.lm_freeze_layer_count': [30.0],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
    },
    limit=15,
)

Unnamed: 0_level_0,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 10_level_1
0,weight/atpbind3d-1930_esm-t33-gearnet_1.pt,0.4703,0.1495,0.5284,0.1425,0.3919,0.1695,0.6575,0.1593,25
2,weight/atpbind3d_esm-t33-gearnet_1.pt,0.4574,0.1217,0.5309,0.1277,0.3532,0.1592,0.7107,0.158,25
1,weight/atpbind3d-1930_esm-t33-gearnet_1_rmmlp.pt,0.4094,0.2196,0.4504,0.2424,0.3227,0.2146,0.6409,0.2293,25


#### Adaboost (No Pretrain)

In [31]:
# TODO: add the Adaboost (No Pretrain) analysis for dasatinib.