In [2]:
import pandas as pd
from tabulate import tabulate


def format_mean_std(row, metric):
    """Render one metric of an aggregated row as a ``"mean ± std"`` string.

    ``row`` is indexed with the two-part keys ``(metric, 'mean')`` and
    ``(metric, 'std')``; both values are formatted with three decimals.
    """
    mean_val, std_val = (row[(metric, stat)] for stat in ('mean', 'std'))
    return f"{mean_val:.3f} ± {std_val:.3f}"


def get_stat_df(result_df, verbose=True, metrics=['micro_auprc', 'micro_auroc',  'mcc',
                                                  'sensitivity', 'precision']):
    """Summarise per-model metrics as ``"mean ± std"`` strings.

    Parameters
    ----------
    result_df : pandas.DataFrame
        One row per run, with a ``'model_key'`` column and one numeric column
        for every entry of ``metrics``.
    verbose : bool
        When true, print the summary (without ``record_count``) as a
        pipe-format markdown table via ``tabulate``.
    metrics : sequence of str
        Metric columns to aggregate. The first entry decides the row order.
        Any sequence is accepted; it is copied and never mutated.

    Returns
    -------
    pandas.DataFrame
        Columns ``model_key``, one column per metric holding the
        ``"mean ± std"`` string (three decimals, same layout as
        ``format_mean_std``), and ``record_count`` (rows per model).
        Rows are ordered by ascending numeric mean of ``metrics[0]``.
    """
    # Copy so tuples work and the shared default list can never be modified.
    metrics = list(metrics)

    by_model = result_df.groupby('model_key')
    stats = by_model[metrics].agg(['mean', 'std'])

    # Sort on the numeric mean *before* formatting. Sorting the formatted
    # strings orders negative values and values >= 10 lexicographically
    # (e.g. "10.000" before "2.000"), which is not the intended ranking.
    stats = stats.sort_values((metrics[0], 'mean'), ascending=True, kind='stable')

    formatted_df = pd.DataFrame(
        {
            metric: [
                f"{mean_val:.3f} ± {std_val:.3f}"
                for mean_val, std_val in zip(stats[(metric, 'mean')],
                                             stats[(metric, 'std')])
            ]
            for metric in metrics
        },
        index=stats.index,
    )

    # Assigning the Series aligns on model_key, so the counts follow the
    # sorted row order instead of relying on positional agreement.
    formatted_df['record_count'] = by_model.size()

    formatted_df = formatted_df.reset_index()

    if verbose:
        markdown_table = tabulate(formatted_df[[
                                  'model_key'] + metrics], headers='keys', tablefmt='pipe', showindex=False)
        print(markdown_table)

    return formatted_df

# Load the imatinib stats CSV into a DataFrame.
df_imatinib = pd.read_csv('imatinib_stats.csv')

def global_filter(df):
    """Keep only the rows whose ``model_key`` belongs to the compared model set.

    A row is kept when its ``model_key`` is either

    * one of the three exact names ``'esm-t33-gearnet'``, ``'esm-t33'`` or
      ``'esm-t33-pretrained'``, or
    * starts with ``'esm-t33-gearnet-'`` followed by a non-digit and does not
      contain any of ``r50``, ``r80``, ``r10``, ``rus`` or ``ensemble``.

    Rows with a missing ``model_key`` are dropped (``na=False``); without it
    the ``~`` negation raises ``TypeError`` on the NaN results.
    """
    keys = df['model_key']

    is_gearnet_variant = keys.str.contains(r'^esm-t33-gearnet-(?!\d)', na=False)
    # One alternation replaces the five separate negative substring checks.
    is_excluded = keys.str.contains(r'r50|r80|r10|rus|ensemble', na=False)
    is_baseline = keys.isin(['esm-t33-gearnet', 'esm-t33', 'esm-t33-pretrained'])

    return df[(is_gearnet_variant & ~is_excluded) | is_baseline]

# Filter to the compared models, then print and return the mean ± std summary.
global_filter(df_imatinib).pipe(get_stat_df)

| model_key                                | micro_auprc   | micro_auroc   | mcc           | sensitivity   | precision     |
|:-----------------------------------------|:--------------|:--------------|:--------------|:--------------|:--------------|
| esm-t33-pretrained                       | 0.502 ± 0.017 | 0.857 ± 0.015 | 0.579 ± 0.033 | 0.548 ± 0.068 | 0.663 ± 0.026 |
| esm-t33                                  | 0.511 ± 0.021 | 0.864 ± 0.033 | 0.612 ± 0.026 | 0.594 ± 0.032 | 0.678 ± 0.052 |
| esm-t33-gearnet                          | 0.581 ± 0.027 | 0.846 ± 0.010 | 0.548 ± 0.077 | 0.513 ± 0.046 | 0.656 ± 0.172 |
| esm-t33-gearnet-pretrained               | 0.602 ± 0.036 | 0.820 ± 0.044 | 0.627 ± 0.020 | 0.533 ± 0.035 | 0.782 ± 0.063 |
| esm-t33-gearnet-resiboost-r90            | 0.638 ± 0.018 | 0.845 ± 0.010 | 0.610 ± 0.040 | 0.548 ± 0.068 | 0.741 ± 0.134 |
| esm-t33-gearnet-adaboost-r90             | 0.640 ± 0.017 | 0.847 ± 0.012 | 0.610 ± 0.058 | 0.555 ± 0.083 | 0.741 ± 0.170 |


Unnamed: 0,model_key,micro_auprc,micro_auroc,mcc,sensitivity,precision,record_count
0,esm-t33-pretrained,0.502 ± 0.017,0.857 ± 0.015,0.579 ± 0.033,0.548 ± 0.068,0.663 ± 0.026,5
1,esm-t33,0.511 ± 0.021,0.864 ± 0.033,0.612 ± 0.026,0.594 ± 0.032,0.678 ± 0.052,5
2,esm-t33-gearnet,0.581 ± 0.027,0.846 ± 0.010,0.548 ± 0.077,0.513 ± 0.046,0.656 ± 0.172,5
3,esm-t33-gearnet-pretrained,0.602 ± 0.036,0.820 ± 0.044,0.627 ± 0.020,0.533 ± 0.035,0.782 ± 0.063,5
4,esm-t33-gearnet-resiboost-r90,0.638 ± 0.018,0.845 ± 0.010,0.610 ± 0.040,0.548 ± 0.068,0.741 ± 0.134,10
5,esm-t33-gearnet-adaboost-r90,0.640 ± 0.017,0.847 ± 0.012,0.610 ± 0.058,0.555 ± 0.083,0.741 ± 0.170,10
6,esm-t33-gearnet-pretrained-adaboost-r90,0.649 ± 0.021,0.844 ± 0.030,0.615 ± 0.017,0.565 ± 0.075,0.724 ± 0.106,5
7,esm-t33-gearnet-pretrained-resiboost-r90,0.656 ± 0.027,0.853 ± 0.039,0.624 ± 0.014,0.554 ± 0.075,0.756 ± 0.100,5


In [3]:
# Same summary as for imatinib, on the dasatinib stats CSV.
df_dasatinib = pd.read_csv('dasatinib_stats.csv')
df_dasatinib.pipe(global_filter).pipe(get_stat_df)

| model_key                                | micro_auprc   | micro_auroc   | mcc           | sensitivity   | precision     |
|:-----------------------------------------|:--------------|:--------------|:--------------|:--------------|:--------------|
| esm-t33-gearnet                          | 0.612 ± 0.093 | 0.905 ± 0.030 | 0.588 ± 0.076 | 0.508 ± 0.083 | 0.730 ± 0.068 |
| esm-t33-pretrained                       | 0.617 ± 0.035 | 0.907 ± 0.014 | 0.584 ± 0.028 | 0.576 ± 0.056 | 0.647 ± 0.090 |
| esm-t33                                  | 0.624 ± 0.050 | 0.900 ± 0.040 | 0.561 ± 0.030 | 0.515 ± 0.074 | 0.669 ± 0.070 |
| esm-t33-gearnet-pretrained               | 0.628 ± 0.045 | 0.904 ± 0.017 | 0.562 ± 0.076 | 0.393 ± 0.119 | 0.876 ± 0.093 |
| esm-t33-gearnet-adaboost-r90             | 0.694 ± 0.049 | 0.935 ± 0.016 | 0.567 ± 0.116 | 0.420 ± 0.168 | 0.843 ± 0.071 |
| esm-t33-gearnet-resiboost-r90            | 0.696 ± 0.059 | 0.936 ± 0.009 | 0.587 ± 0.080 | 0.485 ± 0.134 | 0.774 ± 0.072 |


Unnamed: 0,model_key,micro_auprc,micro_auroc,mcc,sensitivity,precision,record_count
0,esm-t33-gearnet,0.612 ± 0.093,0.905 ± 0.030,0.588 ± 0.076,0.508 ± 0.083,0.730 ± 0.068,5
1,esm-t33-pretrained,0.617 ± 0.035,0.907 ± 0.014,0.584 ± 0.028,0.576 ± 0.056,0.647 ± 0.090,5
2,esm-t33,0.624 ± 0.050,0.900 ± 0.040,0.561 ± 0.030,0.515 ± 0.074,0.669 ± 0.070,5
3,esm-t33-gearnet-pretrained,0.628 ± 0.045,0.904 ± 0.017,0.562 ± 0.076,0.393 ± 0.119,0.876 ± 0.093,5
4,esm-t33-gearnet-adaboost-r90,0.694 ± 0.049,0.935 ± 0.016,0.567 ± 0.116,0.420 ± 0.168,0.843 ± 0.071,5
5,esm-t33-gearnet-resiboost-r90,0.696 ± 0.059,0.936 ± 0.009,0.587 ± 0.080,0.485 ± 0.134,0.774 ± 0.072,5
6,esm-t33-gearnet-pretrained-adaboost-r90,0.712 ± 0.038,0.941 ± 0.012,0.590 ± 0.050,0.488 ± 0.145,0.787 ± 0.111,5
7,esm-t33-gearnet-pretrained-resiboost-r90,0.715 ± 0.035,0.941 ± 0.012,0.606 ± 0.053,0.475 ± 0.097,0.828 ± 0.058,5


In [4]:
# Same summary as for imatinib, on the bosutinib stats CSV.
df_bosutinib = pd.read_csv('bosutinib_stats.csv')
df_bosutinib.pipe(global_filter).pipe(get_stat_df)

| model_key                                | micro_auprc   | micro_auroc   | mcc           | sensitivity   | precision     |
|:-----------------------------------------|:--------------|:--------------|:--------------|:--------------|:--------------|
| esm-t33-gearnet-resiboost-r90            | 0.806 ± 0.042 | 0.942 ± 0.021 | 0.731 ± 0.047 | 0.697 ± 0.063 | 0.814 ± 0.130 |
| esm-t33-gearnet-adaboost-r90             | 0.809 ± 0.039 | 0.947 ± 0.020 | 0.739 ± 0.034 | 0.697 ± 0.092 | 0.831 ± 0.107 |
| esm-t33-gearnet                          | 0.811 ± 0.034 | 0.953 ± 0.007 | 0.717 ± 0.027 | 0.710 ± 0.043 | 0.768 ± 0.082 |
| esm-t33-gearnet-pretrained               | 0.830 ± 0.010 | 0.957 ± 0.012 | 0.735 ± 0.019 | 0.677 ± 0.058 | 0.840 ± 0.062 |
| esm-t33-pretrained                       | 0.832 ± 0.040 | 0.936 ± 0.035 | 0.754 ± 0.047 | 0.660 ± 0.084 | 0.897 ± 0.014 |
| esm-t33                                  | 0.852 ± 0.020 | 0.952 ± 0.018 | 0.745 ± 0.030 | 0.667 ± 0.059 | 0.871 ± 0.059 |


Unnamed: 0,model_key,micro_auprc,micro_auroc,mcc,sensitivity,precision,record_count
0,esm-t33-gearnet-resiboost-r90,0.806 ± 0.042,0.942 ± 0.021,0.731 ± 0.047,0.697 ± 0.063,0.814 ± 0.130,5
1,esm-t33-gearnet-adaboost-r90,0.809 ± 0.039,0.947 ± 0.020,0.739 ± 0.034,0.697 ± 0.092,0.831 ± 0.107,5
2,esm-t33-gearnet,0.811 ± 0.034,0.953 ± 0.007,0.717 ± 0.027,0.710 ± 0.043,0.768 ± 0.082,5
3,esm-t33-gearnet-pretrained,0.830 ± 0.010,0.957 ± 0.012,0.735 ± 0.019,0.677 ± 0.058,0.840 ± 0.062,5
4,esm-t33-pretrained,0.832 ± 0.040,0.936 ± 0.035,0.754 ± 0.047,0.660 ± 0.084,0.897 ± 0.014,5
5,esm-t33,0.852 ± 0.020,0.952 ± 0.018,0.745 ± 0.030,0.667 ± 0.059,0.871 ± 0.059,5
6,esm-t33-gearnet-pretrained-resiboost-r90,0.872 ± 0.023,0.968 ± 0.009,0.766 ± 0.024,0.730 ± 0.110,0.850 ± 0.116,5
7,esm-t33-gearnet-pretrained-adaboost-r90,0.875 ± 0.026,0.969 ± 0.010,0.765 ± 0.025,0.720 ± 0.114,0.860 ± 0.119,5


In [5]:
def get_stat_df_combined(metrics=['mcc'], verbose=True,
                         drugs=('imatinib', 'dasatinib', 'bosutinib')):
    """Build one table with the chosen metrics side by side for several drugs.

    For every drug the file ``'<drug>_stats.csv'`` is read, passed through
    ``global_filter`` and summarised with ``get_stat_df`` (called with its
    default metric list), then the per-drug summaries are joined on
    ``model_key``.

    Parameters
    ----------
    metrics : list of str
        Columns to keep for each drug. They must be columns of the
        ``get_stat_df`` result, i.e. one of its default metrics or
        ``'record_count'``.
    verbose : bool
        When true, print the combined table as pipe-format markdown.
    drugs : sequence of str
        Drug names; each must have a matching ``'<drug>_stats.csv'`` file.
        Defaults to the three drugs that were previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``model_key`` with two-level columns ``(drug, metric)``,
        ordered drug-major in the order given.
    """
    drugs = list(drugs)

    per_drug = [
        get_stat_df(global_filter(pd.read_csv(f'{drug}_stats.csv')),
                    verbose=False).set_index('model_key')
        for drug in drugs
    ]

    # keys= adds the drug name as the outer column level.
    df_combined = pd.concat(per_drug, axis=1, keys=drugs)
    df_combined = df_combined[[(drug, met) for drug in drugs for met in metrics]]

    if verbose:
        markdown_table = tabulate(df_combined, headers='keys', tablefmt='pipe', showindex=True)
        print(markdown_table)

    return df_combined


# Show MCC and micro-AUPRC side by side for every drug handled by get_stat_df_combined.
get_stat_df_combined(metrics=['mcc', 'micro_auprc'])

| model_key                                | ('imatinib', 'mcc')   | ('imatinib', 'micro_auprc')   | ('dasatinib', 'mcc')   | ('dasatinib', 'micro_auprc')   | ('bosutinib', 'mcc')   | ('bosutinib', 'micro_auprc')   |
|:-----------------------------------------|:----------------------|:------------------------------|:-----------------------|:-------------------------------|:-----------------------|:-------------------------------|
| esm-t33-pretrained                       | 0.579 ± 0.033         | 0.502 ± 0.017                 | 0.584 ± 0.028          | 0.617 ± 0.035                  | 0.754 ± 0.047          | 0.832 ± 0.040                  |
| esm-t33                                  | 0.612 ± 0.026         | 0.511 ± 0.021                 | 0.561 ± 0.030          | 0.624 ± 0.050                  | 0.745 ± 0.030          | 0.852 ± 0.020                  |
| esm-t33-gearnet                          | 0.548 ± 0.077         | 0.581 ± 0.027                 | 0.588 ± 0.076          | 0.612 

Unnamed: 0_level_0,imatinib,imatinib,dasatinib,dasatinib,bosutinib,bosutinib
Unnamed: 0_level_1,mcc,micro_auprc,mcc,micro_auprc,mcc,micro_auprc
model_key,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
esm-t33-pretrained,0.579 ± 0.033,0.502 ± 0.017,0.584 ± 0.028,0.617 ± 0.035,0.754 ± 0.047,0.832 ± 0.040
esm-t33,0.612 ± 0.026,0.511 ± 0.021,0.561 ± 0.030,0.624 ± 0.050,0.745 ± 0.030,0.852 ± 0.020
esm-t33-gearnet,0.548 ± 0.077,0.581 ± 0.027,0.588 ± 0.076,0.612 ± 0.093,0.717 ± 0.027,0.811 ± 0.034
esm-t33-gearnet-pretrained,0.627 ± 0.020,0.602 ± 0.036,0.562 ± 0.076,0.628 ± 0.045,0.735 ± 0.019,0.830 ± 0.010
esm-t33-gearnet-resiboost-r90,0.610 ± 0.040,0.638 ± 0.018,0.587 ± 0.080,0.696 ± 0.059,0.731 ± 0.047,0.806 ± 0.042
esm-t33-gearnet-adaboost-r90,0.610 ± 0.058,0.640 ± 0.017,0.567 ± 0.116,0.694 ± 0.049,0.739 ± 0.034,0.809 ± 0.039
esm-t33-gearnet-pretrained-adaboost-r90,0.615 ± 0.017,0.649 ± 0.021,0.590 ± 0.050,0.712 ± 0.038,0.765 ± 0.025,0.875 ± 0.026
esm-t33-gearnet-pretrained-resiboost-r90,0.624 ± 0.014,0.656 ± 0.027,0.606 ± 0.053,0.715 ± 0.035,0.766 ± 0.024,0.872 ± 0.023


# New: Imatinib

In [6]:
# Models whose per-model imatinib stats files are pooled below.
# (Fixed the 'gearent' typo; the list now actually drives the file names.)
models = ['esm-t33', 'esm-t33-gearnet', 'esm-t33-pretrained', 'esm-t33-gearnet-pretrained']
concat_csv = pd.concat([
    pd.read_csv(f'imatinib_{model}_stats.csv') for model in models
])

# NOTE(review): the recorded output shows record_count 2239 / 1282 for the two
# *-pretrained files versus 5 for the others, so their mean ± std appears to
# pool many configurations — confirm this comparison is intended.
get_stat_df(concat_csv, metrics=['mcc', 'micro_auprc'])


| model_key                  | mcc           | micro_auprc   |
|:---------------------------|:--------------|:--------------|
| esm-t33-gearnet-pretrained | 0.473 ± 0.127 | 0.474 ± 0.110 |
| esm-t33-pretrained         | 0.547 ± 0.064 | 0.567 ± 0.038 |
| esm-t33-gearnet            | 0.575 ± 0.024 | 0.554 ± 0.024 |
| esm-t33                    | 0.582 ± 0.023 | 0.587 ± 0.019 |


Unnamed: 0,model_key,mcc,micro_auprc,record_count
0,esm-t33-gearnet-pretrained,0.473 ± 0.127,0.474 ± 0.110,2239
1,esm-t33-pretrained,0.547 ± 0.064,0.567 ± 0.038,1282
2,esm-t33-gearnet,0.575 ± 0.024,0.554 ± 0.024,5
3,esm-t33,0.582 ± 0.023,0.587 ± 0.019,5


In [7]:
import os

# Models whose per-model imatinib stats files are pooled below.
# (Fixed the 'gearent' typo; the list now actually drives the file names.)
models = ['esm-t33', 'esm-t33-gearnet',
          'esm-t33-pretrained', 'esm-t33-gearnet-pretrained']
files_to_read = [f'imatinib_{model}_stats.csv' for model in models]

# Files that do not exist are still skipped, but reported so that an
# incomplete comparison is visible instead of silent.
csv_files = []
for file in files_to_read:
    if os.path.exists(file):
        csv_files.append(pd.read_csv(file))
    else:
        print(f'Skipping missing stats file: {file}')
concat_csv = pd.concat(csv_files)


get_stat_df(concat_csv, metrics=['mcc', 'micro_auprc'])

| model_key                  | mcc           | micro_auprc   |
|:---------------------------|:--------------|:--------------|
| esm-t33-gearnet-pretrained | 0.473 ± 0.127 | 0.474 ± 0.110 |
| esm-t33-pretrained         | 0.547 ± 0.064 | 0.567 ± 0.038 |
| esm-t33-gearnet            | 0.575 ± 0.024 | 0.554 ± 0.024 |
| esm-t33                    | 0.582 ± 0.023 | 0.587 ± 0.019 |


Unnamed: 0,model_key,mcc,micro_auprc,record_count
0,esm-t33-gearnet-pretrained,0.473 ± 0.127,0.474 ± 0.110,2239
1,esm-t33-pretrained,0.547 ± 0.064,0.567 ± 0.038,1282
2,esm-t33-gearnet,0.575 ± 0.024,0.554 ± 0.024,5
3,esm-t33,0.582 ± 0.023,0.587 ± 0.019,5


In [8]:
def analyze_metrics(csv_path, group_by_param, filter_conditions=None, sort_by=('mcc', 'mean'), ascending=False, limit=-1,
                    metrics=('mcc', 'micro_auprc', 'sensitivity', 'precision')):
    """
    Analyze metrics by grouping on specified hyperparameters and filtering the DataFrame.

    Parameters:
    - csv_path: path of the CSV file to read (passed to ``pd.read_csv``).
    - group_by_param: str or list, the hyperparameter column(s) to group by.
    - filter_conditions: dict or None, ``{column: value}`` conditions applied
      before grouping. A list/tuple/set value keeps rows whose column is any
      of the given values; any other value is compared with ``==``.
    - sort_by: column key to sort the result by, as a ``(metric, stat)`` tuple
      with ``stat`` being ``'mean'`` or ``'std'``. The metric must be part of
      ``metrics``.
    - ascending: bool, sort direction (default: best/highest first).
    - limit: int, keep only the first ``limit`` rows after sorting; values
      <= 0 keep everything.
    - metrics: sequence of metric columns to aggregate.

    Returns:
    - grouped_df: DataFrame with two-level columns: the grouping columns,
      ``(metric, 'mean')`` / ``(metric, 'std')`` rounded to 4 decimals, and a
      ``count`` column with the number of rows per group.
    """
    df = pd.read_csv(csv_path)

    # Apply filter conditions. Every collection type goes through isin();
    # comparing a Series to a tuple with == does not test membership.
    for column, value in (filter_conditions or {}).items():
        if isinstance(value, (list, tuple, set, frozenset)):
            df = df[df[column].isin(list(value))]
        else:
            df = df[df[column] == value]

    # Define the hyperparameters to group by
    hyperparameters = group_by_param if isinstance(
        group_by_param, list) else [group_by_param]

    metrics = list(metrics)

    # One groupby feeds both the statistics and the group sizes, so the
    # positional assignment of `count` below is guaranteed to line up.
    groups = df.groupby(hyperparameters)
    grouped_df = groups[metrics].agg(['mean', 'std']).round(4).reset_index()
    grouped_df['count'] = groups.size().values

    grouped_df = grouped_df.sort_values(sort_by, ascending=ascending)

    if limit > 0:
        grouped_df = grouped_df.head(limit)
    return grouped_df


def print_markdown_table(grouped_df, metrics=['mcc', 'micro_auprc', 'sensitivity', 'precision']):
    """
    Print the markdown formatted table from the grouped DataFrame.

    The input frame is not modified, so the function can be called repeatedly
    on the same ``grouped_df``.

    Parameters:
    - grouped_df: DataFrame with two-level columns in the layout returned by
      ``analyze_metrics``: grouping columns keyed ``(name, '')``, metric
      columns keyed ``(metric, 'mean')`` / ``(metric, 'std')`` and a
      ``('count', '')`` column.
    - metrics: list, the metrics to include in the table. Metrics present in
      ``grouped_df`` but not listed here are left out.

    Output columns: the grouping columns in their original column order, one
    "mean ± std" column per metric (three decimals), then ``record_count``.
    """
    from tabulate import tabulate

    metrics = list(metrics)
    reserved = set(metrics) | {'count', 'index'}

    # Grouping keys are the columns with an empty second level; walking
    # `columns` (not `columns.levels[0]`) keeps the order the caller grouped by.
    key_columns = [col for col in grouped_df.columns
                   if col[0] not in reserved and col[1] == '']

    # Work on a copy with flat column names; the caller's frame stays intact.
    formatted_df = grouped_df[key_columns].copy()
    formatted_df.columns = [col[0] for col in key_columns]

    for metric in metrics:
        formatted_df[metric] = [
            f"{mean_val:.3f} ± {std_val:.3f}"
            for mean_val, std_val in zip(grouped_df[(metric, 'mean')],
                                         grouped_df[(metric, 'std')])
        ]

    formatted_df['record_count'] = grouped_df[('count', '')].values

    # Print the markdown table
    markdown_table = tabulate(
        formatted_df, headers='keys', tablefmt='pipe', showindex=False)
    print(markdown_table)

# Rank every hyperparameter combination in the esm-t33-gearnet-pretrained
# stats file by mean MCC (the default sort) and keep the top 15 groups.
analyze_metrics(
    csv_path='imatinib_esm-t33-gearnet-pretrained_stats.csv',
    group_by_param=[
        'model_kwargs.lm_freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    limit=15,
)

Unnamed: 0_level_0,model_kwargs.lm_freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
116,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-5_0.pt,0.5934,0.0403,0.5662,0.0224,0.47,0.0401,0.8078,0.1271,10
114,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33-gearnet-lr1e-5_0.pt,0.5933,0.0292,0.5609,0.0148,0.4486,0.0647,0.8434,0.0973,10
110,30.0,0.0003,0.003,6.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.5819,0.042,0.5512,0.0222,0.4352,0.0497,0.8373,0.1243,15
104,30.0,0.0003,0.003,6.0,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-4_0.pt,0.5819,0.0574,0.5352,0.034,0.46,0.0221,0.7904,0.1157,10
132,30.0,0.0003,0.007,4.0,weight/atpbind3d-1930_esm-t33-gearnet_4.pt,0.576,0.0292,0.5244,0.0512,0.4057,0.054,0.8711,0.0793,5
240,31.0,0.0003,0.007,4.0,weight/atpbind3d-1930_esm-t33-gearnet_4.pt,0.5758,0.0166,0.5366,0.03,0.4343,0.0329,0.8155,0.068,5
93,30.0,0.0003,0.003,4.0,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-4_0.pt,0.5755,0.0375,0.5296,0.0407,0.4557,0.0502,0.7824,0.0828,10
115,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-4_0.pt,0.5754,0.0385,0.5557,0.0189,0.4514,0.0588,0.7939,0.102,10
141,30.0,0.0003,0.01,4.0,weight/atpbind3d-1930_esm-t33-gearnet_4.pt,0.5722,0.0333,0.5251,0.0324,0.3971,0.0275,0.8734,0.073,5
37,30.0,0.0001,0.005,6.0,weight/atpbind3d-1930_esm-t33-gearnet_4.pt,0.5716,0.017,0.5448,0.0103,0.4486,0.0078,0.7787,0.0378,5


### Imatinib

#### ESM (No Pretrain)

In [9]:
# Aggregate the esm-t33 stats file per model_key (at most 10 groups shown).
analyze_metrics(
    csv_path='imatinib_esm-t33_stats.csv',
    group_by_param=['model_key'],
    limit=10,
)

Unnamed: 0_level_0,model_key,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 10_level_1
0,esm-t33,0.5824,0.0232,0.5866,0.0191,0.4686,0.0445,0.781,0.108,5


In [10]:
# Rows with pretrained_weight_path == 'empty' at base_lr 3e-4, max_lr 3e-3 and
# freeze_layer_count 30; the remaining grouping key that varies is cycle_size.
analyze_metrics(
    csv_path='imatinib_esm-t33-pretrained_stats.csv',
    group_by_param=[
        'model_kwargs.freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    filter_conditions={
        'pretrained_weight_path': ['empty'],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
        'model_kwargs.freeze_layer_count': [30],
    },
    limit=10,
)

Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
3,30.0,0.0003,0.003,8.0,empty,0.5956,0.0359,0.5858,0.0329,0.4629,0.0424,0.8165,0.073,5
0,30.0,0.0003,0.003,3.0,empty,0.5878,0.0327,0.5936,0.0253,0.4628,0.0514,0.8032,0.097,10
2,30.0,0.0003,0.003,6.0,empty,0.5765,0.0629,0.5896,0.0256,0.4707,0.0751,0.7879,0.188,20
1,30.0,0.0003,0.003,4.0,empty,0.5749,0.0772,0.5998,0.0186,0.4914,0.0896,0.7654,0.2032,20
4,30.0,0.0003,0.003,10.0,empty,0.5663,0.0725,0.5855,0.0284,0.4893,0.0695,0.7389,0.1869,20


#### ESM (Pretrain)

In [11]:
# Same fixed learning-rate / freeze setting, restricted to the
# 'atpbind3d-1930_esm-t33-lowlr_0' pretrained weight.
analyze_metrics(
    csv_path='imatinib_esm-t33-pretrained_stats.csv',
    group_by_param=[
        'model_kwargs.freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    filter_conditions={
        'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33-lowlr_0.pt'],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
        'model_kwargs.freeze_layer_count': [30],
    },
    limit=10,
)

Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
3,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33-lowlr_0.pt,0.5714,0.0468,0.5903,0.0161,0.4914,0.0611,0.7351,0.1676,5
2,30.0,0.0003,0.003,6.0,weight/atpbind3d-1930_esm-t33-lowlr_0.pt,0.567,0.0392,0.5955,0.0085,0.4514,0.078,0.7814,0.1402,5
1,30.0,0.0003,0.003,4.0,weight/atpbind3d-1930_esm-t33-lowlr_0.pt,0.5349,0.0562,0.577,0.0182,0.4714,0.0969,0.698,0.2042,5
0,30.0,0.0003,0.003,3.0,weight/atpbind3d-1930_esm-t33-lowlr_0.pt,0.5347,0.0346,0.5653,0.0178,0.36,0.0673,0.8569,0.0787,5


In [12]:
# Compare the five 'atpbind3d-1930_esm-t33-lr*' pretrained weights at the
# fixed learning-rate / freeze setting; top 10 groups by mean MCC.
analyze_metrics(
    csv_path='imatinib_esm-t33-pretrained_stats.csv',
    group_by_param=[
        'model_kwargs.freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    filter_conditions={
        'pretrained_weight_path': [
            'weight/atpbind3d-1930_esm-t33-lr3e-4_0.pt',
            'weight/atpbind3d-1930_esm-t33-lr1e-4_0.pt',
            'weight/atpbind3d-1930_esm-t33-lr3e-5_0.pt',
            'weight/atpbind3d-1930_esm-t33-lr1e-5_0.pt',
            'weight/atpbind3d-1930_esm-t33-lr3e-6_0.pt',
        ],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
        'model_kwargs.freeze_layer_count': [30],
    },
    limit=10,
)

Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
19,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33-lr3e-6_0.pt,0.611,0.0222,0.5848,0.0129,0.4828,0.0113,0.8192,0.0493,10
0,30.0,0.0003,0.003,3.0,weight/atpbind3d-1930_esm-t33-lr1e-4_0.pt,0.6099,0.0178,0.5932,0.0111,0.4753,0.0272,0.8302,0.0554,11
2,30.0,0.0003,0.003,3.0,weight/atpbind3d-1930_esm-t33-lr3e-4_0.pt,0.6067,0.0286,0.5942,0.0163,0.4831,0.0382,0.8132,0.0874,11
18,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33-lr3e-5_0.pt,0.6023,0.0064,0.582,0.0113,0.4343,0.04,0.8859,0.0781,10
6,30.0,0.0003,0.003,4.0,weight/atpbind3d-1930_esm-t33-lr1e-5_0.pt,0.6016,0.0157,0.5894,0.0082,0.4686,0.0349,0.8231,0.0738,10
14,30.0,0.0003,0.003,6.0,weight/atpbind3d-1930_esm-t33-lr3e-6_0.pt,0.5987,0.0143,0.5752,0.0068,0.4543,0.0335,0.8399,0.0749,10
11,30.0,0.0003,0.003,6.0,weight/atpbind3d-1930_esm-t33-lr1e-5_0.pt,0.5881,0.0208,0.5787,0.0044,0.4514,0.0422,0.8221,0.1031,10
3,30.0,0.0003,0.003,3.0,weight/atpbind3d-1930_esm-t33-lr3e-5_0.pt,0.5812,0.0311,0.5696,0.0193,0.4143,0.0356,0.8628,0.024,10
7,30.0,0.0003,0.003,4.0,weight/atpbind3d-1930_esm-t33-lr3e-4_0.pt,0.5775,0.0309,0.5975,0.0096,0.4914,0.0576,0.7444,0.14,10
4,30.0,0.0003,0.003,3.0,weight/atpbind3d-1930_esm-t33-lr3e-6_0.pt,0.5734,0.0143,0.5733,0.0233,0.4371,0.0691,0.8145,0.0976,10


In [13]:
# Same fixed setting, restricted to the 'atpbind3d-1930_esm-t33_1' weight.
analyze_metrics(
    csv_path='imatinib_esm-t33-pretrained_stats.csv',
    group_by_param=[
        'model_kwargs.freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    filter_conditions={
        'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33_1.pt'],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
        'model_kwargs.freeze_layer_count': [30],
    },
    limit=10,
)

Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
1,30.0,0.0003,0.003,4.0,weight/atpbind3d-1930_esm-t33_1.pt,0.5628,0.0205,0.5854,0.0138,0.4714,0.0337,0.7302,0.0912,15
0,30.0,0.0003,0.003,3.0,weight/atpbind3d-1930_esm-t33_1.pt,0.5621,0.0206,0.5877,0.0152,0.4543,0.0309,0.7521,0.0863,5
2,30.0,0.0003,0.003,6.0,weight/atpbind3d-1930_esm-t33_1.pt,0.5513,0.0149,0.5658,0.0219,0.4771,0.0357,0.6952,0.0638,15
4,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33_1.pt,0.5011,0.0214,0.5375,0.0148,0.42,0.0534,0.6684,0.0982,15
3,30.0,0.0003,0.003,8.0,weight/atpbind3d-1930_esm-t33_1.pt,0.4944,0.0279,0.5495,0.0215,0.4229,0.0837,0.6598,0.1411,5


In [14]:
# Same fixed setting, restricted to the 'atpbind3d-1930_esm-t33_1_rmmlp' weight.
analyze_metrics(
    csv_path='imatinib_esm-t33-pretrained_stats.csv',
    group_by_param=[
        'model_kwargs.freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    filter_conditions={
        'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33_1_rmmlp.pt'],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
        'model_kwargs.freeze_layer_count': [30],
    },
    limit=10,
)

Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
1,30.0,0.0003,0.003,4.0,weight/atpbind3d-1930_esm-t33_1_rmmlp.pt,0.5699,0.0246,0.5847,0.0136,0.4647,0.048,0.7593,0.1009,15
0,30.0,0.0003,0.003,3.0,weight/atpbind3d-1930_esm-t33_1_rmmlp.pt,0.5598,0.0403,0.5694,0.0157,0.4485,0.054,0.7525,0.0379,5
3,30.0,0.0003,0.003,8.0,weight/atpbind3d-1930_esm-t33_1_rmmlp.pt,0.5377,0.0387,0.5612,0.0333,0.4857,0.0474,0.6632,0.1431,5
2,30.0,0.0003,0.003,6.0,weight/atpbind3d-1930_esm-t33_1_rmmlp.pt,0.5268,0.0364,0.5655,0.027,0.4838,0.0438,0.6382,0.0931,15
4,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33_1_rmmlp.pt,0.516,0.0407,0.5553,0.0248,0.4495,0.0894,0.6682,0.1139,15


In [15]:
# Same fixed setting, restricted to the 'atpbind3d_esm-t33_1' weight.
analyze_metrics(
    csv_path='imatinib_esm-t33-pretrained_stats.csv',
    group_by_param=[
        'model_kwargs.freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    filter_conditions={
        'pretrained_weight_path': ['weight/atpbind3d_esm-t33_1.pt'],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
        'model_kwargs.freeze_layer_count': [30],
    },
    limit=10,
)

Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
3,30.0,0.0003,0.003,8.0,weight/atpbind3d_esm-t33_1.pt,0.5921,0.0239,0.5553,0.0148,0.4486,0.0372,0.8334,0.0875,5
0,30.0,0.0003,0.003,3.0,weight/atpbind3d_esm-t33_1.pt,0.5877,0.0213,0.5468,0.007,0.4486,0.0559,0.8257,0.0909,5
2,30.0,0.0003,0.003,6.0,weight/atpbind3d_esm-t33_1.pt,0.5699,0.0367,0.5471,0.0117,0.46,0.0412,0.7692,0.1346,15
1,30.0,0.0003,0.003,4.0,weight/atpbind3d_esm-t33_1.pt,0.5675,0.0475,0.5506,0.0133,0.4514,0.0632,0.7843,0.1548,15
4,30.0,0.0003,0.003,10.0,weight/atpbind3d_esm-t33_1.pt,0.5547,0.0293,0.5503,0.0289,0.4457,0.0287,0.7471,0.0843,15


#### Multiview (No Pretrain)

In [16]:

# Multiview (esm-t33-gearnet) rows with pretrained_weight_path == 'empty',
# lm_freeze_layer_count 30, base_lr 3e-4 and max_lr 3e-3.
analyze_metrics(
    csv_path='imatinib_esm-t33-gearnet-pretrained_stats.csv',
    group_by_param=[
        'model_kwargs.lm_freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    filter_conditions={
        'pretrained_weight_path': ['empty'],
        'model_kwargs.lm_freeze_layer_count': [30],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
    },
    limit=10,
)

Unnamed: 0_level_0,model_kwargs.lm_freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
4,30.0,0.0003,0.003,10.0,empty,0.5523,0.0341,0.5543,0.0256,0.4243,0.0709,0.792,0.1399,20
3,30.0,0.0003,0.003,6.0,empty,0.523,0.0502,0.5085,0.036,0.3964,0.0558,0.7636,0.1521,20
2,30.0,0.0003,0.003,4.0,empty,0.3089,0.1541,0.3227,0.144,0.2636,0.1631,0.565,0.2857,20
1,30.0,0.0003,0.003,3.0,empty,0.1782,0.1303,0.2274,0.1121,0.1336,0.1477,0.4583,0.3048,20
0,30.0,0.0003,0.003,2.0,empty,-0.0145,0.0281,0.0564,0.0152,0.0371,0.092,0.0462,0.1126,20


#### Multiview (Pretrain)

In [17]:

# Compare three 'atpbind3d-1930_esm-t33-gearnet-lr*' pretrained weights per
# cycle_size at the fixed freeze / learning-rate setting; top 15 by mean MCC.
analyze_metrics(
    csv_path='imatinib_esm-t33-gearnet-pretrained_stats.csv',
    group_by_param=['cycle_size', 'pretrained_weight_path'],
    filter_conditions={
        'pretrained_weight_path': [
            # Currently excluded from the comparison:
            # 'weight/atpbind3d-1930_esm-t33-gearnet-lr3e-4_0.pt',
            # 'weight/atpbind3d-1930_esm-t33-gearnet-lr1e-4_0.pt',
            'weight/atpbind3d-1930_esm-t33-gearnet-lr3e-5_0.pt',
            'weight/atpbind3d-1930_esm-t33-gearnet-lr1e-5_0.pt',
            'weight/atpbind3d-1930_esm-t33-gearnet-lr3e-6_0.pt',
        ],
        'model_kwargs.lm_freeze_layer_count': [30.0],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
    },
    limit=15,
)

Unnamed: 0_level_0,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
13,10.0,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-5_0.pt,0.5934,0.0403,0.5662,0.0224,0.47,0.0401,0.8078,0.1271,10
12,10.0,weight/atpbind3d-1930_esm-t33-gearnet-lr1e-5_0.pt,0.5933,0.0292,0.5609,0.0148,0.4486,0.0647,0.8434,0.0973,10
9,6.0,weight/atpbind3d-1930_esm-t33-gearnet-lr1e-5_0.pt,0.5658,0.0558,0.5432,0.0501,0.4357,0.072,0.7993,0.1295,10
14,10.0,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-6_0.pt,0.5641,0.0579,0.554,0.0192,0.4543,0.0624,0.7804,0.1901,10
10,6.0,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-5_0.pt,0.5625,0.087,0.541,0.0539,0.4557,0.0975,0.7615,0.1461,10
11,6.0,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-6_0.pt,0.5348,0.0816,0.5317,0.043,0.4743,0.0731,0.6952,0.2109,10
7,4.0,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-5_0.pt,0.4447,0.0931,0.4351,0.1014,0.3243,0.138,0.705,0.0869,10
6,4.0,weight/atpbind3d-1930_esm-t33-gearnet-lr1e-5_0.pt,0.3279,0.0741,0.3539,0.0759,0.2214,0.1271,0.6464,0.2385,10
8,4.0,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-6_0.pt,0.2837,0.0648,0.3472,0.035,0.1871,0.1011,0.5924,0.165,10
4,3.0,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-5_0.pt,0.2392,0.1072,0.2702,0.1088,0.1728,0.152,0.5717,0.2908,10


In [18]:

# Same fixed setting, restricted to the
# 'atpbind3d-1930_esm-t33-gearnet-lowlr_0' pretrained weight.
analyze_metrics(
    csv_path='imatinib_esm-t33-gearnet-pretrained_stats.csv',
    group_by_param=[
        'model_kwargs.lm_freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    filter_conditions={
        'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33-gearnet-lowlr_0.pt'],
        'model_kwargs.lm_freeze_layer_count': [30.0],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
    },
    limit=15,
)

Unnamed: 0_level_0,model_kwargs.lm_freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
4,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33-gearnet-lowlr_0.pt,0.5634,0.0404,0.5661,0.0334,0.4543,0.0383,0.7529,0.0783,5
3,30.0,0.0003,0.003,6.0,weight/atpbind3d-1930_esm-t33-gearnet-lowlr_0.pt,0.5549,0.0401,0.5694,0.0201,0.4572,0.0553,0.7425,0.1577,5
1,30.0,0.0003,0.003,3.0,weight/atpbind3d-1930_esm-t33-gearnet-lowlr_0.pt,0.5382,0.0769,0.5248,0.0446,0.4486,0.0491,0.7172,0.1903,5
2,30.0,0.0003,0.003,4.0,weight/atpbind3d-1930_esm-t33-gearnet-lowlr_0.pt,0.4927,0.1378,0.5101,0.136,0.3971,0.1167,0.6762,0.1756,5
0,30.0,0.0003,0.003,2.0,weight/atpbind3d-1930_esm-t33-gearnet-lowlr_0.pt,0.3411,0.1461,0.4187,0.1213,0.3714,0.216,0.5391,0.3214,5


In [19]:

# Same breakdown as the previous cell, for the "gearnet_1" pretrained weights:
# with freeze count and learning rates pinned, only cycle_size varies.
analyze_metrics(
    'imatinib_esm-t33-gearnet-pretrained_stats.csv',
    [
        'model_kwargs.lm_freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    {
        'pretrained_weight_path': [
            'weight/atpbind3d-1930_esm-t33-gearnet_1.pt',
        ],
        'model_kwargs.lm_freeze_layer_count': [30.0],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
    },
    limit=15,
)

Unnamed: 0_level_0,model_kwargs.lm_freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
1,30.0,0.0003,0.003,3.0,weight/atpbind3d-1930_esm-t33-gearnet_1.pt,0.5288,0.0247,0.5175,0.0088,0.4238,0.0257,0.7196,0.0796,15
3,30.0,0.0003,0.003,6.0,weight/atpbind3d-1930_esm-t33-gearnet_1.pt,0.5243,0.044,0.5034,0.0315,0.3905,0.0578,0.7761,0.1508,15
2,30.0,0.0003,0.003,4.0,weight/atpbind3d-1930_esm-t33-gearnet_1.pt,0.5093,0.0253,0.5025,0.0367,0.3933,0.0483,0.728,0.1074,15
4,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33-gearnet_1.pt,0.501,0.0357,0.5342,0.0318,0.4352,0.0836,0.6567,0.1174,15
0,30.0,0.0003,0.003,2.0,weight/atpbind3d-1930_esm-t33-gearnet_1.pt,0.4673,0.0585,0.4205,0.0507,0.4819,0.0339,0.5264,0.1129,15


In [20]:

# Imatinib esm-t33-gearnet runs restricted to the "gearnet_1_rmmlp" pretrained
# weights; cycle_size is the only grouping column left free by the filter.
analyze_metrics(
    'imatinib_esm-t33-gearnet-pretrained_stats.csv',
    [
        'model_kwargs.lm_freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    {
        'pretrained_weight_path': [
            'weight/atpbind3d-1930_esm-t33-gearnet_1_rmmlp.pt',
        ],
        'model_kwargs.lm_freeze_layer_count': [30.0],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
    },
    limit=10,
)

Unnamed: 0_level_0,model_kwargs.lm_freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
3,30.0,0.0003,0.003,6.0,weight/atpbind3d-1930_esm-t33-gearnet_1_rmmlp.pt,0.5372,0.0467,0.5217,0.0276,0.419,0.0444,0.7559,0.1382,15
4,30.0,0.0003,0.003,10.0,weight/atpbind3d-1930_esm-t33-gearnet_1_rmmlp.pt,0.5104,0.0434,0.5163,0.0377,0.3895,0.0828,0.7533,0.1503,15
2,30.0,0.0003,0.003,4.0,weight/atpbind3d-1930_esm-t33-gearnet_1_rmmlp.pt,0.5001,0.0664,0.4844,0.062,0.4019,0.0897,0.6986,0.1176,15
1,30.0,0.0003,0.003,3.0,weight/atpbind3d-1930_esm-t33-gearnet_1_rmmlp.pt,0.4935,0.0972,0.4778,0.0974,0.3714,0.1134,0.7339,0.1073,15
0,30.0,0.0003,0.003,2.0,weight/atpbind3d-1930_esm-t33-gearnet_1_rmmlp.pt,0.4277,0.1315,0.4399,0.1428,0.3867,0.1733,0.629,0.1934,15


In [21]:

# Imatinib esm-t33-gearnet runs restricted to the "atpbind3d_esm-t33-gearnet_1"
# pretrained weights (no "-1930" in the file name); only cycle_size varies.
analyze_metrics(
    'imatinib_esm-t33-gearnet-pretrained_stats.csv',
    [
        'model_kwargs.lm_freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    {
        'pretrained_weight_path': [
            'weight/atpbind3d_esm-t33-gearnet_1.pt',
        ],
        'model_kwargs.lm_freeze_layer_count': [30.0],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
    },
    limit=15,
)

Unnamed: 0_level_0,model_kwargs.lm_freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
3,30.0,0.0003,0.003,6.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.5819,0.042,0.5512,0.0222,0.4352,0.0497,0.8373,0.1243,15
4,30.0,0.0003,0.003,10.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.5669,0.0361,0.5499,0.0218,0.4352,0.0608,0.8047,0.1311,15
1,30.0,0.0003,0.003,3.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.5518,0.0426,0.5241,0.0364,0.4295,0.0721,0.7789,0.1252,15
2,30.0,0.0003,0.003,4.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.5068,0.0683,0.5141,0.0246,0.4076,0.0716,0.7193,0.1813,15
0,30.0,0.0003,0.003,2.0,weight/atpbind3d_esm-t33-gearnet_1.pt,0.3019,0.0724,0.305,0.0652,0.2715,0.1145,0.4348,0.0646,15


#### Adaboost (No Pretrain)

In [22]:

# Imatinib boosting runs without pretraining, keeping only the rows with
# boost_mask_positive == True and grouping by boost_negative_use_ratio.
analyze_metrics(
    'imatinib_esm-t33-gearnet-resiboost_stats.csv',
    ['boost_negative_use_ratio', 'boost_mask_positive'],
    {
        'boost_mask_positive': [True],  # Adaboost
    },
    limit=10,
)




Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
1,0.9,True,0.572,0.0474,0.5932,0.0143,0.4586,0.0647,0.788,0.1721,10
0,0.5,True,0.5622,0.0885,0.5936,0.0165,0.46,0.0731,0.7806,0.2454,5


#### Adaboost (Pretrain)

In [23]:
# Imatinib boosting runs from the "pretrained-resiboost" stats file, keeping
# only boost_mask_positive == True and grouping by boost_negative_use_ratio.
analyze_metrics(
    'imatinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
    ['boost_negative_use_ratio', 'boost_mask_positive'],
    {
        'boost_mask_positive': [True],  # Adaboost
    },
    limit=10,
)

Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
1,0.9,True,0.5952,0.0371,0.587,0.0151,0.4828,0.0521,0.7955,0.1403,10
0,0.5,True,0.5783,0.0547,0.589,0.0083,0.4514,0.0674,0.8162,0.1922,5


#### Resiboost (No Pretrain)

In [24]:

# Imatinib boosting runs without pretraining, keeping only the rows with
# boost_mask_positive == False and grouping by boost_negative_use_ratio.
analyze_metrics(
    'imatinib_esm-t33-gearnet-resiboost_stats.csv',
    ['boost_negative_use_ratio', 'boost_mask_positive'],
    {
        'boost_mask_positive': [False],  # Resiboost
    },
    limit=10,
)

Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
0,0.5,False,0.5909,0.0362,0.5918,0.0205,0.4543,0.0409,0.8256,0.1261,5
1,0.9,False,0.5731,0.0521,0.5896,0.0147,0.4671,0.0517,0.7738,0.1665,10


#### Resiboost (Pretrain)

In [25]:
# Imatinib boosting runs from the "pretrained-resiboost" stats file, keeping
# only boost_mask_positive == False and grouping by boost_negative_use_ratio.
analyze_metrics(
    'imatinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
    ['boost_negative_use_ratio', 'boost_mask_positive'],
    {
        'boost_mask_positive': [False],  # Resiboost
    },
    limit=10,
)

Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
1,0.9,False,0.597,0.0459,0.591,0.0136,0.48,0.0556,0.8081,0.1578,10
0,0.5,False,0.5797,0.05,0.6014,0.0158,0.4828,0.0566,0.7617,0.1508,5


In [26]:
def pick_and_aggregate(model_lists, metrics=['mcc', 'micro_auprc', 'sensitivity', 'precision']):
    '''
    Build a per-model summary table: mean, std and row count of each metric.

    Parameters
    ----------
    model_lists : list of dict
        One entry per output row, each with the keys
        ``'label'`` (str, the name written into the ``model_key`` column),
        ``'csv_path'`` (str, CSV file to read; it must contain a ``model_key``
        column and every column named in ``metrics``) and
        ``'filter_conditions'`` (dict mapping a column name to the list of
        accepted values; a row is kept only if it satisfies all of them).
    metrics : sequence of str
        Metric columns to aggregate. The argument is copied, never mutated.

    Returns
    -------
    pandas.DataFrame
        One row per label that matched at least one CSV row, in the order the
        labels appear in ``model_lists``. The columns form a two-level index:
        ``model_key``, then ``(metric, 'mean')`` and ``(metric, 'std')`` for
        every metric, then ``count``. Numeric values are rounded to 3 decimals.

    Warns
    -----
    UserWarning
        When the filters of an entry match no rows. Such a label cannot appear
        in the result, so the omission is reported instead of passing silently.
    '''
    import warnings

    import pandas as pd

    # Private copy: accepts tuples and guarantees the shared default list is
    # never modified.
    metrics = list(metrics)

    frames = []
    for model in model_lists:
        rows = pd.read_csv(model['csv_path'])

        # Successively narrow the frame; conditions combine with logical AND.
        for col, values in model['filter_conditions'].items():
            rows = rows[rows[col].isin(values)]

        if rows.empty:
            warnings.warn(
                f"pick_and_aggregate: no rows matched the filters for "
                f"{model['label']!r}; it will be missing from the result",
                stacklevel=2,
            )

        # assign() returns a new frame, so relabelling never writes into a
        # slice of the CSV data (which is what triggers SettingWithCopyWarning).
        frames.append(
            rows[['model_key'] + metrics].assign(model_key=model['label'])
        )

    combined_df = pd.concat(frames, ignore_index=True)

    grouped = combined_df.groupby('model_key')
    result = grouped.agg({metric: ['mean', 'std'] for metric in metrics})
    result['count'] = grouped.size()

    # groupby sorts the labels alphabetically. Compute the row positions that
    # restore the caller's order and select them with iloc; this avoids adding
    # and then dropping a helper column on the non-lexsorted MultiIndex
    # columns, which made pandas emit a PerformanceWarning.
    rank = {model['label']: i for i, model in enumerate(model_lists)}
    labels = result.index.tolist()
    positions = sorted(range(len(labels)), key=lambda pos: rank[labels[pos]])

    result = result.reset_index().iloc[positions]

    return result.round(3)

# Imatinib summary table: one row per configuration, in the order listed here
# (pick_and_aggregate re-sorts its result to follow this list).
# The duplicated 'model_kwargs.freeze_layer_count' keys that used to appear in
# the two ESM filters were removed; a repeated key in a dict literal is silently
# collapsed, so the resulting filters are unchanged.
# NOTE(review): 'ESM (Pretrain)' filters on cycle_size 6 while every other
# entry that sets cycle_size uses 10 - confirm this is intentional.
result = pick_and_aggregate([
    {
        'label': 'ESM (No Pretrain)',
        'csv_path': 'imatinib_esm-t33-pretrained_stats.csv',
        'filter_conditions': {
            'model_kwargs.freeze_layer_count': [30],
            'pretrained_weight_path': ['empty'],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [10],
        },
    },
    {
        'label': 'ESM (Pretrain)',
        'csv_path': 'imatinib_esm-t33-pretrained_stats.csv',
        'filter_conditions': {
            'model_kwargs.freeze_layer_count': [30],
            'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33-lr3e-6_0.pt'],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [6],
        },
    },
    {
        'label': 'Multiview (No Pretrain)',
        'csv_path': 'imatinib_esm-t33-gearnet-pretrained_stats.csv',
        'filter_conditions': {
            'pretrained_weight_path': ['empty'],
            'model_kwargs.lm_freeze_layer_count': [30],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [10],
        },
    },
    {
        'label': 'Multiview (Pretrain)',
        'csv_path': 'imatinib_esm-t33-gearnet-pretrained_stats.csv',
        'filter_conditions': {
            'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33-gearnet-lr3e-5_0.pt'],
            'model_kwargs.lm_freeze_layer_count': [30.0],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [10],
        },
    },
    {
        'label': 'Multiview (Adaboost) (No Pretrain)',
        'csv_path': 'imatinib_esm-t33-gearnet-resiboost_stats.csv',
        'filter_conditions': {
            'boost_negative_use_ratio': [0.9],
            'boost_mask_positive': [True],
        },
    },
    {
        'label': 'Multiview (Adaboost) (Pretrain)',
        'csv_path': 'imatinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
        'filter_conditions': {
            'boost_negative_use_ratio': [0.9],
            'boost_mask_positive': [True],
        },
    },
    {
        'label': 'Multiview (Resiboost) (No Pretrain)',
        'csv_path': 'imatinib_esm-t33-gearnet-resiboost_stats.csv',
        'filter_conditions': {
            'boost_negative_use_ratio': [0.9],
            'boost_mask_positive': [False],
        },
    },
    {
        'label': 'Multiview (Resiboost) (Pretrain)',
        'csv_path': 'imatinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
        'filter_conditions': {
            'boost_negative_use_ratio': [0.9],
            'boost_mask_positive': [False],
        },
    },
])

print_markdown_table(result)

| model_key                           | mcc           | micro_auprc   | sensitivity   | precision     |   record_count |
|:------------------------------------|:--------------|:--------------|:--------------|:--------------|---------------:|
| ESM (No Pretrain)                   | 0.566 ± 0.073 | 0.586 ± 0.028 | 0.489 ± 0.069 | 0.739 ± 0.187 |             20 |
| ESM (Pretrain)                      | 0.599 ± 0.014 | 0.575 ± 0.007 | 0.454 ± 0.034 | 0.840 ± 0.075 |             10 |
| Multiview (No Pretrain)             | 0.552 ± 0.034 | 0.554 ± 0.026 | 0.424 ± 0.071 | 0.792 ± 0.140 |             20 |
| Multiview (Pretrain)                | 0.593 ± 0.040 | 0.566 ± 0.022 | 0.470 ± 0.040 | 0.808 ± 0.127 |             10 |
| Multiview (Adaboost) (No Pretrain)  | 0.572 ± 0.047 | 0.593 ± 0.014 | 0.459 ± 0.065 | 0.788 ± 0.172 |             10 |
| Multiview (Adaboost) (Pretrain)     | 0.595 ± 0.037 | 0.587 ± 0.015 | 0.483 ± 0.052 | 0.796 ± 0.140 |             10 |
| Multiview (Resiboost) (No Pret

  result = result.sort_values('order').drop('order', axis=1)


### Dasatinib

#### ESM (No Pretrain)

In [27]:
# Dasatinib esm-t33 runs with pretrained_weight_path == 'empty' (the
# "No Pretrain" baseline); with the learning rates and freeze count pinned,
# cycle_size is the only grouping column that varies.
analyze_metrics(
    'dasatinib_esm-t33-pretrained_stats.csv',
    [
        'model_kwargs.freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    {
        'pretrained_weight_path': ['empty'],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
        'model_kwargs.freeze_layer_count': [30],
    },
    limit=10,
)


Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
2,30,0.0003,0.003,6,empty,0.5573,0.0707,0.6055,0.0626,0.5407,0.1254,0.6511,0.1503,10
0,30,0.0003,0.003,3,empty,0.5466,0.0721,0.6216,0.0545,0.5068,0.1299,0.6563,0.0593,10
3,30,0.0003,0.003,10,empty,0.5356,0.0846,0.5823,0.078,0.4746,0.1054,0.675,0.1292,10
1,30,0.0003,0.003,4,empty,0.5182,0.1043,0.6062,0.0845,0.4322,0.1972,0.7276,0.1212,10


#### ESM (Pretrain)

In [28]:
# Dasatinib esm-t33 runs compared across five pretrained checkpoints at
# cycle_size 10. The freeze count, base_lr and max_lr are held to a single
# value by the filter, so they are left out of the grouping columns.
analyze_metrics(
    'dasatinib_esm-t33-pretrained_stats.csv',
    ['cycle_size', 'pretrained_weight_path'],
    {
        'pretrained_weight_path': [
            'weight/atpbind3d-1930_esm-t33-lr3e-4_0.pt',
            'weight/atpbind3d-1930_esm-t33-lr1e-4_0.pt',
            'weight/atpbind3d-1930_esm-t33-lr3e-5_0.pt',
            'weight/atpbind3d-1930_esm-t33-lr1e-5_0.pt',
            'weight/atpbind3d-1930_esm-t33-lr3e-6_0.pt',
        ],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
        'cycle_size': [10],
        'model_kwargs.freeze_layer_count': [30],
    },
    limit=10,
)

Unnamed: 0_level_0,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
1,10,weight/atpbind3d-1930_esm-t33-lr1e-5_0.pt,0.5796,0.0463,0.6643,0.0485,0.5864,0.082,0.638,0.1312,10
4,10,weight/atpbind3d-1930_esm-t33-lr3e-6_0.pt,0.5618,0.0659,0.6682,0.0602,0.5797,0.0653,0.6023,0.0986,10
0,10,weight/atpbind3d-1930_esm-t33-lr1e-4_0.pt,0.5377,0.073,0.655,0.0215,0.4373,0.1509,0.739,0.0679,10
2,10,weight/atpbind3d-1930_esm-t33-lr3e-4_0.pt,0.5233,0.0476,0.6384,0.0369,0.4102,0.1007,0.7351,0.066,10
3,10,weight/atpbind3d-1930_esm-t33-lr3e-5_0.pt,0.5221,0.0696,0.616,0.1089,0.4983,0.0755,0.6051,0.0685,10


#### Multiview (No Pretrain)

In [29]:

# Dasatinib esm-t33-gearnet runs with pretrained_weight_path == 'empty'
# (the "No Pretrain" baseline); only cycle_size is left free by the filter.
analyze_metrics(
    'dasatinib_esm-t33-gearnet-pretrained_stats.csv',
    [
        'model_kwargs.lm_freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    {
        'pretrained_weight_path': ['empty'],
        'model_kwargs.lm_freeze_layer_count': [30],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
    },
    limit=10,
)

Unnamed: 0_level_0,model_kwargs.lm_freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
4,30,0.0003,0.003,10,empty,0.5574,0.0709,0.6266,0.0726,0.4797,0.0875,0.7095,0.1112,10
3,30,0.0003,0.003,6,empty,0.4958,0.0318,0.5553,0.0624,0.4254,0.1088,0.6662,0.145,10
2,30,0.0003,0.003,4,empty,0.3138,0.1754,0.3677,0.1761,0.2051,0.1189,0.5669,0.2942,10
1,30,0.0003,0.003,3,empty,0.0229,0.0976,0.0796,0.0719,0.0644,0.0941,0.0811,0.1318,10
0,30,0.0003,0.003,2,empty,-0.0057,0.0359,0.0592,0.0566,0.0627,0.1983,0.0119,0.0316,10


#### Multiview (Pretrain)

In [30]:

# Dasatinib esm-t33-gearnet runs compared across five pretrained checkpoints,
# with cycle_size, freeze count and learning rates all pinned by the filter.
analyze_metrics(
    'dasatinib_esm-t33-gearnet-pretrained_stats.csv',
    ['pretrained_weight_path', 'cycle_size'],
    {
        'pretrained_weight_path': [
            'weight/atpbind3d-1930_esm-t33-gearnet-lr3e-4_0.pt',
            'weight/atpbind3d-1930_esm-t33-gearnet-lr1e-4_0.pt',
            'weight/atpbind3d-1930_esm-t33-gearnet-lr3e-5_0.pt',
            'weight/atpbind3d-1930_esm-t33-gearnet-lr1e-5_0.pt',
            'weight/atpbind3d-1930_esm-t33-gearnet-lr3e-6_0.pt',
        ],
        'model_kwargs.lm_freeze_layer_count': [30.0],
        'cycle_size': [10],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
    },
    limit=15,
)

Unnamed: 0_level_0,pretrained_weight_path,cycle_size,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
4,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-6_0.pt,10,0.6067,0.0538,0.6697,0.0426,0.4949,0.0945,0.8093,0.1456,10
3,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-5_0.pt,10,0.5929,0.0711,0.6613,0.0816,0.522,0.0833,0.7314,0.1163,10
1,weight/atpbind3d-1930_esm-t33-gearnet-lr1e-5_0.pt,10,0.5869,0.0524,0.6802,0.0487,0.4814,0.1191,0.7894,0.126,10
2,weight/atpbind3d-1930_esm-t33-gearnet-lr3e-4_0.pt,10,0.549,0.1169,0.6374,0.087,0.4407,0.1489,0.7692,0.0972,10
0,weight/atpbind3d-1930_esm-t33-gearnet-lr1e-4_0.pt,10,0.4996,0.1023,0.6216,0.0769,0.4407,0.1745,0.6777,0.1755,10


#### Adaboost (No Pretrain)

In [31]:

# Dasatinib boosting runs without pretraining, keeping only the rows with
# boost_mask_positive == True and grouping by boost_negative_use_ratio.
analyze_metrics(
    'dasatinib_esm-t33-gearnet-resiboost_stats.csv',
    ['boost_negative_use_ratio', 'boost_mask_positive'],
    {
        'boost_mask_positive': [True],  # Adaboost
    },
    limit=10,
)

Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
0,0.5,True,0.5697,0.0752,0.6714,0.0457,0.4881,0.1551,0.7426,0.1291,5
1,0.9,True,0.5683,0.0741,0.695,0.0462,0.4288,0.1325,0.8274,0.0967,10


#### Adaboost (Pretrain)

In [32]:
# Dasatinib boosting runs from the "pretrained-resiboost" stats file, keeping
# only boost_mask_positive == True and grouping by boost_negative_use_ratio.
analyze_metrics(
    'dasatinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
    ['boost_negative_use_ratio', 'boost_mask_positive'],
    {
        'boost_mask_positive': [True],  # Adaboost
    },
    limit=10,
)

Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
0,0.5,True,0.5608,0.0902,0.6933,0.0502,0.4881,0.1893,0.7396,0.1515,5
1,0.9,True,0.5586,0.0743,0.7054,0.05,0.4407,0.1409,0.7887,0.1156,10


#### Resiboost (No Pretrain)

In [33]:
# Dasatinib boosting runs without pretraining, keeping only the rows with
# boost_mask_positive == False and grouping by boost_negative_use_ratio.
analyze_metrics(
    'dasatinib_esm-t33-gearnet-resiboost_stats.csv',
    ['boost_negative_use_ratio', 'boost_mask_positive'],
    {
        'boost_mask_positive': [False],  # Resiboost
    },
    limit=10,
)

Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
0,0.5,False,0.5993,0.036,0.6777,0.054,0.5729,0.05,0.6811,0.1008,5
1,0.9,False,0.572,0.0578,0.6954,0.0525,0.5,0.1377,0.7371,0.1476,10


In [34]:
# Resiboost (Pretrain)
# Dasatinib boosting runs from the "pretrained-resiboost" stats file, keeping
# only boost_mask_positive == False and grouping by boost_negative_use_ratio.
analyze_metrics(
    'dasatinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
    ['boost_negative_use_ratio', 'boost_mask_positive'],
    {
        'boost_mask_positive': [False],  # Resiboost
    },
    limit=10,
)

Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
0,0.5,False,0.6126,0.0707,0.6976,0.0497,0.539,0.1133,0.7496,0.0641,5
1,0.9,False,0.593,0.0722,0.7101,0.0476,0.4932,0.1272,0.7783,0.0845,10


In [35]:
# Dasatinib summary table: one row per configuration, in the order listed here.
# NOTE(review): the pretrained rows use the lr3e-6 (ESM) and lr3e-5 (Multiview)
# checkpoints, which are not the top-ranked rows by mean mcc in the In [28] and
# In [30] tables - confirm the selection is intentional.
result = pick_and_aggregate([
    # --- ESM only ---
    dict(
        label='ESM (No Pretrain)',
        csv_path='dasatinib_esm-t33-pretrained_stats.csv',
        filter_conditions={
            'pretrained_weight_path': ['empty'],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'model_kwargs.freeze_layer_count': [30],
            'cycle_size': [10],
        },
    ),
    dict(
        label='ESM (Pretrain)',
        csv_path='dasatinib_esm-t33-pretrained_stats.csv',
        filter_conditions={
            'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33-lr3e-6_0.pt'],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [10],
            'model_kwargs.freeze_layer_count': [30],
        },
    ),
    # --- Multiview ---
    dict(
        label='Multiview (No Pretrain)',
        csv_path='dasatinib_esm-t33-gearnet-pretrained_stats.csv',
        filter_conditions={
            'pretrained_weight_path': ['empty'],
            'model_kwargs.lm_freeze_layer_count': [30],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [10],
        },
    ),
    dict(
        label='Multiview (Pretrain)',
        csv_path='dasatinib_esm-t33-gearnet-pretrained_stats.csv',
        filter_conditions={
            'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33-gearnet-lr3e-5_0.pt'],
            'model_kwargs.lm_freeze_layer_count': [30.0],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [10],
        },
    ),
    # --- Multiview + boosting (boost_mask_positive: True = Adaboost, False = Resiboost) ---
    dict(
        label='Multiview (Adaboost) (No Pretrain)',
        csv_path='dasatinib_esm-t33-gearnet-resiboost_stats.csv',
        filter_conditions={
            'boost_negative_use_ratio': [0.9], 'boost_mask_positive': [True],
        },
    ),
    dict(
        label='Multiview (Adaboost) (Pretrain)',
        csv_path='dasatinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
        filter_conditions={
            'boost_negative_use_ratio': [0.9], 'boost_mask_positive': [True],
        },
    ),
    dict(
        label='Multiview (Resiboost) (No Pretrain)',
        csv_path='dasatinib_esm-t33-gearnet-resiboost_stats.csv',
        filter_conditions={
            'boost_negative_use_ratio': [0.9], 'boost_mask_positive': [False],
        },
    ),
    dict(
        label='Multiview (Resiboost) (Pretrain)',
        csv_path='dasatinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
        filter_conditions={
            'boost_negative_use_ratio': [0.9], 'boost_mask_positive': [False],
        },
    ),
])

print_markdown_table(result)

| model_key                           | mcc           | micro_auprc   | sensitivity   | precision     |   record_count |
|:------------------------------------|:--------------|:--------------|:--------------|:--------------|---------------:|
| ESM (No Pretrain)                   | 0.536 ± 0.085 | 0.582 ± 0.078 | 0.475 ± 0.105 | 0.675 ± 0.129 |             10 |
| ESM (Pretrain)                      | 0.562 ± 0.066 | 0.668 ± 0.060 | 0.580 ± 0.065 | 0.602 ± 0.099 |             10 |
| Multiview (No Pretrain)             | 0.557 ± 0.071 | 0.627 ± 0.073 | 0.480 ± 0.088 | 0.710 ± 0.111 |             10 |
| Multiview (Pretrain)                | 0.593 ± 0.071 | 0.661 ± 0.082 | 0.522 ± 0.083 | 0.731 ± 0.116 |             10 |
| Multiview (Adaboost) (No Pretrain)  | 0.568 ± 0.074 | 0.695 ± 0.046 | 0.429 ± 0.133 | 0.827 ± 0.097 |             10 |
| Multiview (Adaboost) (Pretrain)     | 0.559 ± 0.074 | 0.705 ± 0.050 | 0.441 ± 0.141 | 0.789 ± 0.116 |             10 |
| Multiview (Resiboost) (No Pret

  result = result.sort_values('order').drop('order', axis=1)


### Bosutinib


In [36]:
# Bosutinib esm-t33 runs with pretrained_weight_path == 'empty' (the
# "No Pretrain" baseline); with the learning rates and freeze count pinned,
# cycle_size is the only grouping column that varies.
analyze_metrics(
    'bosutinib_esm-t33-pretrained_stats.csv',
    [
        'model_kwargs.freeze_layer_count',
        'base_lr',
        'max_lr',
        'cycle_size',
        'pretrained_weight_path',
    ],
    {
        'pretrained_weight_path': ['empty'],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
        'model_kwargs.freeze_layer_count': [30],
    },
    limit=10,
)

Unnamed: 0_level_0,model_kwargs.freeze_layer_count,base_lr,max_lr,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 14_level_1
3,30,0.0003,0.003,10,empty,0.7632,0.0283,0.846,0.0234,0.6867,0.0652,0.8839,0.0355,10
2,30,0.0003,0.003,6,empty,0.747,0.0495,0.8582,0.0247,0.6467,0.1042,0.9043,0.0391,10
1,30,0.0003,0.003,4,empty,0.7264,0.0699,0.847,0.0424,0.67,0.1521,0.8483,0.1218,10
0,30,0.0003,0.003,3,empty,0.6868,0.0905,0.7786,0.0514,0.5917,0.1408,0.8484,0.0171,10


In [37]:
# Bosutinib esm-t33 runs compared across five pretrained checkpoints at
# cycle_size 10. The freeze count, base_lr and max_lr are held to a single
# value by the filter, so they are left out of the grouping columns.
analyze_metrics(
    'bosutinib_esm-t33-pretrained_stats.csv',
    ['cycle_size', 'pretrained_weight_path'],
    {
        'pretrained_weight_path': [
            'weight/atpbind3d-1930_esm-t33-lr3e-4_0.pt',
            'weight/atpbind3d-1930_esm-t33-lr1e-4_0.pt',
            'weight/atpbind3d-1930_esm-t33-lr3e-5_0.pt',
            'weight/atpbind3d-1930_esm-t33-lr1e-5_0.pt',
            'weight/atpbind3d-1930_esm-t33-lr3e-6_0.pt',
        ],
        'base_lr': [3e-4],
        'max_lr': [3e-3],
        'cycle_size': [10],
        'model_kwargs.freeze_layer_count': [30],
    },
    limit=10,
)

Unnamed: 0_level_0,cycle_size,pretrained_weight_path,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
4,10,weight/atpbind3d-1930_esm-t33-lr3e-6_0.pt,0.7839,0.033,0.8448,0.0287,0.7167,0.0527,0.8883,0.01,5
0,10,weight/atpbind3d-1930_esm-t33-lr1e-4_0.pt,0.7762,0.0224,0.8612,0.0212,0.74,0.0947,0.8519,0.0642,5
3,10,weight/atpbind3d-1930_esm-t33-lr3e-5_0.pt,0.7671,0.018,0.8589,0.0278,0.6733,0.0325,0.9065,0.02,5
2,10,weight/atpbind3d-1930_esm-t33-lr3e-4_0.pt,0.7447,0.0223,0.8359,0.0317,0.66,0.0742,0.8802,0.0562,5
1,10,weight/atpbind3d-1930_esm-t33-lr1e-5_0.pt,0.7374,0.0333,0.8539,0.0187,0.6567,0.1058,0.873,0.0657,5


In [38]:

# Bosutinib boosting runs without pretraining, keeping only the rows with
# boost_mask_positive == True and grouping by boost_negative_use_ratio.
analyze_metrics(
    'bosutinib_esm-t33-gearnet-resiboost_stats.csv',
    ['boost_negative_use_ratio', 'boost_mask_positive'],
    {
        'boost_mask_positive': [True],  # Adaboost
    },
    limit=10,
)

Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
1,0.9,True,0.7402,0.0311,0.8108,0.0388,0.6967,0.0642,0.831,0.101,10
0,0.5,True,0.7253,0.0545,0.8099,0.0312,0.7133,0.101,0.7924,0.146,5


In [39]:

# Bosutinib boosting runs from the "pretrained-resiboost" stats file, keeping
# only boost_mask_positive == True and grouping by boost_negative_use_ratio.
analyze_metrics(
    'bosutinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
    ['boost_negative_use_ratio', 'boost_mask_positive'],
    {
        'boost_mask_positive': [True],  # Adaboost
    },
    limit=10,
)

Unnamed: 0_level_0,boost_negative_use_ratio,boost_mask_positive,mcc,mcc,micro_auprc,micro_auprc,sensitivity,sensitivity,precision,precision,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std,Unnamed: 11_level_1
1,0.9,True,0.7856,0.0084,0.846,0.0256,0.7383,0.0629,0.8702,0.058,10
0,0.5,True,0.7641,0.0178,0.8312,0.0218,0.7467,0.0628,0.8216,0.092,5


In [40]:
# Bosutinib summary table: one row per configuration, in the order listed here.
# NOTE(review): 'ESM (Pretrain)' uses the lr3e-5 checkpoint, while the In [37]
# table ranks lr3e-6 highest on mean mcc - confirm the choice is intentional.
result = pick_and_aggregate([
    # --- ESM only ---
    dict(
        label='ESM (No Pretrain)',
        csv_path='bosutinib_esm-t33-pretrained_stats.csv',
        filter_conditions={
            'pretrained_weight_path': ['empty'],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'model_kwargs.freeze_layer_count': [30],
            'cycle_size': [10],
        },
    ),
    dict(
        label='ESM (Pretrain)',
        csv_path='bosutinib_esm-t33-pretrained_stats.csv',
        filter_conditions={
            'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33-lr3e-5_0.pt'],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [10],
            'model_kwargs.freeze_layer_count': [30],
        },
    ),
    # --- Multiview ---
    dict(
        label='Multiview (No Pretrain)',
        csv_path='bosutinib_esm-t33-gearnet-pretrained_stats.csv',
        filter_conditions={
            'pretrained_weight_path': ['empty'],
            'model_kwargs.lm_freeze_layer_count': [30],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [10],
        },
    ),
    dict(
        label='Multiview (Pretrain)',
        csv_path='bosutinib_esm-t33-gearnet-pretrained_stats.csv',
        filter_conditions={
            'pretrained_weight_path': ['weight/atpbind3d-1930_esm-t33-gearnet-lr3e-5_0.pt'],
            'model_kwargs.lm_freeze_layer_count': [30.0],
            'base_lr': [3e-4],
            'max_lr': [3e-3],
            'cycle_size': [10],
        },
    ),
    # --- Multiview + boosting (boost_mask_positive: True = Adaboost, False = Resiboost) ---
    dict(
        label='Multiview (Adaboost) (No Pretrain)',
        csv_path='bosutinib_esm-t33-gearnet-resiboost_stats.csv',
        filter_conditions={
            'boost_negative_use_ratio': [0.9], 'boost_mask_positive': [True],
        },
    ),
    dict(
        label='Multiview (Adaboost) (Pretrain)',
        csv_path='bosutinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
        filter_conditions={
            'boost_negative_use_ratio': [0.9], 'boost_mask_positive': [True],
        },
    ),
    dict(
        label='Multiview (Resiboost) (No Pretrain)',
        csv_path='bosutinib_esm-t33-gearnet-resiboost_stats.csv',
        filter_conditions={
            'boost_negative_use_ratio': [0.9], 'boost_mask_positive': [False],
        },
    ),
    dict(
        label='Multiview (Resiboost) (Pretrain)',
        csv_path='bosutinib_esm-t33-gearnet-pretrained-resiboost_stats.csv',
        filter_conditions={
            'boost_negative_use_ratio': [0.9], 'boost_mask_positive': [False],
        },
    ),
])

print_markdown_table(result)

| model_key                           | mcc           | micro_auprc   | sensitivity   | precision     |   record_count |
|:------------------------------------|:--------------|:--------------|:--------------|:--------------|---------------:|
| ESM (No Pretrain)                   | 0.763 ± 0.028 | 0.846 ± 0.023 | 0.687 ± 0.065 | 0.884 ± 0.035 |             10 |
| ESM (Pretrain)                      | 0.767 ± 0.018 | 0.859 ± 0.028 | 0.673 ± 0.032 | 0.906 ± 0.020 |              5 |
| Multiview (No Pretrain)             | 0.704 ± 0.076 | 0.783 ± 0.080 | 0.652 ± 0.101 | 0.811 ± 0.103 |             10 |
| Multiview (Pretrain)                | 0.777 ± 0.035 | 0.828 ± 0.033 | 0.727 ± 0.066 | 0.864 ± 0.048 |             10 |
| Multiview (Adaboost) (No Pretrain)  | 0.740 ± 0.031 | 0.811 ± 0.039 | 0.697 ± 0.064 | 0.831 ± 0.101 |             10 |
| Multiview (Adaboost) (Pretrain)     | 0.786 ± 0.008 | 0.846 ± 0.026 | 0.738 ± 0.063 | 0.870 ± 0.058 |             10 |
| Multiview (Resiboost) (No Pret

  result = result.sort_values('order').drop('order', axis=1)
