This notebook calculates the mean and standard deviation of the performance metrics. <br>

In [1]:
import pandas as pd

In [2]:
import warnings
# Install a blanket "ignore" filter so no warnings are shown in the cell outputs.
# NOTE(review): this also hides deprecation warnings raised by pandas and other
# libraries; consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Calculate mean and standard deviation for the performance metrics

-  Note: run `Performance_Metrics_Calculation.ipynb` first to generate the grand performance dataframe that is read below.

## Benchmark datasets

In [3]:
# --- Benchmark run configuration ---
# Both values are substituted into the results file names
# ('{folder}_..._{task}.csv') used by the cells below.
folder = task_setting = 'benchmark'

# Molecular property datasets: the three classification sets, then the three regression sets
mol_props = ['BACE', 'BBBP', 'HIV'] + ['ESOL', 'FreeSolv', 'Lipop']

# Split strategies to aggregate over
split_types = ['scaffold', 'random']

# Fold count (not referenced by the aggregation cells visible in this notebook)
num_folds = 30

# Models whose scores are aggregated
model_names = [
    'RF',
    'molbert',
    'grover_base',
    'grover_base_rdkit',
]

In [4]:
# Empty results table with the aggregate columns declared up front
agg_perf_columns = ['mean_metric_score', 'std_metric_score', 'split_type', 'mol_prop', 'model_name']
agg_perf_df = pd.DataFrame(columns=agg_perf_columns)

In [5]:
#read the grand perf df
grand_perf_df = pd.read_csv('../results/processed_performance/{folder}_grand_perf_df_{task}.csv'.format(folder=folder, task=task_setting))

# Metrics reported for each task type. A mol_prop outside both groups is rejected
# below instead of silently reusing the metric list left over from the previous
# loop iteration (or failing with a NameError on the very first one).
cls_mol_props = ['BACE', 'BBBP', 'HIV']
cls_metric_names = ["AUROC", "AUPRC", "Precision_PPV", "Precision_NPV"]
reg_mol_props = ['ESOL', 'FreeSolv', 'Lipop']
reg_metric_names = ['RMSE', 'R2', 'Pearson_R', 'MAE']

# Collect one record per (split_type, model_name, mol_prop, metric_name) and build
# the frame once at the end: DataFrame.append was removed in pandas 2.0, and growing
# a frame row by row copies the whole table on every iteration.
agg_records = []
for split_type in split_types:
    for model_name in model_names:
        # rows of the grand table for this split / model combination
        perf_df = grand_perf_df.loc[(grand_perf_df['split_type']==split_type) & (grand_perf_df['model_name']==model_name)]

        for mol_prop in mol_props:
            mol_prop_df = perf_df[perf_df['mol_prop'] == mol_prop]

            # pick the metric set that matches the task type of this dataset
            if mol_prop in cls_mol_props:
                metric_names = cls_metric_names
            elif mol_prop in reg_mol_props:
                metric_names = reg_metric_names
            else:
                raise ValueError('No metric set defined for mol_prop {!r}'.format(mol_prop))

            for metric_name in metric_names:
                metric_scores = mol_prop_df.loc[mol_prop_df['metric_name'] == metric_name, 'metric_score']

                # mean/std are NaN when no score matches; std is the pandas default (sample std, ddof=1)
                agg_records.append({'metric_name': metric_name,
                                    'mean_metric_score': metric_scores.mean(),
                                    'std_metric_score': metric_scores.std(),
                                    'split_type': split_type,
                                    'mol_prop': mol_prop,
                                    'model_name': model_name})

# Built from scratch so that re-running this cell does not duplicate rows; the
# explicit column order keeps the layout of the saved CSV unchanged.
agg_perf_df = pd.DataFrame(agg_records, columns=['mean_metric_score', 'std_metric_score', 'split_type',
                                                 'mol_prop', 'model_name', 'metric_name'])

In [7]:
# Preview the first aggregated rows for the Pearson_R metric
is_pearson_r = agg_perf_df['metric_name'] == 'Pearson_R'
agg_perf_df.loc[is_pearson_r].head()

Unnamed: 0,mean_metric_score,std_metric_score,split_type,mol_prop,model_name,metric_name
14,0.758733,0.058147,scaffold,ESOL,RF,Pearson_R
18,0.602236,0.166422,scaffold,FreeSolv,RF,Pearson_R
22,0.655029,0.044809,scaffold,Lipop,RF,Pearson_R
38,0.763882,0.068974,scaffold,ESOL,molbert,Pearson_R
42,0.275915,0.145668,scaffold,FreeSolv,molbert,Pearson_R


In [8]:
# Write the aggregated table to disk without the index column
agg_perf_path = '../results/processed_performance/{folder}_agg_perf_df_{task}.csv'.format(task=task_setting, folder=folder)
agg_perf_df.to_csv(agg_perf_path, index=False)

## Opioids datasets

In [18]:
# --- Opioids run configuration ---
# folder and task_setting are substituted into the results file names.
folder = 'opioids'
# 'cutoff6' selects the classification metrics, 'reg' the regression metrics
task_setting = 'reg'

# Targets to aggregate over
mol_props = [
    'MDR1',
    'CYP3A4',
    'CYP2D6',
    'MOR',
    'DOR',
    'KOR',
]

# Split strategies to aggregate over
split_types = ['scaffold', 'random']

# Fold count (not referenced by the aggregation cells visible in this notebook)
num_folds = 30

# Models whose scores are aggregated
model_names = [
    'RF',
    'molbert',
    'grover_base',
    'grover_base_rdkit',
]

-  overall

In [19]:
# Empty results table with the aggregate columns declared up front
agg_perf_columns = ['mean_metric_score', 'std_metric_score', 'split_type', 'mol_prop', 'model_name']
agg_perf_df = pd.DataFrame(columns=agg_perf_columns)

In [20]:
#read the grand perf df
grand_perf_df = pd.read_csv('../results/processed_performance/{folder}_grand_perf_df_{task}.csv'.format(folder=folder, task=task_setting))

# The metric set depends only on the task setting, so resolve it once up front and
# fail loudly on an unsupported value instead of hitting an undefined name later.
if task_setting == 'cutoff6':
    metric_names = ['AUROC', 'AUPRC', 'Precision_PPV', 'Precision_NPV']
elif task_setting == 'reg':
    metric_names = ['RMSE', 'MAE', 'R2', 'Pearson_R']
else:
    raise ValueError('No metric set defined for task_setting {!r}'.format(task_setting))

# Collect one record per (split_type, model_name, mol_prop, metric_name) and build
# the frame once at the end: DataFrame.append was removed in pandas 2.0, and growing
# a frame row by row copies the whole table on every iteration.
agg_records = []
for split_type in split_types:
    for model_name in model_names:
        # rows of the grand table for this split / model combination
        perf_df = grand_perf_df.loc[(grand_perf_df['split_type']==split_type) & (grand_perf_df['model_name']==model_name)]

        for mol_prop in mol_props:
            mol_prop_df = perf_df[perf_df['mol_prop'] == mol_prop]

            for metric_name in metric_names:
                metric_scores = mol_prop_df.loc[mol_prop_df['metric_name'] == metric_name, 'metric_score']

                # mean/std are NaN when no score matches; std is the pandas default (sample std, ddof=1)
                agg_records.append({'metric_name': metric_name,
                                    'mean_metric_score': metric_scores.mean(),
                                    'std_metric_score': metric_scores.std(),
                                    'split_type': split_type,
                                    'mol_prop': mol_prop,
                                    'model_name': model_name})

# Built from scratch so that re-running this cell does not duplicate rows; the
# explicit column order keeps the layout of the saved CSV unchanged.
agg_perf_df = pd.DataFrame(agg_records, columns=['mean_metric_score', 'std_metric_score', 'split_type',
                                                 'mol_prop', 'model_name', 'metric_name'])

In [21]:
# Show the first five aggregated rows as a sanity check
agg_perf_df.head(n=5)

Unnamed: 0,mean_metric_score,std_metric_score,split_type,mol_prop,model_name,metric_name
0,1.817701,0.322271,scaffold,MDR1,RF,RMSE
1,1.319866,0.212408,scaffold,MDR1,RF,MAE
2,0.684338,0.159458,scaffold,MDR1,RF,R2
3,0.827725,0.104071,scaffold,MDR1,RF,Pearson_R
4,1.594176,0.213225,scaffold,CYP3A4,RF,RMSE


In [22]:
# Write the aggregated table to disk without the index column
agg_perf_path = '../results/processed_performance/{folder}_agg_perf_df_{task}.csv'.format(task=task_setting, folder=folder)
agg_perf_df.to_csv(agg_perf_path, index=False)

-  AC molecules and non-AC molecules, respectively

In [23]:
# Empty results table with the aggregate columns declared up front
agg_perf_columns = ['mean_metric_score', 'std_metric_score', 'split_type', 'mol_prop', 'model_name']
agg_perf_df = pd.DataFrame(columns=agg_perf_columns)

In [24]:
AC_labels = ['AC', 'non-AC']

#read the grand perf df once; the file does not depend on the AC label, so there is
#no need to re-read it inside the loop
ac_grand_perf_df = pd.read_csv('../results/processed_performance/AC_{folder}_grand_perf_df_{task}.csv'.format(folder=folder, task=task_setting))

# The metric set depends only on the task setting, so resolve it once up front and
# fail loudly on an unsupported value instead of hitting an undefined name later.
if task_setting == 'cutoff6':
    metric_names = ['AUROC', 'AUPRC', 'Precision_PPV', 'Precision_NPV']
elif task_setting == 'reg':
    metric_names = ['RMSE', 'MAE', 'R2', 'Pearson_R']
else:
    raise ValueError('No metric set defined for task_setting {!r}'.format(task_setting))

# Collect one record per (AC_label, split_type, model_name, mol_prop, metric_name)
# and build the frame once at the end: DataFrame.append was removed in pandas 2.0,
# and growing a frame row by row copies the whole table on every iteration.
agg_records = []
for AC_label in AC_labels:
    #get the AC label portion
    grand_perf_df = ac_grand_perf_df[ac_grand_perf_df['AC_label'] == AC_label]

    for split_type in split_types:
        for model_name in model_names:
            # rows of this label's table for the current split / model combination
            perf_df = grand_perf_df.loc[(grand_perf_df['split_type']==split_type) & (grand_perf_df['model_name']==model_name)]

            for mol_prop in mol_props:
                mol_prop_df = perf_df[perf_df['mol_prop'] == mol_prop]

                for metric_name in metric_names:
                    metric_scores = mol_prop_df.loc[mol_prop_df['metric_name'] == metric_name, 'metric_score']

                    # mean/std are NaN when no score matches; std is the pandas default (sample std, ddof=1)
                    agg_records.append({'metric_name': metric_name,
                                        'mean_metric_score': metric_scores.mean(),
                                        'std_metric_score': metric_scores.std(),
                                        'split_type': split_type,
                                        'mol_prop': mol_prop,
                                        'model_name': model_name,
                                        'AC_label': AC_label})

# Built from scratch so that re-running this cell does not duplicate rows; the
# explicit column order keeps the layout of the saved CSV unchanged.
agg_perf_df = pd.DataFrame(agg_records, columns=['mean_metric_score', 'std_metric_score', 'split_type',
                                                 'mol_prop', 'model_name', 'AC_label', 'metric_name'])

In [27]:
# Show the first five aggregated rows as a sanity check
agg_perf_df.head(n=5)

Unnamed: 0,mean_metric_score,std_metric_score,split_type,mol_prop,model_name,AC_label,metric_name
0,1.928461,0.540381,scaffold,MDR1,RF,AC,RMSE
1,1.379168,0.308204,scaffold,MDR1,RF,AC,MAE
2,0.583105,0.290243,scaffold,MDR1,RF,AC,R2
3,0.780169,0.189152,scaffold,MDR1,RF,AC,Pearson_R
4,1.890989,0.423392,scaffold,CYP3A4,RF,AC,RMSE


In [26]:
# Write the AC / non-AC aggregated table to disk without the index column
ac_agg_perf_path = '../results/processed_performance/AC_{folder}_agg_perf_df_{task}.csv'.format(task=task_setting, folder=folder)
agg_perf_df.to_csv(ac_agg_perf_path, index=False)