In [1]:
from utils import * 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import itertools
from pprint import pprint
import joblib
import statistics

#regression matrics
from sklearn.metrics import mean_absolute_error , mean_squared_error, r2_score

#classification metrics
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_auc_score, f1_score, matthews_corrcoef


## Test Set Results

This notebook evalute all the models (60 base models and 12 hierarchical models) on test set.

## Test lables

In [50]:
test_labels = pd.read_csv('../data/train_test_sets/test_labels.csv', index_col = 'CASRN')

In [19]:
test_labels.head(1)

Unnamed: 0_level_0,SMILES,logLD50_mmolkg,verytoxic,toxic,EPA_category,GHS_category
CASRN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
130209-82-4,CC(C)OC(=O)CCCC=CCC1C(O)CC(O)C1CCC(O)CCc1ccccc1,,1.0,1.0,1.0,1.0


## Functions

In [56]:
def report_reg_scores(labels, predictions):
    '''
    two dataframe: label, and predictions
    
    report four scores: RMSE, R2, MAE, MSE
    '''
    
    # get the labeled data
    labeled = labels[~labels['logLD50_mmolkg'].isnull()]
    
    labeled_preds = predictions.loc[labeled.index].values.astype('float32')
    labeled_Y = labeled['logLD50_mmolkg'].values
    
    score_rmse = rmse(labeled_Y, labeled_preds)
    score_r2 = r2_score(labeled_Y, labeled_preds)
    score_mae = mean_absolute_error(labeled_Y, labeled_preds)
    score_mse = mean_squared_error(labeled_Y, labeled_preds)

    print('RMSE:', score_rmse)
    print('R2:', score_r2) 
    print('MAE:', score_mae)
    print('MSE:', score_mse)
    
    return [score_rmse, score_r2, score_mae, score_mse]

def prob_to_pred(probs):
    classes = probs.argmax(axis=-1)
    return classes

def report_clf_scores(labels, predictions, target, encoder):
    '''
    two dataframe: label, and predictions
    target: ['toxic', 'EPA_category']
    encoder: label encoder
    
    report four scores: Accuracy, Balance Accuracy, MCC, f1_weight, AUROC (Only for binary model)
    '''
    
    # get the labeled data
    labeled = labels[~labels[target].isnull()]
    
    labeled_probs = predictions.loc[labeled.index].values.astype('float32')
    # predicted probabilities to predicted class
    labeled_preds = prob_to_pred(labeled_probs)
    
    # label encoding
    labeled_Y = encoder.transform(labeled[target].values)
    
    accuracy = accuracy_score(labeled_Y, labeled_preds)
    balance_acc = balanced_accuracy_score(labeled_Y, labeled_preds)
    f1= f1_score(labeled_Y, labeled_preds, average='weighted')
    mcc = matthews_corrcoef(labeled_Y, labeled_preds)
    
    score_list = [accuracy, balance_acc, f1, mcc]
    
    print('Accuracy:', accuracy)
    print('Balance Accuracy:', balance_acc) 
    print('F1_score:', f1)
    print('MCC:', mcc)
    
    if target == 'toxic':
        auroc = roc_auc_score(labeled_Y, labeled_preds)
        print('AUROC:', auroc)
        score_list.append(auroc)
        
    return score_list

## Base Models

In [204]:
encoder_epa = joblib.load('../data/label_encoders/encoder_epa.joblib')
encoder_toxic = joblib.load('../data/label_encoders/encoder_toxic.joblib')

endpoints = ['Toxic', 'EPA', 'LD50']
descriptors = ['ecfp6bits', 'ecfp6counts', 'maccs', 'rdkit2d', 'mordred']
algorithms = ['knn', 'svm', 'RF', 'xgboost']

reg_cols = ['name','RMSE', 'R2', 'MAE', 'MSE']
binary_cols = ['name','Accuracy', 'Balance Accuracy', 'F1_score', 'MCC', 'AUROC']
multiclass_cols = ['name','Accuracy', 'Balance Accuracy', 'F1_score', 'MCC']

base_reg_scores = pd.DataFrame(columns = reg_cols)
base_toxic_scores = pd.DataFrame(columns = binary_cols)
base_epa_scores = pd.DataFrame(columns = multiclass_cols)

for e in endpoints:
    for d in descriptors:
        for a in algorithms:
            name = f'{e}_{a}_{d}'
            print(f'Eavluating {name}')
            df_preds = pd.read_csv(f'../data/Hmodel_features_test/{name}.csv',index_col = 'CASRN')
            if e == 'LD50':
                scores = report_reg_scores(test_labels, df_preds)
                base_reg_scores = base_reg_scores.append(pd.Series([name] +scores,index = reg_cols), ignore_index=True)
            if e == 'EPA':
                scores = report_clf_scores(test_labels, df_preds, target = 'EPA_category', encoder= encoder_epa)
                base_epa_scores = base_epa_scores.append(pd.Series([name] +scores,index = multiclass_cols), ignore_index=True)
            if e == 'Toxic':
                scores = report_clf_scores(test_labels, df_preds, target = 'toxic', encoder= encoder_toxic)
                base_toxic_scores = base_toxic_scores.append(pd.Series([name] +scores,index = binary_cols), ignore_index=True)
            print('\n')

Eavluating Toxic_knn_ecfp6bits
Accuracy: 0.7475386779184248
Balance Accuracy: 0.7353211944779461
F1_score: 0.7454861183892512
MCC: 0.47837192244535176
AUROC: 0.7353211944779461


Eavluating Toxic_svm_ecfp6bits
Accuracy: 0.7338255977496484
Balance Accuracy: 0.7085017200113977
F1_score: 0.7240685776666368
MCC: 0.4493373710477679
AUROC: 0.7085017200113977


Eavluating Toxic_RF_ecfp6bits
Accuracy: 0.7630098452883263
Balance Accuracy: 0.7448431289884552
F1_score: 0.7582520076151013
MCC: 0.5099046034668507
AUROC: 0.7448431289884552


Eavluating Toxic_xgboost_ecfp6bits
Accuracy: 0.7461322081575246
Balance Accuracy: 0.7334465667843708
F1_score: 0.7438910103005255
MCC: 0.4752274905819658
AUROC: 0.7334465667843707


Eavluating Toxic_knn_ecfp6counts
Accuracy: 0.7556258790436006
Balance Accuracy: 0.7434343746188367
F1_score: 0.7535756236226676
MCC: 0.49512307916289494
AUROC: 0.7434343746188367


Eavluating Toxic_svm_ecfp6counts
Accuracy: 0.7566807313642757
Balance Accuracy: 0.7450035655378242
F1_s

In [206]:
base_reg_scores

Unnamed: 0,name,RMSE,R2,MAE,MSE
0,LD50_knn_ecfp6bits,0.616647,0.528434,0.44494,0.380253
1,LD50_svm_ecfp6bits,0.627293,0.512011,0.458884,0.393496
2,LD50_RF_ecfp6bits,0.603986,0.547599,0.444927,0.364799
3,LD50_xgboost_ecfp6bits,0.59028,0.567898,0.43709,0.348431
4,LD50_knn_ecfp6counts,0.62006,0.523198,0.449202,0.384475
5,LD50_svm_ecfp6counts,0.598179,0.556257,0.440762,0.357818
6,LD50_RF_ecfp6counts,0.600334,0.553053,0.439981,0.360401
7,LD50_xgboost_ecfp6counts,0.588179,0.570968,0.430376,0.345955
8,LD50_knn_maccs,0.598698,0.555486,0.427933,0.358439
9,LD50_svm_maccs,0.575639,0.589069,0.418768,0.33136


In [207]:
base_toxic_scores

Unnamed: 0,name,Accuracy,Balance Accuracy,F1_score,MCC,AUROC
0,Toxic_knn_ecfp6bits,0.747539,0.735321,0.745486,0.478372,0.735321
1,Toxic_svm_ecfp6bits,0.733826,0.708502,0.724069,0.449337,0.708502
2,Toxic_RF_ecfp6bits,0.76301,0.744843,0.758252,0.509905,0.744843
3,Toxic_xgboost_ecfp6bits,0.746132,0.733447,0.743891,0.475227,0.733447
4,Toxic_knn_ecfp6counts,0.755626,0.743434,0.753576,0.495123,0.743434
5,Toxic_svm_ecfp6counts,0.756681,0.745004,0.754827,0.497543,0.745004
6,Toxic_RF_ecfp6counts,0.756329,0.734905,0.749642,0.49665,0.734905
7,Toxic_xgboost_ecfp6counts,0.766174,0.752706,0.763617,0.516623,0.752706
8,Toxic_knn_maccs,0.767581,0.754254,0.765083,0.51959,0.754254
9,Toxic_svm_maccs,0.779536,0.767142,0.777412,0.544705,0.767142


In [208]:
base_epa_scores

Unnamed: 0,name,Accuracy,Balance Accuracy,F1_score,MCC
0,EPA_knn_ecfp6bits,0.625089,0.555062,0.614394,0.406748
1,EPA_svm_ecfp6bits,0.623671,0.521866,0.59764,0.393756
2,EPA_RF_ecfp6bits,0.633239,0.516153,0.601637,0.411027
3,EPA_xgboost_ecfp6bits,0.631821,0.550116,0.617751,0.412339
4,EPA_knn_ecfp6counts,0.636428,0.571574,0.625627,0.426126
5,EPA_svm_ecfp6counts,0.633948,0.544923,0.613933,0.413016
6,EPA_RF_ecfp6counts,0.641035,0.524351,0.609924,0.425793
7,EPA_xgboost_ecfp6counts,0.639972,0.555431,0.624653,0.425785
8,EPA_knn_maccs,0.626506,0.562859,0.61578,0.410583
9,EPA_svm_maccs,0.631467,0.541665,0.61363,0.411214


In [210]:
# save the results
base_reg_scores.to_csv('../results/model_evaluation/base_reg_scores.csv', index =False)
base_toxic_scores.to_csv('../results/model_evaluation/base_toxic_scores.csv', index =False)
base_epa_scores.to_csv('../results/model_evaluation/base_epa_scores.csv', index =False)

## Hierarchial Models

Get the predictions on test set

In [200]:
encoder_epa = joblib.load('../data/label_encoders/encoder_epa.joblib')
encoder_toxic = joblib.load('../data/label_encoders/encoder_toxic.joblib')

result_path = '../results/Hierarchical_testset_preds/'

endpoints = ['Toxic', 'EPA', 'LD50']
descriptors = ['Hmodel']
algorithms = ['knn', 'SVM', 'RF', 'xgboost']

reg_cols = ['name','RMSE', 'R2', 'MAE', 'MSE']
binary_cols = ['name','Accuracy', 'Balance Accuracy', 'F1_score', 'MCC', 'AUROC']
multiclass_cols = ['name','Accuracy', 'Balance Accuracy', 'F1_score', 'MCC']

H_reg_scores = pd.DataFrame(columns = reg_cols)
H_toxic_scores = pd.DataFrame(columns = binary_cols)
H_epa_scores = pd.DataFrame(columns = multiclass_cols)

for e in endpoints:
    for d in descriptors:
        for a in algorithms:
            name = f'{e}_{a}_{d}'
            print(f'Eavluating {name}')
            df_preds = pd.read_csv(f'{result_path}{name}.csv',index_col = 'CASRN')
            if e == 'LD50':
                scores = report_reg_scores(test_labels, df_preds)
                H_reg_scores = H_reg_scores.append(pd.Series([name] +scores,index = reg_cols), ignore_index=True)
            if e == 'EPA':
                scores = report_clf_scores(test_labels, df_preds, target = 'EPA_category', encoder= encoder_epa)
                H_epa_scores = H_epa_scores.append(pd.Series([name] +scores,index = multiclass_cols), ignore_index=True)
            if e == 'Toxic':
                scores = report_clf_scores(test_labels, df_preds, target = 'toxic', encoder= encoder_toxic)
                H_toxic_scores = H_toxic_scores.append(pd.Series([name] +scores,index = binary_cols), ignore_index=True)
            print('\n')

Eavluating Toxic_knn_Hmodel
Accuracy: 0.7988748241912799
Balance Accuracy: 0.7902522396588016
F1_score: 0.7979471620714984
MCC: 0.5858879795561192
AUROC: 0.7902522396588016


Eavluating Toxic_SVM_Hmodel
Accuracy: 0.7964135021097046
Balance Accuracy: 0.7876789285065382
F1_score: 0.7954594267257374
MCC: 0.5807797165332975
AUROC: 0.7876789285065382


Eavluating Toxic_RF_Hmodel
Accuracy: 0.80098452883263
Balance Accuracy: 0.7928465543340897
F1_score: 0.8001824084220864
MCC: 0.5904359829676826
AUROC: 0.7928465543340897


Eavluating Toxic_xgboost_Hmodel
Accuracy: 0.80098452883263
Balance Accuracy: 0.7932818080642394
F1_score: 0.8002947289738286
MCC: 0.5906455748791202
AUROC: 0.7932818080642394


Eavluating EPA_knn_Hmodel
Accuracy: 0.6789510985116939
Balance Accuracy: 0.6127815184840104
F1_score: 0.668278172734896
MCC: 0.4931024134101801


Eavluating EPA_SVM_Hmodel
Accuracy: 0.6785967399007796
Balance Accuracy: 0.6113076886369114
F1_score: 0.6681230014963366
MCC: 0.49259010239255113


Eavluat

In [201]:
H_reg_scores

Unnamed: 0,name,RMSE,R2,MAE,MSE
0,LD50_knn_Hmodel,0.533819,0.646607,0.381479,0.284963
1,LD50_SVM_Hmodel,0.529118,0.652804,0.376222,0.279966
2,LD50_RF_Hmodel,0.529594,0.652179,0.376696,0.28047
3,LD50_xgboost_Hmodel,0.531629,0.6495,0.377259,0.28263


In [202]:
H_toxic_scores

Unnamed: 0,name,Accuracy,Balance Accuracy,F1_score,MCC,AUROC
0,Toxic_knn_Hmodel,0.798875,0.790252,0.797947,0.585888,0.790252
1,Toxic_SVM_Hmodel,0.796414,0.787679,0.795459,0.58078,0.787679
2,Toxic_RF_Hmodel,0.800985,0.792847,0.800182,0.590436,0.792847
3,Toxic_xgboost_Hmodel,0.800985,0.793282,0.800295,0.590646,0.793282


In [203]:
H_epa_scores

Unnamed: 0,name,Accuracy,Balance Accuracy,F1_score,MCC
0,EPA_knn_Hmodel,0.678951,0.612782,0.668278,0.493102
1,EPA_SVM_Hmodel,0.678597,0.611308,0.668123,0.49259
2,EPA_RF_Hmodel,0.677888,0.615845,0.66761,0.492841
3,EPA_xgboost_Hmodel,0.671155,0.61506,0.662894,0.483329


In [211]:
# save the results
H_reg_scores.to_csv('../results/model_evaluation/H_reg_scores.csv', index =False)
H_toxic_scores.to_csv('../results/model_evaluation/H_toxic_scores.csv', index =False)
H_epa_scores.to_csv('../results/model_evaluation/H_epa_scores.csv', index =False)

In [221]:
reg_scores = pd.concat([H_reg_scores, base_reg_scores])
toxic_scores = pd.concat([H_toxic_scores, base_toxic_scores])
epa_scores = pd.concat([H_epa_scores, base_epa_scores])

In [224]:
# save the results
reg_scores.to_csv('../results/model_evaluation/reg_scores.csv', index =False)
toxic_scores.to_csv('../results/model_evaluation/toxic_scores.csv', index =False)
epa_scores.to_csv('../results/model_evaluation/epa_scores.csv', index =False)

## Averge predictions

Average predictions of hierarchial models

In [108]:
result_path = '../results/Hierarchical_testset_preds/'

endpoints = ['Toxic', 'EPA', 'LD50']
descriptors = ['Hmodel']
algorithms = ['knn', 'SVM', 'RF', 'xgboost']

reg_array_sum = np.zeros((2849, 1)) #shape of predictions (number of samples, predictions)
binary_array_sum = np.zeros((2849, 2))
multiclass_array_sum = np.zeros((2849, 4))

for e in endpoints:
    for d in descriptors:
        for a in algorithms:
            name = f'{e}_{a}_{d}'
            print(f'Eavluating {name}')
            df_preds = pd.read_csv(f'{result_path}{name}.csv',index_col = 'CASRN')
            
            if e == 'LD50':
                reg_array_sum += df_preds.values
            if e == 'EPA':
                multiclass_array_sum += df_preds.values
            if e == 'Toxic':                
                binary_array_sum += df_preds.values

reg_array_avg = reg_array_sum/4 # len(descriptors) * len(algorithms)
binary_array_avg = binary_array_sum/4 # len(descriptors) * len(algorithms)
multiclass_array_avg = multiclass_array_sum/4 # len(descriptors) * len(algorithms)

Eavluating Toxic_knn_Hmodel
Eavluating Toxic_SVM_Hmodel
Eavluating Toxic_RF_Hmodel
Eavluating Toxic_xgboost_Hmodel
Eavluating EPA_knn_Hmodel
Eavluating EPA_SVM_Hmodel
Eavluating EPA_RF_Hmodel
Eavluating EPA_xgboost_Hmodel
Eavluating LD50_knn_Hmodel
Eavluating LD50_SVM_Hmodel
Eavluating LD50_RF_Hmodel
Eavluating LD50_xgboost_Hmodel


In [109]:
Hmodel_reg_avgp = pd.DataFrame(reg_array_avg, index = test_labels.index, columns= ['LD50_avg'])
Hmodel_toxic_avgp = pd.DataFrame(binary_array_avg, index = test_labels.index, columns= ['Toxic_avg-0', 'Toxic_avg-1'])
Hmodel_epa_avgp = pd.DataFrame(multiclass_array_avg, index = test_labels.index, 
                               columns= ['EPA_avg-1', 'EPA_avg-2', 'EPA_avg-3','EPA_avg-4'])

In [110]:
Hmodel_reg_avgp.to_csv('../results/avg_predictions/Hmodel_reg_avgp.csv')
Hmodel_toxic_avgp.to_csv('../results/avg_predictions/Hmodel_toxic_avgp.csv')
Hmodel_epa_avgp.to_csv('../results/avg_predictions/Hmodel_epa_avgp.csv')

In [111]:
Hmodel_avgp = pd.concat([Hmodel_reg_avgp, Hmodel_toxic_avgp, Hmodel_epa_avgp],axis=1)

In [112]:
Hmodel_avgp.to_csv('../results/avg_predictions/Hmodel_avgp.csv')

In [113]:
list(Hmodel_avgp)

['LD50_avg',
 'Toxic_avg-0',
 'Toxic_avg-1',
 'EPA_avg-1',
 'EPA_avg-2',
 'EPA_avg-3',
 'EPA_avg-4']

Average predictions of base models

In [114]:
result_path = '../data/Hmodel_features_test/'

endpoints = ['Toxic', 'EPA', 'LD50']
descriptors = ['ecfp6bits', 'ecfp6counts', 'maccs', 'rdkit2d', 'mordred']
algorithms = ['knn', 'svm', 'RF', 'xgboost']

reg_array_sum = np.zeros((2849, 1)) #shape of predictions (number of samples, predictions)
binary_array_sum = np.zeros((2849, 2))
multiclass_array_sum = np.zeros((2849, 4))

for e in endpoints:
    for d in descriptors:
        for a in algorithms:
            name = f'{e}_{a}_{d}'
            print(f'Eavluating {name}')
            df_preds = pd.read_csv(f'{result_path}{name}.csv',index_col = 'CASRN')
            
            if e == 'LD50':
                reg_array_sum += df_preds.values
            if e == 'EPA':
                multiclass_array_sum += df_preds.values
            if e == 'Toxic':                
                binary_array_sum += df_preds.values

reg_array_avg = reg_array_sum/20 # len(descriptors) * len(algorithms)
binary_array_avg = binary_array_sum/20 # len(descriptors) * len(algorithms)
multiclass_array_avg = multiclass_array_sum/20 # len(descriptors) * len(algorithms)

Eavluating Toxic_knn_ecfp6bits
Eavluating Toxic_svm_ecfp6bits
Eavluating Toxic_RF_ecfp6bits
Eavluating Toxic_xgboost_ecfp6bits
Eavluating Toxic_knn_ecfp6counts
Eavluating Toxic_svm_ecfp6counts
Eavluating Toxic_RF_ecfp6counts
Eavluating Toxic_xgboost_ecfp6counts
Eavluating Toxic_knn_maccs
Eavluating Toxic_svm_maccs
Eavluating Toxic_RF_maccs
Eavluating Toxic_xgboost_maccs
Eavluating Toxic_knn_rdkit2d
Eavluating Toxic_svm_rdkit2d
Eavluating Toxic_RF_rdkit2d
Eavluating Toxic_xgboost_rdkit2d
Eavluating Toxic_knn_mordred
Eavluating Toxic_svm_mordred
Eavluating Toxic_RF_mordred
Eavluating Toxic_xgboost_mordred
Eavluating EPA_knn_ecfp6bits
Eavluating EPA_svm_ecfp6bits
Eavluating EPA_RF_ecfp6bits
Eavluating EPA_xgboost_ecfp6bits
Eavluating EPA_knn_ecfp6counts
Eavluating EPA_svm_ecfp6counts
Eavluating EPA_RF_ecfp6counts
Eavluating EPA_xgboost_ecfp6counts
Eavluating EPA_knn_maccs
Eavluating EPA_svm_maccs
Eavluating EPA_RF_maccs
Eavluating EPA_xgboost_maccs
Eavluating EPA_knn_rdkit2d
Eavluating EP

In [115]:
Bmodel_reg_avgp = pd.DataFrame(reg_array_avg, index = test_labels.index, columns= ['LD50_avg'])
Bmodel_toxic_avgp = pd.DataFrame(binary_array_avg, index = test_labels.index, columns= ['Toxic_avg-0', 'Toxic_avg-1'])
Bmodel_epa_avgp = pd.DataFrame(multiclass_array_avg, index = test_labels.index, 
                               columns= ['EPA_avg-1', 'EPA_avg-2', 'EPA_avg-3','EPA_avg-4'])

In [116]:
Bmodel_reg_avgp.to_csv('../results/avg_predictions/Bmodel_reg_avgp.csv')
Bmodel_toxic_avgp.to_csv('../results/avg_predictions/Bmodel_toxic_avgp.csv')
Bmodel_epa_avgp.to_csv('../results/avg_predictions/Bmodel_epa_avgp.csv')

In [117]:
Bmodel_avgp = pd.concat([Bmodel_reg_avgp, Bmodel_toxic_avgp, Bmodel_epa_avgp],axis=1)
Bmodel_avgp.to_csv('../results/avg_predictions/Bmodel_avgp.csv')

In [118]:
Bmodel_avgp.head(1)

Unnamed: 0_level_0,LD50_avg,Toxic_avg-0,Toxic_avg-1,EPA_avg-1,EPA_avg-2,EPA_avg-3,EPA_avg-4
CASRN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
130209-82-4,0.424765,0.581789,0.418211,0.059674,0.169181,0.333094,0.43805


### Ealuation of the avergage predictons
and merge with other results

In [87]:
encoder_epa = joblib.load('../data/label_encoders/encoder_epa.joblib')
encoder_toxic = joblib.load('../data/label_encoders/encoder_toxic.joblib')



In [94]:
reg_scores = pd.read_csv('../results/model_evaluation/reg_scores.csv')
toxic_scores = pd.read_csv('../results/model_evaluation/toxic_scores.csv')
epa_scores = pd.read_csv('../results/model_evaluation/epa_scores.csv')

In [96]:
reg_cols = ['name','RMSE', 'R2', 'MAE', 'MSE']
binary_cols = ['name','Accuracy', 'Balance Accuracy', 'F1_score', 'MCC', 'AUROC']
multiclass_cols = ['name','Accuracy', 'Balance Accuracy', 'F1_score', 'MCC']


Base model

In [97]:
scores = report_reg_scores(test_labels, Bmodel_reg_avgp)
reg_scores = reg_scores.append(pd.Series(['Bmodel_avgp'] +scores,index = reg_cols), ignore_index=True)

RMSE: 0.551102116564148
R2: 0.6233535536399771
MAE: 0.3982596802134405
MSE: 0.3037135428814838


In [99]:
scores = report_clf_scores(test_labels, Bmodel_epa_avgp, target = 'EPA_category', encoder= encoder_epa)
epa_scores = epa_scores.append(pd.Series(['Bmodel_avgp'] +scores,index = multiclass_cols), ignore_index=True)

Accuracy: 0.6697377746279235
Balance Accuracy: 0.5786784342013656
F1_score: 0.6518590880022376
MCC: 0.4757353567066563


In [100]:
scores = report_clf_scores(test_labels, Bmodel_toxic_avgp, target = 'toxic', encoder= encoder_toxic)
toxic_scores = toxic_scores.append(pd.Series(['Bmodel_avgp'] +scores,index = binary_cols), ignore_index=True)

Accuracy: 0.790436005625879
Balance Accuracy: 0.7771546451442133
F1_score: 0.7880032360089256
MCC: 0.567372332233607
AUROC: 0.7771546451442133


Hierarchical Models

In [103]:
scores = report_reg_scores(test_labels, Hmodel_reg_avgp)
reg_scores = reg_scores.append(pd.Series(['Hmodel_avgp'] +scores,index = reg_cols), ignore_index=True)

RMSE: 0.5265871621865624
R2: 0.6561173613191993
MAE: 0.3736532183201069
MSE: 0.27729403937969704


In [102]:
scores = report_clf_scores(test_labels, Hmodel_epa_avgp, target = 'EPA_category', encoder= encoder_epa)
epa_scores = epa_scores.append(pd.Series(['Hmodel_avgp'] +scores,index = multiclass_cols), ignore_index=True)

Accuracy: 0.6807228915662651
Balance Accuracy: 0.6181178350834463
F1_score: 0.6703939611153991
MCC: 0.49705978651309735


In [101]:
scores = report_clf_scores(test_labels, Hmodel_toxic_avgp, target = 'toxic', encoder= encoder_toxic)
toxic_scores = toxic_scores.append(pd.Series(['Hmodel_avgp'] +scores,index = binary_cols), ignore_index=True)

Accuracy: 0.8002812939521801
Balance Accuracy: 0.791909240487302
F1_score: 0.7994186679483828
MCC: 0.5888867186680713
AUROC: 0.791909240487302


In [106]:
# save the results
reg_scores.to_csv('../results/model_evaluation/reg_scores.csv', index =False)
toxic_scores.to_csv('../results/model_evaluation/toxic_scores.csv', index =False)
epa_scores.to_csv('../results/model_evaluation/epa_scores.csv', index =False)

## Cross-validation Resluts

In [2]:
def report_reg_scores(score, decimal = 3):
    '''
    score: cross-validation score
    
    report four scores: RMSE, R2, MAE, MSE
    '''
    
    score_rmse = round(statistics.mean(score['test_RMSE']), decimal)
    std_rmse = round(statistics.stdev(score['test_RMSE']), decimal)
    score_r2 = round(statistics.mean(score['test_R2']), decimal)
    std_r2 = round(statistics.stdev(score['test_R2']), decimal)
    score_mae = round(statistics.mean(score['test_MAE']), decimal)
    std_mae = round(statistics.stdev(score['test_MAE']), decimal)
    score_mse = round(statistics.mean(score['test_MSE']), decimal)
    std_mse = round(statistics.stdev(score['test_MSE']), decimal)
    
    return [score_rmse,std_rmse, score_r2, std_r2, score_mae,std_mae, score_mse,std_mse]

def report_clf_scores(score, decimal = 3):
    '''
    score: cross-validation score
    
    report four scores: Accuracy, Balance Accuracy, MCC, f1_weight, AUROC
    '''
    
    accuracy = round(statistics.mean(score['test_Accuracy']), decimal)
    std_accuracy = round(statistics.stdev(score['test_Accuracy']), decimal)
    balance_acc = round(statistics.mean(score['test_Balance Accuracy']), decimal)
    std_balance_acc = round(statistics.stdev(score['test_Balance Accuracy']), decimal)
    mcc = round(statistics.mean(score['test_matthews_corrcoef']), decimal)
    std_mcc = round(statistics.stdev(score['test_matthews_corrcoef']), decimal)
    f1= round(statistics.mean(score['test_f1_score']), decimal)
    std_f1 = round(statistics.stdev(score['test_f1_score']), decimal)
    auroc = round(statistics.mean(score['test_AUROC']), decimal)
    std_auroc = round(statistics.stdev(score['test_AUROC']), decimal)

    return [accuracy, std_accuracy,balance_acc, std_balance_acc, mcc, std_mcc, f1, std_f1, auroc, std_auroc]

Get the cv scores from all the Base Models

In [3]:
endpoints = ['Toxic', 'EPA', 'LD50']
descriptors = ['ecfp6bits', 'ecfp6counts', 'maccs', 'rdkit2d', 'mordred']
algorithms = ['knn', 'svm', 'RF', 'xgboost']

reg_cols = ['name','RMSE', 'RMSE (std)', 'R2','R2 (std)', 'MAE', 'MAE (std)', 'MSE', 'MSE (std)']
binary_cols = ['name','Accuracy', 'Accuracy (std)', 'Balance Accuracy', 'Balance Accuracy (std)',
               'MCC', 'MCC (std)','F1_score', 'F1_score (std)', 'AUROC', 'AUROC (std)']
multiclass_cols = ['name','Accuracy', 'Accuracy (std)', 'Balance Accuracy', 'Balance Accuracy (std)',
               'MCC', 'MCC (std)','F1_score', 'F1_score (std)', 'AUROC', 'AUROC (std)']

base_reg_cv_scores = pd.DataFrame(columns = reg_cols)
base_toxic_cv_scores = pd.DataFrame(columns = binary_cols)
base_epa_cv_scores = pd.DataFrame(columns = multiclass_cols)

for e in endpoints:
    for d in descriptors:
        for a in algorithms:
            name = f'{e}_{a}_{d}'
#             print(f'Eavluating {name}')
            cv_score = joblib.load(f'../results/Base_models/{name}_CVScore')
            if e == 'LD50':
                scores = report_reg_scores(cv_score)
                base_reg_cv_scores = base_reg_cv_scores.append(pd.Series([name] +scores,index = reg_cols), ignore_index=True)
            if e == 'EPA':
                scores = report_clf_scores(cv_score)
                base_epa_cv_scores = base_epa_cv_scores.append(pd.Series([name] +scores,index = multiclass_cols), ignore_index=True)
            if e == 'Toxic':
                scores = report_clf_scores(cv_score)
                base_toxic_cv_scores = base_toxic_cv_scores.append(pd.Series([name] +scores,index = binary_cols), ignore_index=True)
#             print('\n')            

In [5]:
base_reg_cv_scores.head(1)

Unnamed: 0,name,RMSE,RMSE (std),R2,R2 (std),MAE,MAE (std),MSE,MSE (std)
0,LD50_knn_ecfp6bits,0.646,0.017,0.487,0.026,0.477,0.014,0.418,0.022


Get the cv scores from all the Hierarchical Models

In [4]:
endpoints = ['Toxic', 'EPA', 'LD50']
descriptors = ['Hmodel']
algorithms = ['knn', 'SVM', 'RF', 'xgboost']

reg_cols = ['name','RMSE', 'RMSE (std)', 'R2','R2 (std)', 'MAE', 'MAE (std)', 'MSE', 'MSE (std)']
binary_cols = ['name','Accuracy', 'Accuracy (std)', 'Balance Accuracy', 'Balance Accuracy (std)',
               'MCC', 'MCC (std)','F1_score', 'F1_score (std)', 'AUROC', 'AUROC (std)']
multiclass_cols = ['name','Accuracy', 'Accuracy (std)', 'Balance Accuracy', 'Balance Accuracy (std)',
               'MCC', 'MCC (std)','F1_score', 'F1_score (std)', 'AUROC', 'AUROC (std)']

H_reg_cv_scores = pd.DataFrame(columns = reg_cols)
H_toxic_cv_scores = pd.DataFrame(columns = binary_cols)
H_epa_cv_scores = pd.DataFrame(columns = multiclass_cols)

for e in endpoints:
    for d in descriptors:
        for a in algorithms:
            name = f'{e}_{a}_{d}'
#             print(f'Eavluating {name}')
            cv_score = joblib.load(f'../results/Hierarchical_models/{name}_CVScore')
            if e == 'LD50':
                scores = report_reg_scores(cv_score)
                H_reg_cv_scores = H_reg_cv_scores.append(pd.Series([name] +scores,index = reg_cols), ignore_index=True)
            if e == 'EPA':
                scores = report_clf_scores(cv_score)
                H_epa_cv_scores = H_epa_cv_scores.append(pd.Series([name] +scores,index = multiclass_cols), ignore_index=True)
            if e == 'Toxic':
                scores = report_clf_scores(cv_score)
                H_toxic_cv_scores = H_toxic_cv_scores.append(pd.Series([name] +scores,index = binary_cols), ignore_index=True)
#             print('\n')            

In [6]:
reg_cv_scores = pd.concat([H_reg_cv_scores, base_reg_cv_scores])
toxic_cv_scores = pd.concat([H_toxic_cv_scores, base_toxic_cv_scores])
epa_cv_scores = pd.concat([H_epa_cv_scores, base_epa_cv_scores])

In [8]:
# save the results
reg_cv_scores.to_csv('../results/model_evaluation/reg_cv_scores.csv', index =False)
toxic_cv_scores.to_csv('../results/model_evaluation/toxic_cv_scores.csv', index =False)
epa_cv_scores.to_csv('../results/model_evaluation/epa_cv_scores.csv', index =False)