In [14]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score, balanced_accuracy_score,precision_score, f1_score, roc_auc_score, roc_curve, precision_recall_curve, auc, recall_score, confusion_matrix,matthews_corrcoef
import sys 
import pickle

from imblearn.under_sampling import RandomUnderSampler
sys.path.append('/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/')
from RF_atomver import *
from VisUtils import *

In [15]:
def add_cm2(df, score_col=None):
    """Attach a confusion matrix and summary classification metrics to ``df``.

    Every metric is computed once over the whole frame and then broadcast, so
    each row ends up carrying the same dataset-level values.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'y'`` (true labels) and ``'prediction'``
        (predicted labels). The frame is modified in place.
    score_col : str, optional
        Name of a column in ``df`` holding a continuous score for the positive
        class (for example a predicted probability). When given, ``'ROC-AUC'``
        is computed from that column. When omitted (the default) the previous
        behaviour is kept and ROC-AUC is computed from the hard
        ``'prediction'`` labels.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the added columns ``'cm'``,
        ``'prediction_type'``, ``'f1'``, ``'ROC-AUC'``, ``'MCC'`` and
        ``'Balanced Accuracy'``.
    """
    true_labels = df['y']
    predictions = df['prediction']

    # Row-major flattened confusion matrix, repeated on every row.
    cm_flattened = confusion_matrix(true_labels, predictions).flatten().tolist()
    df['cm'] = [cm_flattened] * len(df)

    # Per-row label. `prediction_type` is not defined in this notebook;
    # presumably it is provided by one of the star imports -- TODO confirm.
    df['prediction_type'] = df.apply(lambda x: prediction_type(x['y'], x['prediction']), axis=1)

    df['f1'] = f1_score(true_labels, predictions)

    # ROC-AUC is a ranking metric: fed hard class labels it only sees a single
    # operating point. Pass `score_col` to compute it from a continuous score.
    auc_input = predictions if score_col is None else df[score_col]
    df['ROC-AUC'] = roc_auc_score(true_labels, auc_input)

    df['MCC'] = matthews_corrcoef(true_labels, predictions)
    df['Balanced Accuracy'] = balanced_accuracy_score(true_labels, predictions)
    return df


In [7]:
# Train one random forest per NEK target / assay type / feature set on the
# undersampled "batch2" data, then save the fitted model (pickle) and the
# per-compound train/test result tables (CSV) next to the input files.

# Columns that are metadata or the label and must never be used as features.
drop_cols = ['subset', 'compound_id', 'base_rdkit_smiles', 'fold', 'active']
# Duplicated label/metadata columns that only some files may contain
# (presumably created when a CSV had repeated headers -- TODO confirm).
optional_drop_cols = ['active.1', 'subset.1']
this_dir='/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/undersampler_validation/under_batch2_jp/'
RF_type = 'RF'
neks = ['2','3','5','9']
for nek in neks:
    # Only NEK2 and NEK9 are run for both assay types here.
    bind_inhib = ['binding', 'inhibition'] if nek in ['2', '9'] else ['binding']
    for bi in bind_inhib:
        for feat in ['moe', 'mfp']:
            print(f'NEK{nek} {bi} {feat}')

            df = pd.read_csv(f'{this_dir}NEK{nek}_{bi}_{feat}_UNDER_batch2.csv')

            # Build the drop list per file. The previous check looked for the
            # optional names inside `drop_cols` itself, so it could never fire
            # and a duplicated label column would have leaked into the
            # features. The shared `drop_cols` list is left unmodified so one
            # file's columns cannot affect the next.
            cols_to_drop = drop_cols + [col for col in optional_drop_cols if col in df.columns]

            train = df[df['subset'] == 'train']
            test = df[df['subset'] == 'test']

            trainX = train.drop(columns=cols_to_drop).to_numpy()
            train_y = train['active'].to_numpy()

            testX = test.drop(columns=cols_to_drop).to_numpy()
            test_y = test['active'].to_numpy()

            # rf_models / gather_rf_results are project helpers that are not
            # defined in this notebook (star-imported).
            model = rf_models(trainX, train_y, testX, test_y, RF_type, {})
            train_df = gather_rf_results(model, trainX, train_y)
            test_df = gather_rf_results(model, testX, test_y)
            train_df['subset'] = 'train'
            test_df['subset'] = 'test'
            model_name = f'NEK{nek}_{bi}_{feat}_UNDER_RF_batch2'
            train_df['model'] = model_name
            test_df['model'] = model_name

            train_df = add_cm2(train_df)
            test_df = add_cm2(test_df)

            # Identical bookkeeping columns on both result tables.
            for split_df in (train_df, test_df):
                split_df['NEK'] = f'NEK{nek}_{bi}'
                split_df['feat_type'] = feat
                split_df['strategy'] = 'UNDER'
                split_df['RF_type'] = RF_type

            with open(f'{this_dir}{model_name}.pkl', 'wb') as f:
                pickle.dump(model, f)

            train_df.to_csv(f'{this_dir}{model_name}_train_results.csv', index=False)
            test_df.to_csv(f'{this_dir}{model_name}_test_results.csv', index=False)
            
            

NEK2 binding moe
TRAIN: accuracy: 1.000, precision: 1.000, recall: 1.000, specificity: 1.000
TEST: accuracy: 0.618, precision: 0.079, recall: 0.750, specificity: 0.613
NEK2 binding mfp
TRAIN: accuracy: 0.978, precision: 1.000, recall: 0.956, specificity: 1.000
TEST: accuracy: 0.403, precision: 0.056, recall: 0.833, specificity: 0.384
NEK2 inhibition moe
TRAIN: accuracy: 1.000, precision: 1.000, recall: 1.000, specificity: 1.000
TEST: accuracy: 0.743, precision: 0.171, recall: 0.714, specificity: 0.745
NEK2 inhibition mfp
TRAIN: accuracy: 0.946, precision: 1.000, recall: 0.893, specificity: 1.000
TEST: accuracy: 0.451, precision: 0.062, recall: 0.500, specificity: 0.447
NEK3 binding moe
TRAIN: accuracy: 1.000, precision: 1.000, recall: 1.000, specificity: 1.000
TEST: accuracy: 0.571, precision: 0.081, recall: 0.588, specificity: 0.570
NEK3 binding mfp
TRAIN: accuracy: 0.969, precision: 1.000, recall: 0.938, specificity: 1.000
TEST: accuracy: 0.489, precision: 0.050, recall: 0.412, speci

In [9]:
# Collect one summary row per model from the saved test-result tables.
# Every row of a results file carries the same dataset-level metrics, so the
# first row is enough.
results = []
metric_cols = ['accuracy', 'precision', 'recall', 'specificity','TN', 'FN', 'FP', 'TP','f1', 'ROC-AUC', 'MCC', 'Balanced Accuracy',
       'model', 'cm', 'prediction_type', 'NEK', 'feat_type', 'strategy','RF_type']
for nek in neks:
    # Only NEK2 and NEK9 are run for both assay types here.
    bind_inhib = ['binding', 'inhibition'] if nek in ['2', '9'] else ['binding']
    for bi in bind_inhib:
        for feat in ['moe', 'mfp']:
            result_df = pd.read_csv(f'{this_dir}NEK{nek}_{bi}_{feat}_UNDER_RF_batch2_test_results.csv')
            # Keep the row as a one-row DataFrame: flattening it through
            # `.values` would coerce every metric to object dtype.
            results.append(result_df.iloc[[0]][metric_cols])
results_df = pd.concat(results, ignore_index=True)
results_df['model'] = results_df['model'].str.replace('scaled', 'raw')
results_df['strategy'] = results_df['strategy'].str.replace('scaled', 'raw')
# NOTE(review): the file name is spelled 'UDNER' (sic). It is kept unchanged
# because other notebooks may read this exact path -- confirm before renaming.
# NOTE(review): the CSV is written before 'modeling_type' and 'set' are added,
# so the saved file does not contain those two columns -- confirm this is
# intended.
results_df.to_csv(this_dir+'RF_results_UDNER_batch2.csv', index=False)
results_df['modeling_type'] = 'RF'
results_df['set'] = 'UNDER_batch2'

In [13]:
# Put the new batch2 undersampling results side by side with the original
# paper's plain-RF undersampling results, one row per model.
final_models_csv = '/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/general_NEK/final_paper_models.csv'
original_results = pd.read_csv(final_models_csv)

# Keep only the original-set random-forest rows ...
is_original_rf = (original_results['set']=='original') & (original_results['modeling_type'] == 'RF')
original_results = original_results[is_original_rf]

# ... and of those, only the plain RF trained with the UNDER strategy.
is_plain_under = (original_results['strategy'] == 'UNDER') & (original_results['RF_type'] == 'RF')
only_under = original_results[is_plain_under]

all_under_rf = pd.concat([results_df, only_under])
comparison_cols = ['NEK', 'feat_type','set','cm', 'recall', 'specificity']
comparison = all_under_rf[comparison_cols].sort_values(['NEK', 'feat_type'])
comparison

Unnamed: 0,NEK,feat_type,set,cm,recall,specificity
1,NEK2_binding,mfp,UNDER_batch2,"[104, 167, 2, 10]",0.833333,0.383764
68,NEK2_binding,mfp,original,"[153, 118, 4, 8]",0.666667,0.564576
0,NEK2_binding,moe,UNDER_batch2,"[166, 105, 3, 9]",0.75,0.612546
52,NEK2_binding,moe,original,"[191, 80, 6, 6]",0.5,0.704797
3,NEK2_inhibition,mfp,UNDER_batch2,"[170, 210, 14, 14]",0.5,0.447368
100,NEK2_inhibition,mfp,original,"[171, 209, 14, 14]",0.5,0.45
2,NEK2_inhibition,moe,UNDER_batch2,"[283, 97, 8, 20]",0.714286,0.744737
84,NEK2_inhibition,moe,original,"[305, 75, 4, 24]",0.857143,0.802632
5,NEK3_binding,mfp,UNDER_batch2,"[131, 134, 10, 7]",0.411765,0.49434
132,NEK3_binding,mfp,original,"[127, 138, 5, 12]",0.705882,0.479245


In [16]:
def df_tree_probs(model, x_data, verbose=True):
    """Per-sample spread of the individual estimators' predicted probabilities.

    Each estimator in ``model.estimators_`` is asked for ``predict_proba`` on
    ``x_data``; for every sample the standard deviation across estimators is
    taken separately for column 0 and column 1 of that output.

    Parameters
    ----------
    model : object
        Fitted ensemble exposing ``estimators_``, an iterable of estimators
        that each implement ``predict_proba(x_data)`` and return an array with
        at least two columns.
    x_data : array-like
        Feature matrix handed unchanged to every estimator.
    verbose : bool, default True
        When True, print the same per-tree shape diagnostics as before. Pass
        False to silence them (two lines per tree otherwise).

    Returns
    -------
    (pandas.Series, pandas.Series)
        ``std0`` and ``std1``: one value per sample, computed with
        ``DataFrame.std(axis=1)`` (pandas default ``ddof=1``). When each row
        of ``predict_proba`` has two entries summing to one, the two series
        are identical.
    """
    proba0_by_tree = {}
    proba1_by_tree = {}
    for tree_num, tree in enumerate(model.estimators_):
        tree_proba = tree.predict_proba(x_data)
        proba0_by_tree[tree_num] = tree_proba[:, 0]
        proba1_by_tree[tree_num] = tree_proba[:, 1]
        if verbose:
            # Shape the per-tree table has after adding this tree's column.
            collected_shape = (tree_proba.shape[0], tree_num + 1)
            print(f'tree_num: {tree_num}, test_proba_temp shape: {tree_proba.shape}')
            print(f'test_proba0_df shape: {collected_shape}, test_proba1_df shape: {collected_shape}')

    # Build each table once instead of inserting one column per tree, which
    # fragments the frame as the number of trees grows.
    test_proba0_df = pd.DataFrame(proba0_by_tree)
    test_proba1_df = pd.DataFrame(proba1_by_tree)
    std0 = test_proba0_df.std(axis=1)
    std1 = test_proba1_df.std(axis=1)

    if verbose:
        print(f'in df tree prob function. std0 shape: {std0.shape}, std1 shape: {std1.shape}')
    return std0, std1

In [24]:
# For every NEK target / assay type, reload each saved random forest, compute
# the per-sample standard deviation of the individual trees' predicted
# probabilities on the test split (df_tree_probs), and write the result to
# '<NEK>_std_prob_UNDER_batch2_RF.csv' under prob_path.
datapath = f'/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/undersampler_validation/under_batch2_jp/'
prob_path = '/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/undersampler_validation/under_batch2_jp/'
neks = ['2','3','5','9']
feat_types=['moe','mfp']
for i, nek in enumerate(neks):
    # Only NEK2 and NEK9 are run for both assay types here.
    if nek in ['2','9']: 
        bind_inhib = ['binding', 'inhibition']
    else: 
        bind_inhib = ['binding'] 
    for bi in bind_inhib: 
        # NOTE(review): this_bi, combined_feats (and the index i) are assigned
        # but never read in this cell.
        if bi == 'binding': 
            this_bi = 'bind'
        else: 
            this_bi = 'inhib' 
        combined_feats= []
        df = pd.DataFrame()
        # std0s / std1s are created once per (nek, bi) and gain one column per
        # feature type, so they accumulate across the feat loop below.
        std0s = pd.DataFrame() 
        std1s = pd.DataFrame() 
        for feat in feat_types: 
            print(f'NEK{nek} {bi} {feat}')
            NEK = f'NEK{nek}_{bi}'
            # `df` first holds the input data for this model; it is rebound to
            # the output table further down.
            df = pd.read_csv(f'{datapath}NEK{nek}_{bi}_{feat}_UNDER_batch2.csv')
            test = df[df['subset']=='test']
            testX = test.drop(columns=['active', 'compound_id', 'subset','base_rdkit_smiles','fold']).to_numpy()
            print(f'test X shape {testX.shape}') 
            model_name = f'NEK{nek}_{bi}_{feat}_UNDER_RF_batch2'
            # NOTE(review): this rebinds results_df, a name used earlier in the
            # notebook for the aggregated metrics table. predictions and test_y
            # are not read again in this cell.
            results_df = pd.read_csv(f'{datapath}{model_name}_test_results.csv')
 
            predictions = results_df['prediction'].to_numpy()
            test_y =  test['active'].to_numpy()
            # pickle.load can execute arbitrary code from the file; only load
            # model files produced by this project.
            with open(datapath+model_name+'.pkl', 'rb') as f: 
                model = pickle.load(f)
                std0, std1 = df_tree_probs(model, testX)
                
                # Each one-column frame is stored as a column named after the
                # current feature type.
                this_std0 = pd.DataFrame(std0) 
                this_std1 = pd.DataFrame(std1) 
                label0 = f'std0_{feat}'
                label1 = f'std1_{feat}'
                std0s[label0] = this_std0 
                std1s[label1] = this_std1 
    
            print('feat: '+feat)
            # On the second feature type this table contains the columns of
            # both feature types, while 'model' and 'feat_type' below describe
            # only the current one.
            df = pd.concat([std0s, std1s],axis=1)
            df['model'] = model_name 
            df['NEK'] = NEK
            # samp is the literal 'UNDER_batch2', so the 'scaled' branch below
            # can never run.
            samp='UNDER_batch2'
            if samp == 'scaled': 
                
                df['strategy'] = 'raw'
            else: 
                df['strategy'] = 'UNDER_batch2'
            
            df['feat_type'] = feat 
            df['RF_type'] = 'RF'
            display(df)
            print('done')
            # NOTE(review): the path depends only on NEK, not on feat, so each
            # feature iteration overwrites the file written by the previous
            # one -- confirm that keeping only the last (combined) table is
            # intended.
            df.to_csv(f'{prob_path}{NEK}_std_prob_UNDER_batch2_RF.csv')
            # nan_rows is computed but not used afterwards in this cell.
            nan_rows = df[df.isna().any(axis=1)]
    
            print(f'concated df shape: {df.shape}')
                


NEK2 binding moe
test X shape (283, 306)
tree_num: 0, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 1), test_proba1_df shape: (283, 1)
tree_num: 1, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 2), test_proba1_df shape: (283, 2)
tree_num: 2, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 3), test_proba1_df shape: (283, 3)
tree_num: 3, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 4), test_proba1_df shape: (283, 4)
tree_num: 4, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 5), test_proba1_df shape: (283, 5)
tree_num: 5, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 6), test_proba1_df shape: (283, 6)
tree_num: 6, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 7), test_proba1_df shape: (283, 7)
tree_num: 7, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 8), test_proba1_df shape: (283, 8)
tree_num: 8, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 9), test_proba1_df sha

Unnamed: 0,std0_moe,std1_moe,model,NEK,strategy,feat_type,RF_type
0,0.000000,0.000000,NEK2_binding_moe_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,moe,RF
1,0.314466,0.314466,NEK2_binding_moe_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,moe,RF
2,0.502418,0.502418,NEK2_binding_moe_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,moe,RF
3,0.487832,0.487832,NEK2_binding_moe_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,moe,RF
4,0.196946,0.196946,NEK2_binding_moe_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,moe,RF
...,...,...,...,...,...,...,...
278,0.000000,0.000000,NEK2_binding_moe_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,moe,RF
279,0.416333,0.416333,NEK2_binding_moe_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,moe,RF
280,0.460566,0.460566,NEK2_binding_moe_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,moe,RF
281,0.000000,0.000000,NEK2_binding_moe_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,moe,RF


done
concated df shape: (283, 7)
NEK2 binding mfp
test X shape (283, 2048)
tree_num: 0, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 1), test_proba1_df shape: (283, 1)
tree_num: 1, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 2), test_proba1_df shape: (283, 2)
tree_num: 2, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 3), test_proba1_df shape: (283, 3)
tree_num: 3, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 4), test_proba1_df shape: (283, 4)
tree_num: 4, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 5), test_proba1_df shape: (283, 5)
tree_num: 5, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 6), test_proba1_df shape: (283, 6)
tree_num: 6, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 7), test_proba1_df shape: (283, 7)
tree_num: 7, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 8), test_proba1_df shape: (283, 8)
tree_num: 8, test_proba_temp shape: (283, 2)
test_proba0_df s

Unnamed: 0,std0_moe,std0_mfp,std1_moe,std1_mfp,model,NEK,strategy,feat_type,RF_type
0,0.000000,0.000000,0.000000,0.000000,NEK2_binding_mfp_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,mfp,RF
1,0.314466,0.196946,0.314466,0.196946,NEK2_binding_mfp_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,mfp,RF
2,0.502418,0.100000,0.502418,0.100000,NEK2_binding_mfp_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,mfp,RF
3,0.487832,0.000000,0.487832,0.000000,NEK2_binding_mfp_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,mfp,RF
4,0.196946,0.462618,0.196946,0.462618,NEK2_binding_mfp_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,mfp,RF
...,...,...,...,...,...,...,...,...,...
278,0.000000,0.464361,0.000000,0.464361,NEK2_binding_mfp_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,mfp,RF
279,0.416333,0.456048,0.416333,0.456048,NEK2_binding_mfp_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,mfp,RF
280,0.460566,0.000000,0.460566,0.000000,NEK2_binding_mfp_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,mfp,RF
281,0.000000,0.140705,0.000000,0.140705,NEK2_binding_mfp_UNDER_RF_batch2,NEK2_binding,UNDER_batch2,mfp,RF


done
concated df shape: (283, 9)
NEK2 inhibition moe
test X shape (408, 306)
tree_num: 0, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 1), test_proba1_df shape: (408, 1)
tree_num: 1, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 2), test_proba1_df shape: (408, 2)
tree_num: 2, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 3), test_proba1_df shape: (408, 3)
tree_num: 3, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 4), test_proba1_df shape: (408, 4)
tree_num: 4, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 5), test_proba1_df shape: (408, 5)
tree_num: 5, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 6), test_proba1_df shape: (408, 6)
tree_num: 6, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 7), test_proba1_df shape: (408, 7)
tree_num: 7, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 8), test_proba1_df shape: (408, 8)
tree_num: 8, test_proba_temp shape: (408, 2)
test_proba0_df

Unnamed: 0,std0_moe,std1_moe,model,NEK,strategy,feat_type,RF_type
0,0.000000,0.000000,NEK2_inhibition_moe_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,moe,RF
1,0.487832,0.487832,NEK2_inhibition_moe_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,moe,RF
2,0.422953,0.422953,NEK2_inhibition_moe_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,moe,RF
3,0.000000,0.000000,NEK2_inhibition_moe_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,moe,RF
4,0.409360,0.409360,NEK2_inhibition_moe_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,moe,RF
...,...,...,...,...,...,...,...
403,0.326599,0.326599,NEK2_inhibition_moe_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,moe,RF
404,0.272660,0.272660,NEK2_inhibition_moe_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,moe,RF
405,0.487832,0.487832,NEK2_inhibition_moe_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,moe,RF
406,0.000000,0.000000,NEK2_inhibition_moe_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,moe,RF


done
concated df shape: (408, 7)
NEK2 inhibition mfp
test X shape (408, 2048)
tree_num: 0, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 1), test_proba1_df shape: (408, 1)
tree_num: 1, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 2), test_proba1_df shape: (408, 2)
tree_num: 2, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 3), test_proba1_df shape: (408, 3)
tree_num: 3, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 4), test_proba1_df shape: (408, 4)
tree_num: 4, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 5), test_proba1_df shape: (408, 5)
tree_num: 5, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 6), test_proba1_df shape: (408, 6)
tree_num: 6, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 7), test_proba1_df shape: (408, 7)
tree_num: 7, test_proba_temp shape: (408, 2)
test_proba0_df shape: (408, 8), test_proba1_df shape: (408, 8)
tree_num: 8, test_proba_temp shape: (408, 2)
test_proba0_d

Unnamed: 0,std0_moe,std0_mfp,std1_moe,std1_mfp,model,NEK,strategy,feat_type,RF_type
0,0.000000,0.219043,0.000000,0.219043,NEK2_inhibition_mfp_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,mfp,RF
1,0.487832,0.100000,0.487832,0.100000,NEK2_inhibition_mfp_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,mfp,RF
2,0.422953,0.483960,0.422953,0.483960,NEK2_inhibition_mfp_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,mfp,RF
3,0.000000,0.000000,0.000000,0.000000,NEK2_inhibition_mfp_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,mfp,RF
4,0.409360,0.000000,0.409360,0.000000,NEK2_inhibition_mfp_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,mfp,RF
...,...,...,...,...,...,...,...,...,...
403,0.326599,0.368453,0.326599,0.368453,NEK2_inhibition_mfp_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,mfp,RF
404,0.272660,0.497468,0.272660,0.497468,NEK2_inhibition_mfp_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,mfp,RF
405,0.487832,0.402015,0.487832,0.402015,NEK2_inhibition_mfp_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,mfp,RF
406,0.000000,0.136330,0.000000,0.136330,NEK2_inhibition_mfp_UNDER_RF_batch2,NEK2_inhibition,UNDER_batch2,mfp,RF


done
concated df shape: (408, 9)
NEK3 binding moe
test X shape (282, 306)
tree_num: 0, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 1), test_proba1_df shape: (282, 1)
tree_num: 1, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 2), test_proba1_df shape: (282, 2)
tree_num: 2, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 3), test_proba1_df shape: (282, 3)
tree_num: 3, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 4), test_proba1_df shape: (282, 4)
tree_num: 4, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 5), test_proba1_df shape: (282, 5)
tree_num: 5, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 6), test_proba1_df shape: (282, 6)
tree_num: 6, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 7), test_proba1_df shape: (282, 7)
tree_num: 7, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 8), test_proba1_df shape: (282, 8)
tree_num: 8, test_proba_temp shape: (282, 2)
test_proba0_df sh

Unnamed: 0,std0_moe,std1_moe,model,NEK,strategy,feat_type,RF_type
0,0.429235,0.429235,NEK3_binding_moe_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,moe,RF
1,0.460566,0.460566,NEK3_binding_moe_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,moe,RF
2,0.140705,0.140705,NEK3_binding_moe_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,moe,RF
3,0.000000,0.000000,NEK3_binding_moe_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,moe,RF
4,0.000000,0.000000,NEK3_binding_moe_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,moe,RF
...,...,...,...,...,...,...,...
277,0.000000,0.000000,NEK3_binding_moe_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,moe,RF
278,0.000000,0.000000,NEK3_binding_moe_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,moe,RF
279,0.000000,0.000000,NEK3_binding_moe_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,moe,RF
280,0.000000,0.000000,NEK3_binding_moe_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,moe,RF


done
concated df shape: (282, 7)
NEK3 binding mfp
test X shape (282, 2048)
tree_num: 0, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 1), test_proba1_df shape: (282, 1)
tree_num: 1, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 2), test_proba1_df shape: (282, 2)
tree_num: 2, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 3), test_proba1_df shape: (282, 3)
tree_num: 3, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 4), test_proba1_df shape: (282, 4)
tree_num: 4, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 5), test_proba1_df shape: (282, 5)
tree_num: 5, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 6), test_proba1_df shape: (282, 6)
tree_num: 6, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 7), test_proba1_df shape: (282, 7)
tree_num: 7, test_proba_temp shape: (282, 2)
test_proba0_df shape: (282, 8), test_proba1_df shape: (282, 8)
tree_num: 8, test_proba_temp shape: (282, 2)
test_proba0_df s

Unnamed: 0,std0_moe,std0_mfp,std1_moe,std1_mfp,model,NEK,strategy,feat_type,RF_type
0,0.429235,0.000000,0.429235,0.000000,NEK3_binding_mfp_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,mfp,RF
1,0.460566,0.500908,0.460566,0.500908,NEK3_binding_mfp_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,mfp,RF
2,0.140705,0.000000,0.140705,0.000000,NEK3_binding_mfp_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,mfp,RF
3,0.000000,0.460566,0.000000,0.460566,NEK3_binding_mfp_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,mfp,RF
4,0.000000,0.386123,0.000000,0.386123,NEK3_binding_mfp_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,mfp,RF
...,...,...,...,...,...,...,...,...,...
277,0.000000,0.386123,0.000000,0.386123,NEK3_binding_mfp_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,mfp,RF
278,0.000000,0.427909,0.000000,0.427909,NEK3_binding_mfp_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,mfp,RF
279,0.000000,0.287623,0.000000,0.287623,NEK3_binding_mfp_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,mfp,RF
280,0.000000,0.394277,0.000000,0.394277,NEK3_binding_mfp_UNDER_RF_batch2,NEK3_binding,UNDER_batch2,mfp,RF


done
concated df shape: (282, 9)
NEK5 binding moe
test X shape (248, 306)
tree_num: 0, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 1), test_proba1_df shape: (248, 1)
tree_num: 1, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 2), test_proba1_df shape: (248, 2)
tree_num: 2, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 3), test_proba1_df shape: (248, 3)
tree_num: 3, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 4), test_proba1_df shape: (248, 4)
tree_num: 4, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 5), test_proba1_df shape: (248, 5)
tree_num: 5, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 6), test_proba1_df shape: (248, 6)
tree_num: 6, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 7), test_proba1_df shape: (248, 7)
tree_num: 7, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 8), test_proba1_df shape: (248, 8)
tree_num: 8, test_proba_temp shape: (248, 2)
test_proba0_df sh

Unnamed: 0,std0_moe,std1_moe,model,NEK,strategy,feat_type,RF_type
0,0.000000,0.000000,NEK5_binding_moe_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,moe,RF
1,0.502418,0.502418,NEK5_binding_moe_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,moe,RF
2,0.377525,0.377525,NEK5_binding_moe_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,moe,RF
3,0.000000,0.000000,NEK5_binding_moe_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,moe,RF
4,0.000000,0.000000,NEK5_binding_moe_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,moe,RF
...,...,...,...,...,...,...,...
243,0.000000,0.000000,NEK5_binding_moe_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,moe,RF
244,0.140705,0.140705,NEK5_binding_moe_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,moe,RF
245,0.000000,0.000000,NEK5_binding_moe_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,moe,RF
246,0.140705,0.140705,NEK5_binding_moe_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,moe,RF


done
concated df shape: (248, 7)
NEK5 binding mfp
test X shape (248, 2048)
tree_num: 0, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 1), test_proba1_df shape: (248, 1)
tree_num: 1, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 2), test_proba1_df shape: (248, 2)
tree_num: 2, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 3), test_proba1_df shape: (248, 3)
tree_num: 3, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 4), test_proba1_df shape: (248, 4)
tree_num: 4, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 5), test_proba1_df shape: (248, 5)
tree_num: 5, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 6), test_proba1_df shape: (248, 6)
tree_num: 6, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 7), test_proba1_df shape: (248, 7)
tree_num: 7, test_proba_temp shape: (248, 2)
test_proba0_df shape: (248, 8), test_proba1_df shape: (248, 8)
tree_num: 8, test_proba_temp shape: (248, 2)
test_proba0_df s

Unnamed: 0,std0_moe,std0_mfp,std1_moe,std1_mfp,model,NEK,strategy,feat_type,RF_type
0,0.000000,0.070353,0.000000,0.070353,NEK5_binding_mfp_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,mfp,RF
1,0.502418,0.208167,0.502418,0.208167,NEK5_binding_mfp_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,mfp,RF
2,0.377525,0.000000,0.377525,0.000000,NEK5_binding_mfp_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,mfp,RF
3,0.000000,0.000000,0.000000,0.000000,NEK5_binding_mfp_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,mfp,RF
4,0.000000,0.000000,0.000000,0.000000,NEK5_binding_mfp_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,mfp,RF
...,...,...,...,...,...,...,...,...,...
243,0.000000,0.000000,0.000000,0.000000,NEK5_binding_mfp_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,mfp,RF
244,0.140705,0.000000,0.140705,0.000000,NEK5_binding_mfp_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,mfp,RF
245,0.000000,0.000000,0.000000,0.000000,NEK5_binding_mfp_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,mfp,RF
246,0.140705,0.000000,0.140705,0.000000,NEK5_binding_mfp_UNDER_RF_batch2,NEK5_binding,UNDER_batch2,mfp,RF


done
concated df shape: (248, 9)
NEK9 binding moe
test X shape (283, 306)
tree_num: 0, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 1), test_proba1_df shape: (283, 1)
tree_num: 1, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 2), test_proba1_df shape: (283, 2)
tree_num: 2, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 3), test_proba1_df shape: (283, 3)
tree_num: 3, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 4), test_proba1_df shape: (283, 4)
tree_num: 4, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 5), test_proba1_df shape: (283, 5)
tree_num: 5, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 6), test_proba1_df shape: (283, 6)
tree_num: 6, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 7), test_proba1_df shape: (283, 7)
tree_num: 7, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 8), test_proba1_df shape: (283, 8)
tree_num: 8, test_proba_temp shape: (283, 2)
test_proba0_df sh

Unnamed: 0,std0_moe,std1_moe,model,NEK,strategy,feat_type,RF_type
0,0.000000,0.000000,NEK9_binding_moe_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,moe,RF
1,0.429235,0.429235,NEK9_binding_moe_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,moe,RF
2,0.287623,0.287623,NEK9_binding_moe_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,moe,RF
3,0.337998,0.337998,NEK9_binding_moe_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,moe,RF
4,0.219043,0.219043,NEK9_binding_moe_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,moe,RF
...,...,...,...,...,...,...,...
278,0.000000,0.000000,NEK9_binding_moe_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,moe,RF
279,0.409360,0.409360,NEK9_binding_moe_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,moe,RF
280,0.000000,0.000000,NEK9_binding_moe_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,moe,RF
281,0.000000,0.000000,NEK9_binding_moe_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,moe,RF


done
concated df shape: (283, 7)
NEK9 binding mfp
test X shape (283, 2048)
tree_num: 0, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 1), test_proba1_df shape: (283, 1)
tree_num: 1, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 2), test_proba1_df shape: (283, 2)
tree_num: 2, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 3), test_proba1_df shape: (283, 3)
tree_num: 3, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 4), test_proba1_df shape: (283, 4)
tree_num: 4, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 5), test_proba1_df shape: (283, 5)
tree_num: 5, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 6), test_proba1_df shape: (283, 6)
tree_num: 6, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 7), test_proba1_df shape: (283, 7)
tree_num: 7, test_proba_temp shape: (283, 2)
test_proba0_df shape: (283, 8), test_proba1_df shape: (283, 8)
tree_num: 8, test_proba_temp shape: (283, 2)
test_proba0_df s

Unnamed: 0,std0_moe,std0_mfp,std1_moe,std1_mfp,model,NEK,strategy,feat_type,RF_type
0,0.000000,0.429235,0.000000,0.429235,NEK9_binding_mfp_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,mfp,RF
1,0.429235,0.431582,0.429235,0.431582,NEK9_binding_mfp_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,mfp,RF
2,0.287623,0.000000,0.287623,0.000000,NEK9_binding_mfp_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,mfp,RF
3,0.337998,0.368453,0.337998,0.368453,NEK9_binding_mfp_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,mfp,RF
4,0.219043,0.219043,0.219043,0.219043,NEK9_binding_mfp_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,mfp,RF
...,...,...,...,...,...,...,...,...,...
278,0.000000,0.219043,0.000000,0.219043,NEK9_binding_mfp_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,mfp,RF
279,0.409360,0.000000,0.409360,0.000000,NEK9_binding_mfp_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,mfp,RF
280,0.000000,0.000000,0.000000,0.000000,NEK9_binding_mfp_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,mfp,RF
281,0.000000,0.386123,0.000000,0.386123,NEK9_binding_mfp_UNDER_RF_batch2,NEK9_binding,UNDER_batch2,mfp,RF


done
concated df shape: (283, 9)
NEK9 inhibition moe
test X shape (80, 306)
tree_num: 0, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 1), test_proba1_df shape: (80, 1)
tree_num: 1, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 2), test_proba1_df shape: (80, 2)
tree_num: 2, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 3), test_proba1_df shape: (80, 3)
tree_num: 3, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 4), test_proba1_df shape: (80, 4)
tree_num: 4, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 5), test_proba1_df shape: (80, 5)
tree_num: 5, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 6), test_proba1_df shape: (80, 6)
tree_num: 6, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 7), test_proba1_df shape: (80, 7)
tree_num: 7, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 8), test_proba1_df shape: (80, 8)
tree_num: 8, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 9), test_prob

Unnamed: 0,std0_moe,std1_moe,model,NEK,strategy,feat_type,RF_type
0,0.000000,0.000000,NEK9_inhibition_moe_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,moe,RF
1,0.238683,0.238683,NEK9_inhibition_moe_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,moe,RF
2,0.000000,0.000000,NEK9_inhibition_moe_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,moe,RF
3,0.196946,0.196946,NEK9_inhibition_moe_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,moe,RF
4,0.497570,0.497570,NEK9_inhibition_moe_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,moe,RF
...,...,...,...,...,...,...,...
75,0.000000,0.000000,NEK9_inhibition_moe_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,moe,RF
76,0.368453,0.368453,NEK9_inhibition_moe_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,moe,RF
77,0.000000,0.000000,NEK9_inhibition_moe_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,moe,RF
78,0.000000,0.000000,NEK9_inhibition_moe_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,moe,RF


done
concated df shape: (80, 7)
NEK9 inhibition mfp
test X shape (80, 2048)
tree_num: 0, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 1), test_proba1_df shape: (80, 1)
tree_num: 1, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 2), test_proba1_df shape: (80, 2)
tree_num: 2, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 3), test_proba1_df shape: (80, 3)
tree_num: 3, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 4), test_proba1_df shape: (80, 4)
tree_num: 4, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 5), test_proba1_df shape: (80, 5)
tree_num: 5, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 6), test_proba1_df shape: (80, 6)
tree_num: 6, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 7), test_proba1_df shape: (80, 7)
tree_num: 7, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 8), test_proba1_df shape: (80, 8)
tree_num: 8, test_proba_temp shape: (80, 2)
test_proba0_df shape: (80, 9), test_prob

Unnamed: 0,std0_moe,std0_mfp,std1_moe,std1_mfp,model,NEK,strategy,feat_type,RF_type
0,0.000000,0.070353,0.000000,0.070353,NEK9_inhibition_mfp_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,mfp,RF
1,0.238683,0.070353,0.238683,0.070353,NEK9_inhibition_mfp_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,mfp,RF
2,0.000000,0.236291,0.000000,0.236291,NEK9_inhibition_mfp_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,mfp,RF
3,0.196946,0.085723,0.196946,0.085723,NEK9_inhibition_mfp_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,mfp,RF
4,0.497570,0.000000,0.497570,0.000000,NEK9_inhibition_mfp_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,mfp,RF
...,...,...,...,...,...,...,...,...,...
75,0.000000,0.429235,0.000000,0.429235,NEK9_inhibition_mfp_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,mfp,RF
76,0.368453,0.150756,0.368453,0.150756,NEK9_inhibition_mfp_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,mfp,RF
77,0.000000,0.000000,0.000000,0.000000,NEK9_inhibition_mfp_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,mfp,RF
78,0.000000,0.000000,0.000000,0.000000,NEK9_inhibition_mfp_UNDER_RF_batch2,NEK9_inhibition,UNDER_batch2,mfp,RF


done
concated df shape: (80, 9)


array(['mfp'], dtype=object)