# Accessing Pickled Metrics And Prediction Probabilities - Testing Data- (for AutoMLPipe-BC)
The pipeline outputs pickled objects with all the metric results, elements needed to build the ROC and PRC plots, as well as the prediction probabilities on the testing data across all datasets, algorithm models, and CV dataset partitions. 

This notebook illustrates how the user can access the pickled metric information saved as a list object. 

It includes (1) grabbing and calculating all average metric scores over the CV partitions, (2) grabbing the elements needed to build the average ROC plot, (3) grabbing the elements needed to build the average PRC plot, (4) grabbing and reporting average model feature importance scores, and (5) grabbing and reporting the model testing prediction probabilities for each instance of the dataset. 

When run, this last item creates a new 'prediction_probas' folder inside each dataset's 'model_evaluation' folder in the pipeline's output experiment folder. There, the prediction probabilities for the case class (i.e. class code 1) are saved as one .csv file per algorithm and CV partition pair. Each file contains the instance's true outcome value, the unique instance ID, and the predicted probability of the instance being a case (class code 1). 
 

## Import Packages

In [1]:
import os
import pandas as pd
import pickle
import numpy as np
from statistics import mean
from scipy import interp,stats
import warnings
# Suppress every warning for the rest of the notebook session. Note that this also hides
# deprecation notices raised by the imported libraries.
warnings.filterwarnings('ignore')

# Jupyter Notebook Hack: This code ensures that the results of multiple commands within a given cell are all displayed, rather than just the last.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Set Run Parameters

In [2]:
# Path to the pipeline's output experiment folder (the folder that contains metadata.csv).
# Use forward slashes: the experiment name is taken from the text after the last '/'.
#experiment_path = "C:/Users/ryanu/Documents/Analysis/AutoMLPipe_Experiments/hcc_demo"
experiment_path = "C:/Users/ryanurb/Documents/Analysis/AutoMLPipe_Experiments/hcc_demo"
targetDataName = 'None' # Name of a single analyzed dataset to report on, or the string 'None' to report on all analyzed datasets
algorithms = [] # Leave as an empty list to evaluate every modeling algorithm that was run in the pipeline.

# Algorithm names recognized by this notebook:
#available_algorithms = ['Naive Bayes','Logistic Regression','Decision Tree','Random Forest','Gradient Boosting','XGB','LGB','SVM','ANN','K Neighbors','eLCS','XCS','ExSTraCS']

## Automatically Detect Dataset Names

In [3]:
# Get dataset names for all completed dataset analyses in the experiment folder.
# normpath() drops any trailing separator so basename() always yields the experiment folder name.
experiment_name = os.path.basename(os.path.normpath(experiment_path)) #Name of experiment folder

# Entries the pipeline (or an earlier run of its later phases) may leave next to the
# per-dataset folders. None of them is a dataset, and each one is optional here.
non_dataset_entries = {
    'metadata.csv',
    'jobsCompleted',
    'UsefulNotebooks',
    'logs',
    'jobs',
    'DatasetComparisons', #Present if it has been run previously
    'KeyFileCopy', #Present if it has been run previously
    experiment_name+'_ML_Pipeline_Report.pdf', #Present if it has been run previously
}

# Filter in one pass so that a missing entry (e.g. no 'logs' folder) can never
# prevent another entry (e.g. 'jobs') from being excluded.
datasets = sorted( #sorting ensures consistent ordering of datasets
    entry for entry in os.listdir(experiment_path)
    if entry not in non_dataset_entries
)
print("Analyzed Datasets: "+str(datasets))

Analyzed Datasets: ['hcc-data_example', 'hcc-data_example_no_covariates']


## Load Other Necessary Parameters

In [4]:
import ast

# metadata.csv is loaded as a 2-D array; the values are read from column 1 at fixed row positions.
metadata = pd.read_csv(experiment_path + '/' + 'metadata.csv').values
class_label = metadata[0, 1]
instance_label = metadata[1, 1]
cv_partitions = int(metadata[6,1])

possible_algos = ['Naive Bayes','Logistic Regression','Decision Tree','Random Forest','Gradient Boosting','XGB','LGB','SVM','ANN','K Neighbors','eLCS','XCS','ExSTraCS']
# Abbreviations used in the pickled metric file names.
abbrev = {'Naive Bayes':'NB','Logistic Regression':'LR','Decision Tree':'DT','Random Forest':'RF','Gradient Boosting':'GB','XGB':'XGB','LGB':'LGB','SVM':'SVM','ANN':'ANN','K Neighbors':'KN','eLCS':'eLCS','XCS':'XCS','ExSTraCS':'ExSTraCS'}

# Rows 20-32 hold one run flag per algorithm, in the same order as possible_algos.
run_flags = metadata[20:33, 1]
(do_NB, do_LR, do_DT, do_RF, do_GB, do_XGB, do_LGB,
 do_SVM, do_ANN, do_KN, do_eLCS, do_XCS, do_ExSTraCS) = run_flags

#Create algorithms list (i.e. modeling algorithms that were run in the pipeline)
# Only fill the list when the user left it empty: a user-supplied selection is kept as is,
# and re-running this cell does not append duplicate entries.
if not algorithms:
    algorithms.extend(
        name for name, flag in zip(possible_algos, run_flags)
        # literal_eval parses the stored 'True'/'False' text without executing arbitrary code
        if ast.literal_eval(str(flag).strip())
    )

## Extract Metric List and Calculate CV Averages

In [5]:
# Print, for every dataset and algorithm, the classification metrics averaged over all CV partitions.
# NOTE: the metric files are read with pickle.load(); only point experiment_path at pipeline output you trust.
if not targetDataName == 'None': # User specified one analyzed dataset above (if more than one were analyzed)
    # Build a filtered list rather than calling remove() while iterating, which skips entries.
    datasets = [name for name in datasets if name == targetDataName]
    print("Vizualized Datasets: "+str(datasets))

# Labels of the standard classification metrics, in the order they are stored in the pickled metric list.
metric_labels = ['Balanced Accuracy', 'Accuracy', 'F1_Score', 'Sensitivity (Recall)', 'Specificity',
                 'Precision (PPV)', 'TP', 'TN', 'FP', 'FN', 'NPV', 'LR+', 'LR-']

for each in datasets:
    print("---------------------------------------")
    print(each)
    print("---------------------------------------")
    full_path = experiment_path+'/'+each
    for algorithm in algorithms: #loop through algorithms
        print(algorithm)
        # One list of per-CV values for each metric
        scores = {label: [] for label in metric_labels}
        aucs = [] #areas under ROC curve
        praucs = [] #area under PRC curve
        aveprecs = [] #average precisions for PRC

        for cvCount in range(cv_partitions): #loop through cv's
            #Load pickled metric file for given algorithm and cv
            result_file = full_path+'/model_evaluation/pickled_metrics/'+abbrev[algorithm]+"_CV_"+str(cvCount)+"_metrics"
            with open(result_file, 'rb') as file:
                results = pickle.load(file)

            #Separate pickled results
            metricList = results[0] #First item in pickled list is the metric list (set of standard classification metrics)
            for position, label in enumerate(metric_labels):
                scores[label].append(metricList[position])

            aucs.append(results[3]) #Fourth item is the ROC AUC
            praucs.append(results[6]) #Seventh item is the PRC AUC
            aveprecs.append(results[7]) #Eighth item is the average precision of PRC

        # Average every metric over the CV partitions.
        # (To inspect the individual per-CV values instead, print scores, aucs, praucs and aveprecs.)
        results = {label: mean(values) for label, values in scores.items()}
        results['ROC_AUC'] = mean(aucs)
        results['PRC_AUC'] = mean(praucs)
        results['PRC_APS'] = mean(aveprecs)
        print(results)

---------------------------------------
hcc-data_example
---------------------------------------
Naive Bayes
{'Balanced Accuracy': 0.5450513538748832, 'Accuracy': 0.5575757575757576, 'F1_Score': 0.44524336543115883, 'Sensitivity (Recall)': 0.49206349206349204, 'Specificity': 0.5980392156862745, 'Precision (PPV)': 0.5786111111111112, 'TP': 10, 'TN': 20, 'FP': 13, 'FN': 10, 'NPV': 0.5219256933542648, 'LR+': 3.670308527451384, 'LR-': 2.6602533269199937, 'ROC_AUC': 0.6760037348272642, 'PRC_AUC': 0.5644719911096832, 'PRC_APS': 0.5522637454938832}
Logistic Regression
{'Balanced Accuracy': 0.7089169000933707, 'Accuracy': 0.7151515151515152, 'F1_Score': 0.6459627329192547, 'Sensitivity (Recall)': 0.6825396825396826, 'Specificity': 0.7352941176470589, 'Precision (PPV)': 0.6146825396825397, 'TP': 14, 'TN': 25, 'FP': 9, 'FN': 6, 'NPV': 0.7903858317520557, 'LR+': 2.5859788359788363, 'LR-': 0.43050061050061045, 'ROC_AUC': 0.7670401493930906, 'PRC_AUC': 0.6400439293545385, 'PRC_APS': 0.6552905278980

## Extract list of increasing false positive rates and true positive rates for constructing ROC

In [6]:
# Print, for every dataset and algorithm, the true positive rates averaged over all CV partitions
# on a shared grid of false positive rates (the elements needed to draw the average ROC curve).
# NOTE: the metric files are read with pickle.load(); only point experiment_path at pipeline output you trust.
if not targetDataName == 'None': # User specified one analyzed dataset above (if more than one were analyzed)
    # Build a filtered list rather than calling remove() while iterating, which skips entries.
    datasets = [name for name in datasets if name == targetDataName]
    print("Vizualized Datasets: "+str(datasets))

mean_fpr = np.linspace(0, 1, 100) #shared FPR grid, used to plot all CVs in single ROC plot

for each in datasets:
    print("---------------------------------------")
    print(each)
    print("---------------------------------------")
    full_path = experiment_path+'/'+each
    for algorithm in algorithms: #loop through algorithms
        print(algorithm)
        tprs = [] # true positive rates interpolated onto mean_fpr, one array per CV partition

        for cvCount in range(cv_partitions): #loop through cv's
            #Load pickled metric file for given algorithm and cv
            result_file = full_path+'/model_evaluation/pickled_metrics/'+abbrev[algorithm]+"_CV_"+str(cvCount)+"_metrics"
            with open(result_file, 'rb') as file:
                results = pickle.load(file)

            #Separate pickled results
            fpr = results[1]
            tpr = results[2]

            # np.interp replaces the deprecated scipy.interp alias of the same function
            tprs.append(np.interp(mean_fpr, fpr, tpr))
            tprs[-1][0] = 0.0 # force the interpolated curve to start at a TPR of 0 for an FPR of 0

        results = {'tprs': np.mean(tprs, axis=0)}

        print(results)
        #print('fprs: '+str(mean_fpr))

---------------------------------------
hcc-data_example
---------------------------------------
Naive Bayes
{'tprs': array([0.        , 0.04264871, 0.05355139, 0.15969216, 0.17059484,
       0.18149751, 0.19143819, 0.19688953, 0.20234087, 0.22366522,
       0.22911656, 0.2345679 , 0.25396825, 0.25396825, 0.25396825,
       0.33333333, 0.33333333, 0.33333333, 0.33333333, 0.33333333,
       0.33333333, 0.3968254 , 0.3968254 , 0.3968254 , 0.41269841,
       0.41269841, 0.41269841, 0.50793651, 0.50793651, 0.50793651,
       0.53968254, 0.53968254, 0.53968254, 0.61904762, 0.61904762,
       0.63492063, 0.63492063, 0.63492063, 0.6984127 , 0.6984127 ,
       0.6984127 , 0.6984127 , 0.6984127 , 0.6984127 , 0.77777778,
       0.77777778, 0.77777778, 0.82539683, 0.82539683, 0.82539683,
       0.82539683, 0.82539683, 0.82539683, 0.84126984, 0.84126984,
       0.84126984, 0.84126984, 0.84126984, 0.84126984, 0.85714286,
       0.85714286, 0.85714286, 0.88888889, 0.88888889, 0.88888889,
       0.90

## Extract list of increasing precision and recall values for constructing PRC

In [7]:
# Print, for every dataset and algorithm, the precision values averaged over all CV partitions
# on a shared grid of recall values (the elements needed to draw the average PRC curve).
# NOTE: the metric files are read with pickle.load(); only point experiment_path at pipeline output you trust.
if not targetDataName == 'None': # User specified one analyzed dataset above (if more than one were analyzed)
    # Build a filtered list rather than calling remove() while iterating, which skips entries.
    datasets = [name for name in datasets if name == targetDataName]
    print("Vizualized Datasets: "+str(datasets))

mean_recall = np.linspace(0, 1, 100) #shared recall grid, used to plot all CVs in single PRC plot

for each in datasets:
    print("---------------------------------------")
    print(each)
    print("---------------------------------------")
    full_path = experiment_path+'/'+each
    for algorithm in algorithms: #loop through algorithms
        print(algorithm)
        precs = [] # precision values interpolated onto mean_recall, one array per CV partition

        for cvCount in range(cv_partitions): #loop through cv's
            #Load pickled metric file for given algorithm and cv
            result_file = full_path+'/model_evaluation/pickled_metrics/'+abbrev[algorithm]+"_CV_"+str(cvCount)+"_metrics"
            with open(result_file, 'rb') as file:
                results = pickle.load(file)

            #Separate pickled results
            prec = results[4]
            recall = results[5]

            # np.interp replaces the deprecated scipy.interp alias of the same function.
            # NOTE(review): np.interp expects increasing x-coordinates - assumes the pickled recall values are stored in increasing order; confirm.
            precs.append(np.interp(mean_recall, recall, prec))

        results = {'precs': np.mean(precs, axis=0)}

        print(results)
        #print('recall: '+str(mean_recall))

---------------------------------------
hcc-data_example
---------------------------------------
Naive Bayes
{'precs': array([1.        , 0.97348485, 0.9469697 , 0.92045455, 0.89393939,
       0.86742424, 0.84090909, 0.81439394, 0.78787879, 0.76136364,
       0.60791246, 0.61203704, 0.61616162, 0.6202862 , 0.62441077,
       0.58299663, 0.58260382, 0.582211  , 0.58181818, 0.58294052,
       0.59315376, 0.603367  , 0.61358025, 0.62379349, 0.61458743,
       0.62269885, 0.63081027, 0.63892168, 0.6470331 , 0.57763209,
       0.58426573, 0.59089938, 0.59753302, 0.58114035, 0.58671215,
       0.59228395, 0.59785575, 0.60342755, 0.51160279, 0.51759718,
       0.52359156, 0.52958594, 0.53558033, 0.5034684 , 0.5089532 ,
       0.514438  , 0.51992281, 0.52540761, 0.47971561, 0.48470545,
       0.48969529, 0.49468514, 0.48645228, 0.49100099, 0.4955497 ,
       0.50009841, 0.50464712, 0.49950326, 0.50365821, 0.50781317,
       0.51196812, 0.51612307, 0.51156289, 0.5153772 , 0.5191915 ,
       0.5

## Extract Average Model Feature Importance Estimates (Over CVs)

In [8]:
# Print, for every dataset and algorithm, the model feature importance scores averaged over all CV partitions.
# NOTE: the metric files are read with pickle.load(); only point experiment_path at pipeline output you trust.
if not targetDataName == 'None': # User specified one analyzed dataset above (if more than one were analyzed)
    # Build a filtered list rather than calling remove() while iterating, which skips entries.
    datasets = [name for name in datasets if name == targetDataName]
    print("Vizualized Datasets: "+str(datasets))

for each in datasets:
    print("---------------------------------------")
    print(each)
    print("---------------------------------------")
    full_path = experiment_path+'/'+each
    original_headers = pd.read_csv(full_path+"/exploratory/OriginalFeatureNames.csv",sep=',').columns.values.tolist() #Get Original Headers

    # Feature column positions of each CV testing file. These do not depend on the algorithm,
    # so they are read once per dataset (nrows=0 loads the header row only).
    cv_feature_positions = []
    for cvCount in range(cv_partitions):
        headers = pd.read_csv(full_path+'/CVDatasets/'+each+'_CV_'+str(cvCount)+'_Test.csv', nrows=0).columns.values.tolist()
        if instance_label != 'None':
            headers.remove(instance_label)
        headers.remove(class_label)
        positions = {}
        for position, name in enumerate(headers):
            positions.setdefault(name, position) # keep the first position of a name, like list.index()
        cv_feature_positions.append(positions)

    for algorithm in algorithms: #loop through algorithms
        print(algorithm)
        # Summed FI scores over all cvs, one slot per original feature
        # (all original features in dataset prior to feature selection included)
        FI_ave = [0] * len(original_headers)

        for cvCount in range(cv_partitions): #loop through cv's
            #Load pickled metric file for given algorithm and cv
            result_file = full_path+'/model_evaluation/pickled_metrics/'+abbrev[algorithm]+"_CV_"+str(cvCount)+"_metrics"
            with open(result_file, 'rb') as file:
                results = pickle.load(file)

            #Separate pickled results
            fi = results[8]

            # Add this partition's scores (takes into account that all features are not in each CV partition,
            # and that the partition's features may not be in the original order)
            positions = cv_feature_positions[cvCount]
            for j, feature in enumerate(original_headers):
                if feature in positions:  # Check if current feature from original dataset was in the partition
                    FI_ave[j] += fi[positions[feature]]

        #Turn FI sums into averages (a feature absent from a partition contributes 0 for that partition)
        FI_ave = [total / float(cv_partitions) for total in FI_ave]

        # Pair every original feature name with its average score
        fi_dict = dict(zip(original_headers, FI_ave))

        print(fi_dict)

---------------------------------------
hcc-data_example
---------------------------------------
Naive Bayes
{'Gender': -0.005882352941176461, 'Symptoms ': 0.007633053221288537, 'Alcohol': 0.0, 'Hepatitis B Surface Antigen': -0.000711951447245555, 'Hepatitis B e Antigen': -0.00011671335200748404, 'Hepatitis B Core Antibody': 0.00030345471521942, 'Hepatitis C Virus Antibody': 0.0019841269841269957, 'Cirrhosis': -0.00015172735760971, 'Endemic Countries': 0.0011904761904761862, 'Smoking': 0.0011904761904761862, 'Diabetes': 0.002450980392156865, 'Obesity': 0.0, 'Hemochromatosis': 0.0, 'Arterial Hypertension': 0.0, 'Chronic Renal Insufficiency': 0.00440009337068159, 'Human Immunodeficiency Virus': 0.0, 'Nonalcoholic Steatohepatitis': 0.010317460317460302, 'Esophageal Varices': -0.003431372549019599, 'Splenomegaly': 0.00024509803921569283, 'Portal Hypertension': 0.0, 'Portal Vein Thrombosis': 0.013690476190476197, 'Liver Metastasis': -0.0009687208216619909, 'Radiological Hallmark': 0.0, 'Age

## Extract and Report Case (i.e. class 1) Prediction Probabilities For all instances in each Testing Dataset

In [9]:


# For every dataset, algorithm and CV partition, write a .csv file holding the testing data's
# class label, instance ID (when the data has one) and predicted probability of class 1.
# NOTE: the metric files are read with pickle.load(); only point experiment_path at pipeline output you trust.
if not targetDataName == 'None': # User specified one analyzed dataset above (if more than one were analyzed)
    # Build a filtered list rather than calling remove() while iterating, which skips entries.
    datasets = [name for name in datasets if name == targetDataName]
    print("Vizualized Datasets: "+str(datasets))

# Columns copied from the testing data into every output file.
# instance_label is the string 'None' when the data has no instance ID column.
summary_columns = [class_label]
if instance_label != 'None':
    summary_columns.append(instance_label)

for each in datasets:
    print("---------------------------------------")
    print(each)
    print("---------------------------------------")

    full_path = experiment_path+'/'+each

    #Make folder in experiment folder/datafolder to store all prediction probabilities per algorithm/CV combination
    probas_folder = full_path+'/model_evaluation/prediction_probas'
    if not os.path.exists(probas_folder):
        os.mkdir(probas_folder)

    for algorithm in algorithms: #loop through algorithms
        print(algorithm)

        for cvCount in range(cv_partitions): #loop through cv's
            print(cvCount)
            #Load pickled metric file for given algorithm and cv
            result_file = full_path+'/model_evaluation/pickled_metrics/'+abbrev[algorithm]+"_CV_"+str(cvCount)+"_metrics"
            with open(result_file, 'rb') as file:
                results = pickle.load(file)

            #Load associated testing dataset
            test_data = pd.read_csv(full_path+'/CVDatasets/'+each+'_CV_'+str(cvCount)+'_Test.csv')
            # Copy so the new column is added to an independent frame rather than a slice of test_data
            probas_summary = test_data[summary_columns].copy()

            #Separate pickled results
            probas_ = results[9]
            print(probas_[:,1])
            probas_summary['1_prob']=probas_[:,1] # column 1 of the stored probabilities is reported as the class 1 probability
            file_name = probas_folder+'/'+algorithm+'_CV_'+str(cvCount)+'_case_probas.csv'
            probas_summary.to_csv(file_name, index=False)

---------------------------------------
hcc-data_example
---------------------------------------
Naive Bayes
0
[2.59351142e-03 8.97663652e-04 8.08525595e-07 2.03323992e-15
 1.77195505e-09 2.66918833e-08 7.74997624e-04 8.19797076e-04
 1.76963061e-08 1.51478647e-06 3.68103723e-04 1.55873726e-09
 9.39434296e-07 1.14043636e-08 3.44649330e-09 1.27951549e-09
 8.67450989e-09 5.01298309e-10 2.59643505e-07 1.11343288e-07
 4.91032464e-09 2.88637603e-04 8.52547617e-09 2.05690761e-07
 1.13668154e-10 3.86274929e-05 1.17025301e-08 1.47097835e-04
 1.84228092e-10 3.89146285e-10 7.76495892e-08 4.92162667e-10
 4.52254534e-09 9.99998751e-01 5.95975747e-02 1.00000000e+00
 2.76743044e-08 7.05549200e-06 2.47817867e-07 2.84425765e-08
 1.03937834e-02 9.64182195e-01 1.38167279e-01 1.00000000e+00
 1.25580549e-06 1.03591715e-04 9.77191136e-06 5.53866105e-14
 9.87920697e-06 2.34244668e-07 1.60498275e-05 4.31595946e-04
 9.80292125e-01 9.99420966e-01 2.00515575e-06]
1
[9.94394090e-01 9.99999035e-01 9.99644763e-01 9

1
[0.11777667 0.95458899 0.29662817 0.22596607 0.06429841 0.3386381
 0.28509037 0.15109731 0.7242459  0.04066686 0.01386052 0.34704402
 0.02664177 0.92595416 0.3565687  0.33378359 0.96632699 0.04577418
 0.01922359 0.14132358 0.01138868 0.06163342 0.00446854 0.02092331
 0.28280411 0.97936562 0.17524497 0.39971352 0.93435257 0.1841566
 0.6509055  0.46861504 0.02423076 0.40150725 0.73583367 0.27788589
 0.50105506 0.23174452 0.1645667  0.0878604  0.5956761  0.06069035
 0.61486026 0.88781901 0.94959416 0.48372524 0.32979206 0.29185307
 0.81080777 0.03537664 0.99611372 0.89514953 0.60607545 0.99969917
 0.95203315]
2
[0.34321901 0.53587199 0.02243602 0.04402568 0.04243505 0.16315297
 0.05328486 0.01048995 0.18331389 0.00183299 0.06493595 0.01333599
 0.07886118 0.04569605 0.05273994 0.07657385 0.49752682 0.09035587
 0.48669058 0.34529645 0.46175134 0.08051731 0.18809498 0.31474186
 0.09400259 0.06323874 0.83318872 0.02894378 0.18894856 0.43688952
 0.99244557 0.01953689 1.         0.14775046 0.