In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, roc_curve, auc
from scipy import stats

# Compile bootstrap results to get point estimates and confidence intervals

In [2]:
# Load the pooled bootstrap predictions. Later cells read the `iteration`,
# `y_true`, `y_pred` and `y_prob_<class>` columns of this table.
predictions = pd.read_csv(filepath_or_buffer='../results/bayes_net/bootstrap_predictions.csv')

In [3]:
# Compute per-iteration classification metrics for class 1 (the `endo_*`
# columns) plus overall accuracy, and collect them in `bootstrap_metrics`
# with one row per bootstrap iteration.
n_bootstrap = 1000
# Number of class labels; a `y_prob_<j>` column is read for every j in range(n_classes).
n_classes = 5

# One record per iteration. The frame is built once after the loop instead of
# being re-concatenated on every pass (which is quadratic in n_bootstrap).
metric_records = []
for i in range(1, n_bootstrap+1):
    final_predictions = predictions.loc[predictions['iteration'] == i]
    y_true = final_predictions['y_true'].values
    y_pred = final_predictions['y_pred'].values
    report_dict = classification_report(y_true, y_pred, output_dict=True, zero_division=0)

    # One-vs-rest ROC curve, AUC, sensitivity and specificity for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    sen = dict()
    spec = dict()
    for j in range(n_classes):
        y_test_bin = np.int32(y_true == j)
        y_pred_bin = np.int32(y_pred == j)
        y_score = final_predictions['y_prob_%d' % j].values
        fpr[j], tpr[j], _ = roc_curve(y_test_bin, y_score, pos_label=1)
        roc_auc[j] = auc(fpr[j], tpr[j])

        # Binary confusion-matrix counts for class j versus the rest
        tp = int(np.count_nonzero((y_pred_bin == 1) & (y_test_bin == 1)))
        fp = int(np.count_nonzero((y_pred_bin == 1) & (y_test_bin == 0)))
        tn = int(np.count_nonzero((y_pred_bin == 0) & (y_test_bin == 0)))
        fn = int(np.count_nonzero((y_pred_bin == 0) & (y_test_bin == 1)))

        # A resample can contain no positives (or no negatives) for a class;
        # record the undefined rate as NaN instead of raising ZeroDivisionError.
        sen[j] = tp / (tp + fn) if (tp + fn) > 0 else np.nan
        spec[j] = tn / (tn + fp) if (tn + fp) > 0 else np.nan

    # Only class 1 and the overall accuracy are carried into the summary.
    metric_records.append({'iteration': i,
                           'endo_precision': report_dict['1']['precision'],
                           'endo_f1': report_dict['1']['f1-score'],
                           'endo_auc': roc_auc[1],
                           'endo_sen': sen[1],
                           'endo_spec': spec[1],
                           'accuracy': report_dict['accuracy']})

bootstrap_metrics = pd.DataFrame(metric_records)

In [4]:
# Summarise every bootstrap metric column (everything except `iteration`) as
# mean, standard error and a 95% normal-approximation interval, then write the
# table to CSV.
# NOTE(review): `stats.sem` divides the spread of the replicates by
# sqrt(number of replicates), so this interval describes the Monte Carlo mean
# and narrows as more replicates are added. If the intent is a bootstrap CI for
# the metric itself, the replicate standard deviation or percentiles are the
# usual choice -- confirm with the author before changing reported numbers.
summary_records = []
for col in bootstrap_metrics.drop(columns='iteration').columns.values:
    data = bootstrap_metrics[col].values

    mean = np.mean(data)
    se = stats.sem(data)

    # The confidence level is passed positionally: SciPy renamed the `alpha`
    # keyword to `confidence` and later removed `alpha`, so the positional
    # form is the one that works on both older and current releases.
    ci = stats.norm.interval(0.95, loc=mean, scale=se)

    summary_records.append({'metric': col,
                            'mean': mean,
                            'se': se,
                            'ci_low': ci[0],
                            'ci_high': ci[1]})

# Build the frame once rather than concatenating inside the loop.
metrics_summary = pd.DataFrame(summary_records)
metrics_summary.to_csv('../results/bayes_net/metrics_summary.csv', index=False)

## Bootstrap results: per-node probability summaries

In [5]:
# Load the bootstrap probability table. The next cell groups it by `node` and
# summarises every column other than `iteration` and `node`.
bootstrap_probabilites = pd.read_csv(filepath_or_buffer='../results/bayes_net/bootstrap_probabilities.csv')

In [6]:
# For every node and every value column (all columns except `iteration` and
# `node`), summarise the bootstrap replicates as mean, standard error and a 95%
# normal-approximation interval, then write the table to CSV.
# NOTE(review): `stats.sem` divides the spread of the replicates by
# sqrt(number of replicates), so this interval describes the Monte Carlo mean
# rather than the spread of the bootstrap distribution -- confirm that this is
# the intended interval.
prob_records = []
value_cols = bootstrap_probabilites.drop(columns=['iteration', 'node']).columns

# `unique()` yields nodes in first-appearance order, so the row order of the
# output file is reproducible; iterating a `set` gives no such guarantee.
for node in bootstrap_probabilites['node'].unique():
    # Select this node's rows once instead of re-filtering for every column.
    node_rows = bootstrap_probabilites.loc[bootstrap_probabilites['node'] == node]
    for col in value_cols:
        data = node_rows[col].values

        mean = np.mean(data)
        se = stats.sem(data)

        # The confidence level is passed positionally: SciPy renamed the
        # `alpha` keyword to `confidence` and later removed `alpha`.
        ci = stats.norm.interval(0.95, loc=mean, scale=se)

        prob_records.append({'node': node,
                             'metric': col,
                             'mean': mean,
                             'se': se,
                             'ci_low': ci[0],
                             'ci_high': ci[1]})

# Build the frame once rather than concatenating inside the nested loop.
prob_summary = pd.DataFrame(prob_records)
prob_summary.to_csv('../results/bayes_net/probabilities_summary.csv', index=False)