---

# Process the Performance Report

---

In [10]:
# package imports go here
import pandas as pd
import numpy as np
import fastparquet as fp
import os
import sys
import pickle
import matplotlib.pyplot as plt
import importlib

sys.path.insert(1, 'pkgs')
import ml_analysis as mlanlys

In [11]:
# Locations of the pickled performance report (input) and of its
# plain-text rendering (output), both inside the reports directory.
report_path = 'reports/'
performance_report = os.path.join(report_path, 'performance_report.pkl')
performance_report_text_file = os.path.join(report_path, 'performance_report.txt')

---

## 1. Read the Performance Report

---

In [12]:
# Load the pickled performance report into `perf_report`.
# Fail fast when the file is missing: every later cell needs `perf_report`,
# and continuing would either raise a confusing NameError further down or
# silently reuse a stale object left in the kernel from a previous run.
if not os.path.exists(performance_report):
    print("******************************************************")
    print(f"The file: '{performance_report}' DOES NOT EXIST")
    print("******************************************************")
    raise FileNotFoundError(f"Performance report not found: '{performance_report}'")

# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling;
# only load report files produced by a trusted source.
with open(performance_report, 'rb') as file:
    perf_report = pickle.load(file)
print(f"The file {performance_report} exists and the Performance Report was read successfully")

The file reports/performance_report.pkl exists and the Performance Report was read successfully


---

## 2. Write the Performance Report

---

In [13]:

# Names of the datasets stored in the loaded report, in the dict's own order.
# NOTE: .keys() returns a live view onto perf_report, not an independent list.
perf_datasets_in = perf_report.keys()

# Bare last expression so the notebook displays the dataset names.
perf_datasets_in

dict_keys(['RandomUndersampled Dataset', 'Binary Dataset', 'Standard Scaled Dataset', 'Cluster Dataset', 'Base Dataset', 'RandomOverSample Dataset', 'SMOTEEN Dataset', 'MinMax Scaled Dataset', 'SMOTE Dataset'])

### 2.1 Modify the order of the reported datasets

---

In [14]:
# Use this cell to re-order the datasets for the report.
# As written, perf_datasets is just another name for perf_datasets_in, so the
# order is unchanged; assign an explicit list of dataset names to customise it.
# NOTE(review): confirm that the report cell below actually consumes perf_datasets.

perf_datasets = perf_datasets_in

### 2.2 Print the report

---

In [15]:
def print_performance_report(perf_report, dataset_order=None):
    """Print a summary header followed by the details of every dataset report.

    Parameters
    ----------
    perf_report : mapping
        Maps a dataset name to a dict with two entries:
        ``'dataset_size'`` (indexable; element 0 is printed as the row count
        and element 1 as the column count) and ``'report'`` (an object with a
        ``to_string(index=False)`` method, e.g. a pandas DataFrame).
    dataset_order : iterable of str, optional
        Dataset names in the order they should be printed. Only the listed
        datasets are reported. When omitted, every dataset in ``perf_report``
        is printed in the mapping's own iteration order.

    Raises
    ------
    KeyError
        If ``dataset_order`` names a dataset that is not in ``perf_report``.
        The check runs before anything is printed, so no partial report is
        emitted.
    """
    banner = "**********************************************************"
    rule = "----------------------------------------"

    if dataset_order is None:
        names = list(perf_report)
    else:
        names = list(dataset_order)
        missing = [name for name in names if name not in perf_report]
        if missing:
            raise KeyError(f"Datasets not found in the performance report: {missing}")

    print(banner)
    print("Diabetes Predictions Performance Summary Report")
    print(banner)

    # Table of contents: one line per dataset that will be detailed below.
    print(rule)
    print("Report Summary")
    print(rule)

    for name in names:
        print(f"Performance for: {name}")

    print(rule)

    # Detailed section: dataset size followed by the metrics table.
    for name in names:
        entry = perf_report[name]
        dataset_size = entry['dataset_size']
        report_df = entry['report']
        print(rule)
        print(f"Performance for: {name}")
        print(f"Dataset Size:    {dataset_size[0]} Rows, {dataset_size[1]} Columns")
        print(rule)
        print(report_df.to_string(index=False))

In [16]:
# Generate the Performance Report and send prints to osc.stdout.
# The report dict is rebuilt in perf_datasets order so that the re-ordering
# cell (section 2.1) actually controls the order of the printed report;
# with the default perf_datasets the order is unchanged.
ordered_perf_report = {name: perf_report[name] for name in perf_datasets}
with mlanlys.OutStreamCapture() as osc:
    print_performance_report(ordered_perf_report)

In [17]:
# osc.stdout contains the details of the performance report;
# write the performance report to the performance_report_text_file.
# The encoding is explicit so the file content does not depend on the
# platform's locale default (assumes osc.stdout is a str; confirm in ml_analysis).
with open(performance_report_text_file, "w", encoding="utf-8") as file:
    file.write(osc.stdout)

In [18]:
# Display the performance report here:
# this echoes the text captured in osc.stdout into the notebook output.
print(osc.stdout)

**********************************************************
Diabetes Predictions Performance Summary Report
**********************************************************
----------------------------------------
Report Summary
----------------------------------------
Performance for: RandomUndersampled Dataset
Performance for: Binary Dataset
Performance for: Standard Scaled Dataset
Performance for: Cluster Dataset
Performance for: Base Dataset
Performance for: RandomOverSample Dataset
Performance for: SMOTEEN Dataset
Performance for: MinMax Scaled Dataset
Performance for: SMOTE Dataset
----------------------------------------
----------------------------------------
Performance for: RandomUndersampled Dataset
Dataset Size:    253680 Rows, 22 Columns
----------------------------------------
                     model slice    score  balanced_accuracy  roc_auc_score
      KNeighborsClassifier Train 0.830986           0.830986       0.913746
      KNeighborsClassifier  Test 0.680921           