# BTAP_ML Phase 3, Task 2 Analytics

**Description**    
This notebook analyzes the results of a set of completed training tests. It requires a directory (set via `ANALYSIS_FOLDER` below) that contains the *.json* output files produced by those tests.

In [None]:
import pandas as pd
import numpy as np
import os
import math
import random
import json
import csv

In [None]:
# Directory that is scanned for the *.json result files to analyze.
# This is a relative path, so it is resolved against the kernel's working directory.
# Keep the trailing slash so that appending a file name yields a valid path.
ANALYSIS_FOLDER = "../../btap-gpu-vol-1/btap_tests/_task2/"

In [None]:
# Column layout of the results table: the five hyperparameter columns first, followed
# by one column per (period, split, metric) combination, named
# "<period>_<split>_<metric>". The nesting order of the loops below fixes the column
# order: daily before annual, test before val, then mse, rmse, mae, mape.
CSV_HEADERS = [
    "activation", "learning_rate", "batch_size", "layers", "dropout",
] + [
    f"{period}_{split}_{metric}"
    for period in ("daily", "annual")
    for split in ("test", "val")
    for metric in ("mse", "rmse", "mae", "mape")
]

In [None]:
# Build the results table: one row per *.json results file found in ANALYSIS_FOLDER.
#
# Rows are collected as dicts and turned into a DataFrame once at the end. Growing a
# frame with df.loc[len(df)] inside the loop is slow for large result sets and leaves
# the column dtypes to whatever pandas infers during enlargement; building from
# records gives the metric columns a numeric dtype when the JSON values are numbers.
records = []

# os.listdir returns entries in arbitrary order; sorting makes the row order (and
# therefore tie-breaking in any later sort) reproducible between runs.
for filename in sorted(os.listdir(ANALYSIS_FOLDER)):
    # Match on the extension only, so names that merely contain ".json"
    # (e.g. "run.json.bak") are not parsed as result files.
    if not filename.endswith(".json"):
        continue

    # os.path.join works whether or not ANALYSIS_FOLDER ends with a path separator.
    filepath = os.path.join(ANALYSIS_FOLDER, filename)
    with open(filepath, 'r', encoding='utf-8') as json_contents:
        print("Loading file:", filepath)
        analysis_json = json.load(json_contents)

    # Decompose the filename. Fields 1-5 of the "_"-separated stem are read as the
    # hyperparameters; field 0 is ignored. The values are kept as strings.
    filename_split = filename[:-len(".json")].split("_")
    if len(filename_split) < 6:
        raise ValueError(
            "Cannot read hyperparameters from file name %r: expected at least 6 "
            "'_'-separated fields, found %d" % (filename, len(filename_split))
        )
    activation, learning_rate, batch_size, layers, dropout = filename_split[1:6]

    record = {
        "activation": activation,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "layers": layers,
        "dropout": dropout,
    }
    # The JSON groups metrics under "<split>_<period>_metric", while the table columns
    # are named "<period>_<split>_<metric>".
    for period in ("daily", "annual"):
        for split in ("test", "val"):
            metrics = analysis_json[split + "_" + period + "_metric"]
            for metric in ("mse", "rmse", "mae", "mape"):
                record[period + "_" + split + "_" + metric] = metrics[metric]
    records.append(record)

# columns=CSV_HEADERS fixes the column order and still yields an empty, correctly
# labelled frame when no result files were found.
df = pd.DataFrame(records, columns=CSV_HEADERS)

In [None]:
# Lift the column cap so every column of the results table is rendered, then preview
# the first five rows (the default for head()).
pd.options.display.max_columns = None
df.head()

In [None]:
# Rank the runs by annual validation RMSE, lowest value first, and show the first 60.
ranked_runs = df.sort_values(by="annual_val_rmse", ascending=True)
ranked_runs.head(60)