# Data Visualisation

In [None]:
# Change the working directory so that the relative paths used by later cells
# (e.g. '../saved_models/...') resolve consistently.
# NOTE(review): this is an absolute, machine-specific path; it must be adjusted
# before the notebook can be run from a different checkout.
%cd /Users/brk/projects/masters/SU/ergo/src

In [None]:
# Imports and setup
%load_ext autoreload
%autoreload 2
import seaborn as sns
import seaborn.objects as so
import matplotlib.pyplot as plt
import ipywidgets as widgets
import datetime
from ipywidgets import interact, interactive, fixed, interact_manual
import pandas as pd
import numpy as np
import models
import vis
import common
import read
import tensorflow as tf
from tensorflow import keras
from keras import layers
from sklearn.model_selection import train_test_split
import sklearn
import tqdm
import logging as l
import tqdm
import yaml
import glob
from matplotlib.colors import LogNorm
import re
from sklearn.metrics import classification_report

In [None]:
# Utility functions
def heatmap(fn, X_val, y_val, axis=1):
    """Plot a confusion-matrix heatmap for the predictions of ``fn`` on ``X_val``.

    The output of ``fn(X_val)`` is passed through a softmax and the arg-max
    along ``axis`` is used as the predicted label. True and predicted labels
    are flattened before the confusion matrix is computed. The bottom-right
    cell of the matrix is set to zero and every zero cell is masked out of
    the plot.

    Args:
        fn: Callable applied to ``X_val``; its result is fed to ``tf.nn.softmax``.
        X_val: Inputs handed to ``fn``.
        y_val: True labels; must provide ``.flatten()``.
        axis: Axis along which ``np.argmax`` selects the predicted label.

    Returns:
        The return value of ``sns.heatmap`` for the masked confusion matrix.
    """
    probabilities = tf.nn.softmax(fn(X_val)).numpy()
    predicted_labels = np.argmax(probabilities, axis=axis)

    confusion = tf.math.confusion_matrix(
        y_val.flatten(),
        predicted_labels.flatten(),
    ).numpy()
    # Blank out the (last label, last label) cell so it is masked below.
    # NOTE(review): presumably the last label is a dominant background class
    # that would otherwise swamp the colour scale -- confirm.
    confusion[-1, -1] = 0

    heatmap_options = dict(
        annot=False,
        fmt='d',
        square=True,
        mask=(confusion == 0),
        cmap='viridis',
    )
    return sns.heatmap(confusion, **heatmap_options)

def prettify_col_name(x):
    """Turn a dotted column name into a human-readable title.

    Only the part after the final ``.`` is kept (the whole string if there is
    no dot); underscores become spaces and the result is title-cased.

    Example: ``'ffnn.dropout_rate'`` -> ``'Dropout Rate'``.
    """
    _, _, last_part = x.rpartition('.')
    spaced = ' '.join(last_part.split('_'))
    return spaced.title()

## Load data

In [None]:
# Load the hyperparameter-optimisation results: one JSON-lines file per model family
df_ffnn = pd.read_json('../saved_models/results_ffnn_opt_bigger.jsonl', lines=True)
df_cusum = pd.read_json('../saved_models/results_cusum_opt_bigger.jsonl', lines=True)
df_hmm = pd.read_json('../saved_models/results_hmm_opt_bigger.jsonl', lines=True)

# Stack the three result sets into a single dataframe.
# - The index is renumbered 0..n-1 to avoid a seaborn error:
#   https://github.com/mwaskom/seaborn/issues/3291
# - The trailing copy avoids a dataframe fragmentation warning.
df = pd.concat([df_ffnn, df_cusum, df_hmm], ignore_index=True).copy()

In [None]:
# Add a few convenience columns to df, then list the hyperparameter columns
# (everything that is not a val/trn/ratio metric or a bookkeeping column).


def _layer_width(position):
    """Build a picker returning entry ``position`` of a list of layer widths.

    Values that are not lists are passed through unchanged.
    """
    def pick(nodes):
        if isinstance(nodes, list):
            return nodes[position]
        return nodes
    return pick


def _list_to_str(value):
    """Stringify list values (lists are unhashable); leave everything else as-is."""
    if isinstance(value, list):
        return str(value)
    return value


# One column per hidden layer, taken from the list-valued nodes_per_layer column
df['ffnn.nodes_per_layer.1'] = df['ffnn.nodes_per_layer'].apply(_layer_width(0))
df['ffnn.nodes_per_layer.2'] = df['ffnn.nodes_per_layer'].apply(_layer_width(1))

# Training-to-validation ratio of the macro-average F1 score
df['ratio.macro avg.f1-score'] = (
    df['trn.macro avg.f1-score'].div(df['val.macro avg.f1-score'])
)

# Columns whose name contains one of these substrings hold results, not settings
_metric_markers = ('val', 'trn', 'ratio')
# Columns that are neither results nor settings of interest
_ignored_columns = ('saved_at', 'fit_time', 'preprocessing.gesture_allowlist')

dep_vars = sorted(
    (
        col for col in df.columns
        if not any(marker in col for marker in _metric_markers)
        and col not in _ignored_columns
    ),
    key=str,
)
print(f"Dependant variables: {dep_vars}")
print("\nVariables which change:")

# Width of the longest name, used to left-align the printed values
max_len = max(len(name) for name in dep_vars)

# Report only the variables that take more than one distinct value
for var in dep_vars:
    uniq = df[var].apply(_list_to_str).unique()
    if len(uniq) <= 1:
        continue
    print(f"{var: <{max_len}} {uniq}")

# Plot the data

## FFNN vs HMM vs CuSUM

In [None]:
# Validation macro-average F1 per model type, one facet row per number of
# gesture classes. All models are included.
subset = df

model_type_labels = dict(
    x="Model Type",
    color='Model Type',
    y="Macro Average\n$F_1$ Score",
    # A callable title: seaborn formats each facet's value into it
    title="{} Gesture Classes".format,
)

model_type_plot = (
    so.Plot(subset, x='model_type', y='val.macro avg.f1-score', color='model_type')
    .layout(size=(8, 6))
    .add(so.Dots(pointsize=3), so.Jitter())
    .facet(row='preprocessing.num_gesture_classes')
    .label(**model_type_labels)
)
# Last expression of the cell, so the notebook renders the plot
model_type_plot

## CuSUM Hyperparameter Comparison

In [None]:
# CuSUM results only: validation macro-average F1 against the CuSUM threshold,
# one facet row per number of gesture classes.
subset = df.loc[df['model_type'] == 'CuSUM']

cusum_labels = dict(
    x="CuSUM Threshold",
    y="Macro Average\n$F_1$ Score",
    # A callable title: seaborn formats each facet's value into it
    title="CuSUM models\n({} Gesture Classes)".format,
)

cusum_plot = (
    so.Plot(subset, x='cusum.thresh', y='val.macro avg.f1-score')
    .layout(size=(8, 6))
    .add(so.Dots(pointsize=3), so.Jitter())
    .facet(row='preprocessing.num_gesture_classes')
    # Fixed y-range slightly wider than [0, 1]
    .limit(y=(-.1, 1.1))
    .label(**cusum_labels)
)
# Last expression of the cell, so the notebook renders the plot
cusum_plot

## HMM Comparison

In [None]:
# HMM results only: validation macro-average F1 against the number of gesture
# classes. No explicit layout size is set for this plot.
subset = df.loc[df['model_type'] == 'HMM']

hmm_labels = dict(
    x="Number of Gesture Classes",
    y="Macro Average\n$F_1$ Score",
    title="HMM models",
)

hmm_plot = (
    so.Plot(subset, x='preprocessing.num_gesture_classes', y='val.macro avg.f1-score')
    .add(so.Dots(pointsize=3), so.Jitter())
    # Fixed y-range slightly wider than [0, 1]
    .limit(y=(-.1, 1.1))
    .label(**hmm_labels)
)
# Last expression of the cell, so the notebook renders the plot
hmm_plot

## FFNN Hyperparameter Comparison

In [None]:
# FFNN hyperparameters to inspect; one figure is produced per entry
changing_ffnn_vars = [
    'ffnn.dropout_rate',
    'nn.learning_rate',
    'ffnn.l2_coefficient',
    'ffnn.nodes_per_layer.1',
    'ffnn.nodes_per_layer.2',
]

# The FFNN subset does not depend on the hyperparameter being plotted, so it
# is built once here instead of on every loop iteration.
ffnn_rows = df[df.model_type == 'FFNN']
subset = ffnn_rows.assign(**{
    # Convert num_gesture_classes to a str so it is treated categorically
    'preprocessing.num_gesture_classes':
        ffnn_rows['preprocessing.num_gesture_classes'].apply(str),
})

for var_of_interest in changing_ffnn_vars:
    (
        so.Plot(subset, x='preprocessing.num_gesture_classes')
        .add(so.Dots(pointsize=3), so.Jitter())
        # One subplot row per metric
        .pair(y=['val.macro avg.f1-score', 'val.macro avg.recall', 'val.macro avg.precision'])
        # One subplot column per distinct value of the hyperparameter
        .facet(col=var_of_interest)
        # Fixed y-range slightly wider than [0, 1]
        .limit(y=(-.1, 1.1))
        .label(
            # Title is "<hyperparameter>: \n<facet value>"; .show() runs inside
            # the loop, so var_of_interest still has this iteration's value
            title=lambda s: f"{var_of_interest}: \n{s}",
            x="#Gesture Classes",
            y0="Macro Average\n$F_1$ Score",
            y1="Macro Average\nRecall",
            y2="Macro Average\nPrecision",
        )
        # Explicit show() is needed: an expression inside a loop body is not
        # auto-displayed by the notebook
        .show()
    )