# Context
This notebook drives the training process for different models.

In [1]:
# Set project's environment variables
import os
import sys
from dotenv import load_dotenv
# Load the variables declared in ../project.env; the relative path is resolved
# against the kernel's current working directory.
load_dotenv(dotenv_path="../project.env")
# Make the location stored in PYTHONPATH importable from this kernel.
# Raises KeyError when PYTHONPATH is not set. The value is appended as a single
# sys.path entry, so it is expected to hold one directory rather than an
# os.pathsep-separated list.
# NOTE(review): presumably this is what lets the project header imports in the
# next cell resolve — confirm.
sys.path.append(os.environ["PYTHONPATH"])

In [None]:
# Import project-wide (superheader) and training-specific (TRAINheader) variables and functions
import superheader as sup
import TRAINheader as train

# Datasets

In [None]:
# Check the PH3 data root before any training starts.
# NOTE(review): judging by its name, the helper reports a missing directory
# rather than creating it — confirm in superheader.
sup.report_dir_if_not_exists(sup.PH3_DATA_ROOT)

# Model training

## KNN

### Record keeping

In [None]:
# Best KNN configuration seen so far, kept separately for each prediction target.
# Every record starts from sentinel values (score / n / k = -1, text fields empty)
# and is overwritten by update_best_knn whenever a strictly higher score shows up.
best_scores = {
    target: dict(
        score=-1,
        data_unit="",
        subcode="",
        sub2code="",
        reducer="",
        kernel="",
        n=-1,
        k=-1,
    )
    for target in ("active_hand", "class_numeric")
}

def update_best_knn(score, label_col, n, k, data_unit, subcode, sub2code, reducer_name, kernel_name):
    """Store a KNN run in ``best_scores`` when it beats the current best for its target.

    The target record is chosen from ``label_col``: ``sup.active_hand_col`` maps
    to ``"active_hand"`` and ``sup.class_numeric_column`` maps to
    ``"class_numeric"``. Any other label column is ignored.

    Args:
        score: Score of the run; it replaces the stored one only if strictly greater.
        label_col: Label column the model was trained on.
        n: The ``n`` value of the evaluated dataset (stored as given).
        k: The ``k`` passed to the KNN model.
        data_unit: Data unit identifier of the evaluated dataset.
        subcode: First-level dataset variant code.
        sub2code: Second-level dataset variant code.
        reducer_name: Name of the reducer, stored under ``"reducer"``.
        kernel_name: Name of the reducer kernel, stored under ``"kernel"``.

    Returns:
        None. ``best_scores`` is mutated in place and a short notice is printed
        whenever a record is replaced.
    """
    if label_col == sup.active_hand_col:
        target = "active_hand"
    elif label_col == sup.class_numeric_column:
        target = "class_numeric"
    else:
        # Label column without a tracked record: nothing to update.
        return

    record = best_scores[target]
    if score > record["score"]:
        print("updating best...")
        record.update(
            score=score,
            data_unit=data_unit,
            subcode=subcode,
            sub2code=sub2code,
            reducer=reducer_name,
            kernel=kernel_name,
            n=n,
            k=k,
        )

# Flat log of every evaluated configuration, one list per model fit.
# The score-analysis section turns it into a DataFrame using the same field order.
score_tracker = []

def keep_scores_knn(score, label_col, n, k, data_unit, subcode, sub2code, reducer_name, kernel_name):
    """Append one evaluated configuration to ``score_tracker``.

    The arguments are stored, unchanged and in signature order, as a single
    list: ``[score, label_col, n, k, data_unit, subcode, sub2code,
    reducer_name, kernel_name]``.

    Returns:
        None. ``score_tracker`` is mutated in place.
    """
    row = [score, label_col, n, k, data_unit, subcode, sub2code, reducer_name, kernel_name]
    score_tracker.append(row)

def print_best_knn(label_col):
    """Print the best KNN configuration recorded for the target behind ``label_col``.

    ``sup.active_hand_col`` selects the ``"active_hand"`` record of
    ``best_scores`` and ``sup.class_numeric_column`` selects the
    ``"class_numeric"`` one. For any other label column nothing is printed.

    Args:
        label_col: Label column whose best record should be reported.
    """
    if label_col == sup.active_hand_col:
        target = "active_hand"
    elif label_col == sup.class_numeric_column:
        target = "class_numeric"
    else:
        return

    record = best_scores[target]
    # (caption, record field) pairs, in the order they are reported.
    report_lines = (
        ("Best score", "score"),
        ("Best k", "k"),
        ("Best n", "n"),
        ("Data unit", "data_unit"),
        ("PH2 or no PH2", "subcode"),
        ("Reduced or scaled", "sub2code"),
        ("Best reducer", "reducer"),
        ("Best kernel", "kernel"),
    )
    for caption, field in report_lines:
        print(f"{caption}: {record[field]}")


### Helper functions

In [None]:
def try_all_n_k(data_unit, label_col, subcode, sub2code, reducer_name="", n="", kernel_name=""):
    """Fit and score a KNN model on one dataset file for every candidate ``k``.

    The dataset file is
    ``<PH3_DATA_ROOT>/<subcode>/<sub2code>/<reducer_name>/<kernel_name>/<data_unit><n>.csv``;
    path components left at their empty-string default are skipped by
    ``os.path.join``. Despite the name, only ``k`` is swept here: ``n`` is
    fixed per call and only selects which file is read.

    Each run is appended to the score log via ``keep_scores_knn`` and offered
    to ``update_best_knn``.

    Args:
        data_unit: Base name of the dataset file.
        label_col: Label column handed to the KNN model.
        subcode: First-level directory under the PH3 data root.
        sub2code: Second-level directory under the PH3 data root.
        reducer_name: Optional reducer directory name.
        n: Optional suffix appended to ``data_unit`` in the file name.
        kernel_name: Optional kernel directory name.
    """
    # The file location does not depend on k, so build it once rather than on
    # every iteration of the loop.
    data_path = os.path.join(
        sup.PH3_DATA_ROOT, subcode, sub2code, reducer_name, kernel_name, f"{data_unit}{n}.csv"
    )

    for k in train.TRAIN_KNN_K_CANDIDATES:
        model = train.arch.KNN(data_path=data_path, label_col=label_col, k=k)
        model.fit()
        score = model.score()

        keep_scores_knn(score, label_col, n, k, data_unit, subcode, sub2code, reducer_name, kernel_name)
        update_best_knn(score, label_col, n, k, data_unit, subcode, sub2code, reducer_name, kernel_name)

In [None]:
def best_KNN(data_unit, label_col):
    """Evaluate KNN on every dataset variant of ``data_unit`` and print the best result.

    Walks all ``sup.PH3_SUB_CODES`` x ``sup.PH3_SUB2_CODES`` combinations:

    * a sub2code other than ``sup.PH3_REDUCED_CODE`` is evaluated once, with
      no reducer, ``n`` or kernel;
    * the reduced sub2code is evaluated for every reducer name and every ``n``
      candidate, and additionally for every kernel name when the reducer is
      ``sup.PH3_REDUCER_NAME_KPCA``.

    Note that ``best_scores`` is never reset here, so the record printed at
    the end is the best one seen for ``label_col`` since the record-keeping
    cell was last run, which may stem from an earlier call with another
    ``data_unit``.

    Args:
        data_unit: Base name of the dataset files to evaluate.
        label_col: Label column to predict.
    """
    for subcode in sup.PH3_SUB_CODES:
        for sub2code in sup.PH3_SUB2_CODES:
            if sub2code != sup.PH3_REDUCED_CODE:
                # No reducer / n / kernel dimension to sweep for this variant.
                try_all_n_k(data_unit, label_col, subcode, sub2code)
                continue

            for reducer_name in sup.PH3_REDUCER_NAMES:
                for n in sup.PH3_N_CANDIDATES:
                    if reducer_name != sup.PH3_REDUCER_NAME_KPCA:
                        try_all_n_k(data_unit, label_col, subcode, sub2code, reducer_name, n)
                        continue

                    # Only the KPCA reducer has a separate dataset per kernel.
                    for kernel_name in sup.PH3_REDUCER_KERNEL_NAMES:
                        try_all_n_k(data_unit, label_col, subcode, sub2code, reducer_name, n, kernel_name)

    print_best_knn(label_col)
  

### Active Hand prediction

In [None]:
# Sweep every dataset variant of DATA_AH_PF with the active-hand label column
# and print the best KNN configuration found.
best_KNN(sup.DATA_AH_PF, sup.active_hand_col)

### Sign prediction

In [None]:
# Sweep every dataset variant of DATA_S_PF with the numeric class label column
# and print the best KNN configuration found.
best_KNN(sup.DATA_S_PF, sup.class_numeric_column)

In [None]:
# Same sweep for DATA_S_PV. The "class_numeric" best record is shared with the
# DATA_S_PF run above, so the configuration printed here is the best across
# both runs; the "Data unit" line tells which dataset it came from.
best_KNN(sup.DATA_S_PV, sup.class_numeric_column)

# Score analysis

In [None]:
import pandas as pd

In [None]:
# Column names, in the same order as the fields of each row appended by keep_scores_knn.
tracker_columns = ['score', 'label_col', 'n', 'k', 'data_unit', 'subcode', 'sub2code', 'reducer_name', 'kernel_name']
# One row per model fit logged in score_tracker.
all_scores_df = pd.DataFrame(score_tracker, columns=tracker_columns)

## Active hand detection

In [None]:
# Keep only the runs trained on the active-hand label column.
# The boolean mask is passed directly: wrapping it in a list makes pandas treat
# it as a list of column labels and raise instead of filtering rows.
ah_scores = all_scores_df.loc[all_scores_df['label_col'] == sup.active_hand_col]

## Sign recognition

In [None]:
# Keep only the runs trained on the numeric class label column.
# The boolean mask is passed directly: wrapping it in a list makes pandas treat
# it as a list of column labels and raise instead of filtering rows.
s_scores = all_scores_df.loc[all_scores_df['label_col'] == sup.class_numeric_column]

### Only per frame data

In [None]:
# Sign-recognition runs evaluated on the DATA_S_PF data unit only.
# The boolean mask is passed directly: wrapping it in a list makes pandas treat
# it as a list of column labels and raise instead of filtering rows.
s_pf_scores = s_scores.loc[s_scores['data_unit'] == sup.DATA_S_PF]

### Only per video data

In [None]:
# Sign-recognition runs evaluated on the DATA_S_PV data unit only.
# The boolean mask is passed directly: wrapping it in a list makes pandas treat
# it as a list of column labels and raise instead of filtering rows.
s_pv_scores = s_scores.loc[s_scores['data_unit'] == sup.DATA_S_PV]