# Context
This notebook drives the training process for different models.

In [1]:
# Set project's environment variables
import os
import sys
from dotenv import load_dotenv
# The env file path is relative, so it is resolved against the kernel's
# current working directory.
load_dotenv(dotenv_path="../project.env")
# Put the project's source directory on the import path so the project
# headers imported in the next cell can be found. Raises KeyError if
# PYTHONPATH is not defined in the environment at this point.
# NOTE(review): assumes PYTHONPATH holds a single directory rather than an
# os.pathsep-separated list — confirm against project.env.
sys.path.append(os.environ["PYTHONPATH"])

import pickle

In [2]:
# Import project-wide and PH2 specific variables and functions
import superheader as sup
import TRAINheader as train



Chosen class grouping: three-classes


Directory /Users/diego/Desktop/iteso/TOG/ exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/src exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/bin exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/media exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/scores exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data/PH3/three-classes exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes exists. Continuing with execution


# Datasets

In [3]:
# Check the PH3 data root before training; when the directory is present this
# prints a "Directory ... exists" message (see the cell output).
# NOTE(review): behavior when the directory is missing is defined in
# superheader and is not visible here.
sup.report_dir_if_not_exists(sup.PH3_DATA_ROOT)

Directory /Users/diego/Desktop/iteso/TOG/data/PH3/three-classes exists. Continuing with execution


# Model training

## KNN

### Record keeping

In [4]:
# Best KNN configuration found so far, keyed by data unit. Every record starts
# with placeholder values (-1 / empty string) that update_best_knn overwrites
# the first time a score beats the stored one.
best_scores = {
    unit: {
        "score": -1,
        "subcode": "",
        "sub2code": "",
        "reducer": "",
        "kernel": "",
        "n": -1,
        "k": -1,
    }
    for unit in (sup.DATA_AH_PF, sup.DATA_S_PF, sup.DATA_S_PV)
}

def update_best_knn(score, data_unit, n, k, subcode, sub2code, reducer_name, kernel_name, model):
    """Store a new best KNN result for ``data_unit`` and pickle its model.

    Nothing happens unless ``score`` is strictly greater than the score
    currently recorded in ``best_scores[data_unit]``. When it is, the model is
    pickled under ``TRAIN_BINGEN_ROOT/TRAIN_KNN_CODE/<data_unit>/`` with a file
    name built from the configuration, and the record is overwritten with that
    configuration. Pickles written for earlier bests are left in place.

    Args:
        score: Score obtained by ``model``.
        data_unit: Key of ``best_scores`` this result belongs to.
        n: Value used in the ``_n`` part of the file name and stored as "n".
        k: Value used in the ``_k`` part of the file name and stored as "k".
        subcode: First preprocessing code of the dataset variant.
        sub2code: Second preprocessing code of the dataset variant.
        reducer_name: Reducer name (may be an empty string).
        kernel_name: Kernel name (may be an empty string).
        model: Fitted model object to serialize.

    Raises:
        KeyError: If ``data_unit`` has no entry in ``best_scores``.
    """
    current_best = best_scores[data_unit]
    if not (score > current_best["score"]):
        return

    print(f"updating best... {score}")

    # Persist the winning model next to the other models of this data unit.
    target_dir = os.path.join(sup.TRAIN_BINGEN_ROOT, sup.TRAIN_KNN_CODE, data_unit)
    sup.create_dir_if_not_exists(target_dir)
    file_name = f"{subcode}-{sub2code}-{reducer_name}-{kernel_name}_n{n}_k{k}.pkl"
    with open(os.path.join(target_dir, file_name), "wb") as out_file:
        pickle.dump(model, out_file)

    current_best.update(
        score=score,
        data_unit=data_unit,
        subcode=subcode,
        sub2code=sub2code,
        reducer=reducer_name,
        kernel=kernel_name,
        n=n,
        k=k,
    )

# Log of every evaluated configuration: one list per run, in the order
# [score, data_unit, n, k, subcode, sub2code, reducer_name, kernel_name].
knn_score_tracker = []

def keep_scores_knn(score, data_unit, n, k, subcode, sub2code, reducer_name, kernel_name):
    """Append one evaluation result to ``knn_score_tracker``."""
    row = [score, data_unit, n, k, subcode, sub2code, reducer_name, kernel_name]
    knn_score_tracker.append(row)

def print_best_knn(data_unit):
    """Print the best recorded KNN configuration for ``data_unit``.

    Reads the record from ``best_scores`` and prints one labelled line per
    field. Raises ``KeyError`` if ``data_unit`` has no record.
    """
    record = best_scores[data_unit]
    print(f"Data Unit: {data_unit}")

    # (label shown to the reader, key in the record), in display order.
    report_lines = (
        ("Best score", "score"),
        ("Best k", "k"),
        ("Best n", "n"),
        ("PH2 or no PH2", "subcode"),
        ("Reduced or scaled", "sub2code"),
        ("Best reducer", "reducer"),
        ("Best kernel", "kernel"),
    )
    for label, key in report_lines:
        print(f"{label}: {record[key]}")


### Helper functions

In [5]:
def try_all_n_k(data_unit, label_col, subcode, sub2code, reducer_name="", n="", kernel_name=""):
    """Train and score one KNN per candidate k on a single dataset variant.

    The dataset is the CSV at
    ``PH3_DATA_ROOT/<subcode>/<sub2code>/<reducer_name>/<kernel_name>/<data_unit><n>.csv``.
    ``os.path.join`` skips empty intermediate components, so leaving
    ``reducer_name`` / ``kernel_name`` at their empty defaults simply omits
    those directory levels.

    Every score is logged with ``keep_scores_knn``; ``update_best_knn`` then
    decides whether the model becomes the new best for ``data_unit``.

    Args:
        data_unit: Data unit code; also the stem of the CSV file name.
        label_col: Label column passed to the KNN wrapper.
        subcode: First preprocessing code (directory level).
        sub2code: Second preprocessing code (directory level).
        reducer_name: Reducer directory level, or "" when not applicable.
        n: Suffix appended to the file stem, or "" when not applicable.
        kernel_name: Kernel directory level, or "" when not applicable.
    """
    # The path does not depend on k, so build it once instead of per iteration.
    data_path = os.path.join(
        sup.PH3_DATA_ROOT, subcode, sub2code, reducer_name, kernel_name, f"{data_unit}{n}.csv"
    )

    for k in sup.TRAIN_KNN_K_CANDIDATES:
        model = train.arch.KNN(data_path=data_path, label_col=label_col, k=k)
        model.fit()
        score = model.score()

        keep_scores_knn(score, data_unit, n, k, subcode, sub2code, reducer_name, kernel_name)
        update_best_knn(score, data_unit, n, k, subcode, sub2code, reducer_name, kernel_name, model)

In [6]:
def best_KNN(data_unit, label_col):
    """Run the KNN search over every dataset variant of ``data_unit``.

    For each subcode / sub2code combination:
      * variants other than ``PH3_REDUCED_CODE`` are evaluated directly;
      * reduced variants are evaluated for every reducer and every n
        candidate, and additionally for every kernel when the reducer is
        ``PH3_REDUCER_NAME_KPCA``.

    Each evaluation goes through ``try_all_n_k``. The best configuration
    recorded for ``data_unit`` is printed at the end.
    """
    for subcode in sup.PH3_SUB_CODES:
        for sub2code in sup.PH3_SUB2_CODES:
            if sub2code != sup.PH3_REDUCED_CODE:
                # Non-reduced data has no reducer / n / kernel dimensions.
                try_all_n_k(data_unit, label_col, subcode, sub2code)
                continue

            for reducer_name in sup.PH3_REDUCER_NAMES:
                for n in sup.PH3_N_CANDIDATES:
                    if reducer_name != sup.PH3_REDUCER_NAME_KPCA:
                        try_all_n_k(data_unit, label_col, subcode, sub2code, reducer_name, n)
                        continue

                    # Kernel PCA adds one more dimension: the kernel.
                    for kernel_name in sup.PH3_REDUCER_KERNEL_NAMES:
                        try_all_n_k(data_unit, label_col, subcode, sub2code, reducer_name, n, kernel_name)

    print_best_knn(data_unit)
  

### Active hand prediction

In [7]:
# Full KNN search for the active-hand data unit, labelled by
# sup.active_hand_col. Each improvement is pickled and announced with
# "updating best..."; the best configuration is printed at the end.
best_KNN(sup.DATA_AH_PF, sup.active_hand_col)

updating best... 0.9833887043189369
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/AHpf exists. Continuing with execution
updating best... 0.9867109634551495
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/AHpf exists. Continuing with execution
updating best... 0.9900332225913622
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/AHpf exists. Continuing with execution
updating best... 0.9920265780730897
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/AHpf exists. Continuing with execution
updating best... 0.9926910299003322
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/AHpf exists. Continuing with execution
updating best... 0.9940199335548173
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/AHpf exists. Continuing with execution
updating best... 0.9953488372093023
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/AHp

### Sign prediction

In [8]:
# Full KNN search for the sup.DATA_S_PF sign data unit, labelled by
# sup.class_numeric_column.
best_KNN(sup.DATA_S_PF, sup.class_numeric_column)

updating best... 0.9581699346405229
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spf exists. Continuing with execution
updating best... 0.9647058823529412
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spf exists. Continuing with execution
updating best... 0.9830065359477124
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spf exists. Continuing with execution
updating best... 0.984313725490196
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spf exists. Continuing with execution
updating best... 0.9856209150326798
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spf exists. Continuing with execution
updating best... 0.9882352941176471
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spf exists. Continuing with execution
updating best... 0.9908496732026144
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spf exists

In [9]:
# Full KNN search for the sup.DATA_S_PV sign data unit, labelled by
# sup.class_numeric_column.
best_KNN(sup.DATA_S_PV, sup.class_numeric_column)

updating best... 0.8253968253968254
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spv exists. Continuing with execution
updating best... 0.8412698412698413
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spv exists. Continuing with execution
updating best... 0.873015873015873
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spv exists. Continuing with execution
updating best... 0.8888888888888888
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spv exists. Continuing with execution
updating best... 0.9206349206349206
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spv exists. Continuing with execution
updating best... 0.9365079365079365
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spv exists. Continuing with execution
updating best... 0.9523809523809523
Directory /Users/diego/Desktop/iteso/TOG/bin/gen/TRAIN/three-classes/KNN/Spv exists

# Score analysis

In [10]:
import pandas as pd
from datetime import datetime

In [11]:
# Column names must follow the order in which keep_scores_knn builds each row.
tracker_columns = [
    'score',
    'data_unit',
    'n',
    'k',
    'subcode',
    'sub2code',
    'reducer_name',
    'kernel_name',
]
# One row per evaluated configuration across all data units.
knn_scores_df = pd.DataFrame(data=knn_score_tracker, columns=tracker_columns)

In [12]:
# Display the score table (pandas truncates the middle rows of long frames).
knn_scores_df

Unnamed: 0,score,data_unit,n,k,subcode,sub2code,reducer_name,kernel_name
0,0.983389,AHpf,,1,w2,scale,,
1,0.983389,AHpf,,2,w2,scale,,
2,0.986711,AHpf,,3,w2,scale,,
3,0.984718,AHpf,,4,w2,scale,,
4,0.986047,AHpf,,5,w2,scale,,
...,...,...,...,...,...,...,...,...
34777,0.619048,Spv,31,27,wo2,reduce,umap,
34778,0.507937,Spv,31,28,wo2,reduce,umap,
34779,0.666667,Spv,31,29,wo2,reduce,umap,
34780,0.634921,Spv,31,30,wo2,reduce,umap,


In [13]:
# Export the full score table to a timestamped CSV so each run gets its own
# file. The time fields are separated with hyphens rather than colons because
# ':' is not a valid character in Windows file names.
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
knn_scores_dir = os.path.join(sup.TRAIN_SCORES_ROOT, sup.TRAIN_KNN_CODE)
sup.create_dir_if_not_exists(knn_scores_dir)
knn_scores_df.to_csv(os.path.join(knn_scores_dir, f"{now}.csv"), index=False)

Directory /Users/diego/Desktop/iteso/TOG/scores/three-classes/KNN exists. Continuing with execution


## Active hand detection

In [14]:
# Rows of the score table that belong to the active-hand data unit.
ah_scores = knn_scores_df.loc[knn_scores_df['data_unit'] == sup.DATA_AH_PF]

## Sign recognition

### Only per frame data

In [15]:
# Rows of the score table that belong to the per-frame sign data unit.
s_pf_scores = knn_scores_df.loc[knn_scores_df['data_unit'] == sup.DATA_S_PF]

### Only per video data

In [16]:
# Rows of the score table that belong to the per-video sign data unit.
s_pv_scores = knn_scores_df.loc[knn_scores_df['data_unit'] == sup.DATA_S_PV]