# Context
This notebook drives the training process for different models.

In [1]:
# Load the project's environment variables from ../project.env, then add the
# directory named by the PYTHONPATH variable to the interpreter's import path.
import os
import sys
from dotenv import load_dotenv
load_dotenv(dotenv_path="../project.env")
# PYTHONPATH must be defined by now (presumably by project.env -- TODO confirm);
# os.environ[...] raises KeyError if it is missing.
sys.path.append(os.environ["PYTHONPATH"])

In [2]:
# Import project-wide (superheader) and training-specific (TRAINheader) variables and functions
import superheader as sup
import TRAINheader as train

Directory /Users/diego/Desktop/iteso/TOG/ exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/src exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/bin exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/media exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data/PH3/two-classes exists. Continuing with execution


# Datasets

In [3]:
# Check the PH3 data root before training. What the helper does when the
# directory is missing is defined in superheader and is not visible here.
sup.report_dir_if_not_exists(sup.PH3_DATA_ROOT)

Directory /Users/diego/Desktop/iteso/TOG/data/PH3/two-classes exists. Continuing with execution


# Model training

## KNN

### Record keeping

In [4]:
# Best KNN configuration seen so far, tracked separately per prediction target.
# A score of -1 (with n/k of -1 and empty strings elsewhere) means that nothing
# has been recorded yet for that target. Each target gets its own dict.
best_scores = {
    target: {
        "score": -1,
        "data_unit": "",
        "subcode": "",
        "sub2code": "",
        "reducer": "",
        "kernel": "",
        "n": -1,
        "k": -1,
    }
    for target in ("active_hand", "class_numeric")
}

def update_best_knn(score, label_col, n, k, data_unit, subcode, sub2code, reducer_name, kernel_name):
    """Record this configuration in ``best_scores`` if it beats the stored best.

    ``label_col`` selects which record is compared: ``sup.active_hand_col``
    maps to "active_hand" and ``sup.class_numeric_column`` maps to
    "class_numeric". Any other label column is ignored. The record is only
    overwritten when ``score`` is strictly greater than the stored score, and
    a message is printed whenever that happens.
    """
    if label_col == sup.active_hand_col:
        target = "active_hand"
    elif label_col == sup.class_numeric_column:
        target = "class_numeric"
    else:
        # Unknown label column: there is no record to compare against.
        return

    record = best_scores[target]
    if score > record["score"]:
        print(f"updating best... {score}")
        record.update(
            score=score,
            data_unit=data_unit,
            subcode=subcode,
            sub2code=sub2code,
            reducer=reducer_name,
            kernel=kernel_name,
            n=n,
            k=k,
        )

# One row per evaluated (configuration, k) pair, in evaluation order.
score_tracker = []

def keep_scores_knn(score, label_col, n, k, data_unit, subcode, sub2code, reducer_name, kernel_name):
    """Append one evaluation result to the module-level ``score_tracker``.

    The row is stored as a list ordered as: score, label_col, n, k, data_unit,
    subcode, sub2code, reducer_name, kernel_name.
    """
    row = [
        score,
        label_col,
        n,
        k,
        data_unit,
        subcode,
        sub2code,
        reducer_name,
        kernel_name,
    ]
    score_tracker.append(row)

def print_best_knn(label_col):
    """Print the stored best configuration for the target selected by ``label_col``.

    ``sup.active_hand_col`` selects the "active_hand" record and
    ``sup.class_numeric_column`` selects the "class_numeric" record. Nothing
    is printed for any other label column.
    """
    if label_col == sup.active_hand_col:
        target = "active_hand"
    elif label_col == sup.class_numeric_column:
        target = "class_numeric"
    else:
        return

    record = best_scores[target]
    print(f"Best score: {record['score']}")
    print(f"Best k: {record['k']}")
    print(f"Best n: {record['n']}")
    print(f"Data unit: {record['data_unit']}")
    print(f"PH2 or no PH2: {record['subcode']}")
    print(f"Reduced or scaled: {record['sub2code']}")
    print(f"Best reducer: {record['reducer']}")
    print(f"Best kernel: {record['kernel']}")


### Helper functions

In [5]:
def try_all_n_k(data_unit, label_col, subcode, sub2code, reducer_name="", n="", kernel_name=""):
    """Fit and score a KNN model for every candidate ``k`` on one dataset file.

    The dataset is read from
    ``<PH3_DATA_ROOT>/<subcode>/<sub2code>/<reducer_name>/<kernel_name>/<data_unit><n>.csv``.
    ``reducer_name``, ``n`` and ``kernel_name`` default to empty strings;
    ``os.path.join`` skips empty intermediate components, so they simply drop
    out of the path when not supplied.

    Every result is appended to the score tracker via ``keep_scores_knn`` and
    offered to ``update_best_knn``.

    Args:
        data_unit: File-name prefix of the dataset.
        label_col: Label column passed to the model and used to pick the
            best-score record.
        subcode: First-level subdirectory under the PH3 data root.
        sub2code: Second-level subdirectory.
        reducer_name: Optional reducer subdirectory.
        n: Optional suffix appended to ``data_unit`` in the file name.
        kernel_name: Optional kernel subdirectory.
    """
    # The path does not depend on k, so build it once instead of once per candidate.
    data_path = os.path.join(
        sup.PH3_DATA_ROOT, subcode, sub2code, reducer_name, kernel_name, f"{data_unit}{n}.csv"
    )

    for k in train.TRAIN_KNN_K_CANDIDATES:
        model = train.arch.KNN(data_path=data_path, label_col=label_col, k=k)
        model.fit()
        score = model.score()

        keep_scores_knn(score, label_col, n, k, data_unit, subcode, sub2code, reducer_name, kernel_name)
        update_best_knn(score, label_col, n, k, data_unit, subcode, sub2code, reducer_name, kernel_name)

In [6]:
def best_KNN(data_unit, label_col):
    """Evaluate KNN on every dataset variant of ``data_unit`` and print the best.

    Iterates over every subcode / sub2code combination. Variants whose
    sub2code is not ``sup.PH3_REDUCED_CODE`` are evaluated directly. Reduced
    variants are evaluated for every reducer and every candidate ``n``, and
    additionally for every kernel when the reducer is
    ``sup.PH3_REDUCER_NAME_KPCA``.

    NOTE(review): this function does not reset ``best_scores``, so the best
    record for a label column carries over between calls that share that
    label column. Confirm that this accumulation is intended.
    """
    for subcode in sup.PH3_SUB_CODES:
        for sub2code in sup.PH3_SUB2_CODES:
            if sub2code != sup.PH3_REDUCED_CODE:
                # Non-reduced data has no reducer / n / kernel dimensions.
                try_all_n_k(data_unit, label_col, subcode, sub2code)
                continue

            for reducer_name in sup.PH3_REDUCER_NAMES:
                for n in sup.PH3_N_CANDIDATES:
                    if reducer_name != sup.PH3_REDUCER_NAME_KPCA:
                        try_all_n_k(data_unit, label_col, subcode, sub2code, reducer_name, n)
                        continue

                    # Only KPCA has an extra kernel dimension to sweep.
                    for kernel_name in sup.PH3_REDUCER_KERNEL_NAMES:
                        try_all_n_k(data_unit, label_col, subcode, sub2code, reducer_name, n, kernel_name)

    print_best_knn(label_col)
  

### Active Hand prediction

In [7]:
# Sweep every dataset variant and k for the active-hand label on the
# DATA_AH_PF data unit, then print the best configuration found.
best_KNN(sup.DATA_AH_PF, sup.active_hand_col)

updating best... 0.47352647352647353
updating best... 0.4835164835164835
updating best... 0.5084915084915085
updating best... 0.5134865134865135
updating best... 0.5234765234765235
updating best... 0.5694305694305695
updating best... 0.5744255744255744
updating best... 0.5834165834165834
updating best... 0.5924075924075924
updating best... 0.6093906093906094
updating best... 0.6163836163836164
updating best... 0.6223776223776224
Best score: 0.6223776223776224
Best k: 28
Best n: 18
Data unit: AHpf
PH2 or no PH2: wo2
Reduced or scaled: reduce
Best reducer: kpca
Best kernel: cosine


### Sign prediction

In [8]:
# Sweep every dataset variant and k for the sign-class label on the
# DATA_S_PF data unit, then print the best configuration found.
best_KNN(sup.DATA_S_PF, sup.class_numeric_column)

updating best... 0.4774951076320939
updating best... 0.5283757338551859
updating best... 0.5499021526418787
updating best... 0.5772994129158513
updating best... 0.5909980430528375
updating best... 0.6046966731898239
updating best... 0.6086105675146771
updating best... 0.6164383561643836
updating best... 0.6340508806262231
updating best... 0.639921722113503
updating best... 0.6673189823874756
Best score: 0.6673189823874756
Best k: 29
Best n: 12
Data unit: Spf
PH2 or no PH2: wo2
Reduced or scaled: reduce
Best reducer: kpca
Best kernel: rbf


In [9]:
# Same sweep for the sign-class label on the DATA_S_PV data unit.
# NOTE(review): this call shares the "class_numeric" best-score record with the
# DATA_S_PF run above, and that record is never reset, so a new best is only
# reported here when it beats the earlier run's score.
best_KNN(sup.DATA_S_PV, sup.class_numeric_column)

updating best... 0.6904761904761905
updating best... 0.7857142857142857
updating best... 0.8333333333333334
Best score: 0.8333333333333334
Best k: 25
Best n: 6
Data unit: Spv
PH2 or no PH2: w2
Reduced or scaled: reduce
Best reducer: kpca
Best kernel: cosine


# Score analysis

In [10]:
import pandas as pd

In [11]:
# Column names, in the same order as the rows appended by keep_scores_knn.
tracker_columns = [
    'score',
    'label_col',
    'n',
    'k',
    'data_unit',
    'subcode',
    'sub2code',
    'reducer_name',
    'kernel_name',
]
# One row per evaluated configuration.
all_scores_df = pd.DataFrame(data=score_tracker, columns=tracker_columns)

In [13]:
all_scores_df

Unnamed: 0,score,label_col,n,k,data_unit,subcode,sub2code,reducer_name,kernel_name
0,0.473526,active_hand,,1,AHpf,w2,scale,,
1,0.461538,active_hand,,2,AHpf,w2,scale,,
2,0.483516,active_hand,,3,AHpf,w2,scale,,
3,0.508492,active_hand,,4,AHpf,w2,scale,,
4,0.474525,active_hand,,5,AHpf,w2,scale,,
...,...,...,...,...,...,...,...,...,...
15805,0.452381,class_numeric,30,27,Spv,wo2,reduce,umap,
15806,0.714286,class_numeric,30,28,Spv,wo2,reduce,umap,
15807,0.500000,class_numeric,30,29,Spv,wo2,reduce,umap,
15808,0.595238,class_numeric,30,30,Spv,wo2,reduce,umap,


## Active hand detection

In [14]:
# Keep only the rows scored against the active-hand label column.
ah_scores = all_scores_df.loc[all_scores_df['label_col'] == sup.active_hand_col]

## Sign recognition

In [15]:
# Keep only the rows scored against the sign-class label column.
s_scores = all_scores_df.loc[all_scores_df['label_col'] == sup.class_numeric_column]

### Only per frame data

In [16]:
# Sign-recognition rows for the DATA_S_PF data unit only.
s_pf_scores = s_scores.loc[s_scores['data_unit'] == sup.DATA_S_PF]

### Only per video data

In [17]:
# Sign-recognition rows for the DATA_S_PV data unit only.
s_pv_scores = s_scores.loc[s_scores['data_unit'] == sup.DATA_S_PV]