In [24]:
from itertools import product
from typing import Dict, List

import warnings

import numpy as np
import pandas as pd
import seaborn as sns

# ML procedures
from sklearn.feature_selection import mutual_info_classif, SelectKBest
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import (
    LeaveOneGroupOut,
)

# ML models
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import (
    AdaBoostClassifier,
    BaggingClassifier,
    RandomForestClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# DL models
from keras.layers import (
    Activation,
    BatchNormalization,
    Bidirectional,
    Dense,
    Dropout,
    Input,
)
from keras.models import Model
from keras.initializers.initializers_v2 import GlorotNormal
from keras.optimizers import adam_v2
from keras.regularizers import L2

In [7]:
COGLOAD_DIR = "CogLoad"
SNAKE_DIR = "Snake"

COGLOAD_IDS: Dict[int, str] = {
    0: "1mpau",
    1: "2nxs5",
    2: "5gpsc",
    3: "7swyk",
    4: "8a1ep",
    5: "b7mrd",
    6: "c24ur",
    7: "dkhty",
    8: "e4gay",
    9: "ef5rq",
    10: "f3j25",
    11: "hpbxa",
    12: "ibvx8",
    13: "iz2ps",
    14: "rc1in",
    15: "tn4vl",
    16: "wjxci",
    17: "yljm5",
    18: "3caqi",
    19: "6frz4",
    20: "bd47a",
    21: "f1gjp",
    22: "iz3x1",
}
COGLOAD_GROUPS: Dict[str, int] = {
    "1mpau": 0,
    "2nxs5": 1,
    "5gpsc": 2,
    "7swyk": 3,
    "8a1ep": 4,
    "b7mrd": 5,
    "c24ur": 6,
    "dkhty": 7,
    "e4gay": 8,
    "ef5rq": 9,
    "f3j25": 10,
    "hpbxa": 11,
    "ibvx8": 12,
    "iz2ps": 13,
    "rc1in": 14,
    "tn4vl": 15,
    "wjxci": 16,
    "yljm5": 17,
    "3caqi": 18,
    "6frz4": 19,
    "bd47a": 20,
    "f1gjp": 21,
    "iz3x1": 22,
}

SNAKE_IDS: Dict[int, str] = {
    0: "0dah3",
    1: "3190o",
    2: "bos1y",
    3: "deztl",
    4: "kru08",
    5: "la3bh",
    6: "m8rvq",
    7: "ptoj6",
    8: "wymuo",
    9: "hu32w",
    10: "xrxbc",
    11: "o9m76",
}
SNAKE_GROUPS: Dict[str, int] = {
    "0dah3": 0,
    "3190o": 1,
    "bos1y": 2,
    "deztl": 3,
    "kru08": 4,
    "la3bh": 5,
    "m8rvq": 6,
    "ptoj6": 7,
    "wymuo": 8,
    "hu32w": 9,
    "xrxbc": 10,
    "o9m76": 11,
}

In [8]:
LOGO_GROUPS_COGLOAD: List[int] = [
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    18,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    19,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    4,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    5,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    20,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    6,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    7,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    9,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    21,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    10,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    11,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    12,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    13,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    22,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    14,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    15,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    16,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
    17,
]
LOGO_GROUPS_SNAKE: List[int] = [
    0,
    0,
    1,
    1,
    2,
    2,
    3,
    3,
    9,
    9,
    4,
    4,
    5,
    5,
    6,
    6,
    11,
    11,
    7,
    7,
    8,
    8,
    10,
    10,
]

In [9]:
N_REPETITIONS: int = 5
N_SPLITS: int = 5
FEATURE_TYPES: List[str] = ["raw", "0-1", "std"]
SELECT_FEATURES: List[str] = ["OFF", "ON"]
N_SELECTED_FEATURES: int = 50

N_EPOCHS: int = 50
BATCH_SIZE: int = 32
VERBOSE: int = 1
SHUFFLE: bool = False

In [10]:
warnings.simplefilter("ignore")

sns.set()

In [11]:
def binarize(label, dataset: str) -> int:
    match dataset:
        case "cogload":
            return 0 if label == "rest" else 1
        case "snake":
            return 0 if label == 0 else 1
        case _:
            raise Exception("Can’t binarize datasets other than CogLoad and Snake.")


def get_io(
    features_path: str,
    labels_path: str,
    dataset: str,
) -> [pd.DataFrame, np.ndarray]:
    feature_df: pd.DataFrame = pd.read_csv(features_path).drop("Unnamed: 0", axis=1)
    label_df: pd.DataFrame = pd.read_csv(labels_path)

    X_ = []
    y_ = []

    session_ids = (
        COGLOAD_GROUPS.keys() if dataset.lower() == "cogload" else SNAKE_GROUPS.keys()
    )
    for session_id in session_ids:
        X_.append(feature_df[label_df.user_id == session_id])
        y_.extend(
            np.array(
                [
                    binarize(label, dataset=dataset)
                    for label in label_df.loc[
                        label_df.user_id == session_id, "level"
                    ].values
                ]
            )
        )

    return pd.concat(X_), np.array(y_)

In [12]:
def get_logo_groups(dataset: str) -> List[int]:
    data_dir: str = "CogLoad" if dataset.lower() == "cogload" else "Snake"
    groups: Dict[str, int] = (
        COGLOAD_GROUPS if dataset.lower() == "cogload" else SNAKE_GROUPS
    )

    label_df: pd.DataFrame = pd.read_csv(f"final_data/{data_dir}/labels.csv")

    return label_df["user_id"].map(groups).tolist()

In [13]:
def test_model(model, dataset: str, deep: bool = False):
    data_dir: str = COGLOAD_DIR if dataset.lower() == "cogload" else SNAKE_DIR
    logo_groups: List[int] = (
        LOGO_GROUPS_COGLOAD if dataset.lower() == "cogload" else LOGO_GROUPS_SNAKE
    )

    model_name: str = model.__repr__().removesuffix("()")
    print(f"Testing {model_name}")

    for (feature_type, select) in product(FEATURE_TYPES, SELECT_FEATURES):
        X, y = get_io(
            features_path=f"final_data/{data_dir}/features_{feature_type}.csv",
            labels_path=f"final_data/{data_dir}/labels.csv",
            dataset=dataset,
        )

        if select == "ON":
            selector = SelectKBest(mutual_info_classif, k=N_SELECTED_FEATURES)
            X_selected = pd.DataFrame(selector.fit_transform(X, y))
            X = X_selected

        accuracy_scores: List[float] = []
        for i in range(N_REPETITIONS):
            i_accuracy_scores: List[float] = []
            score_weights: List[int] = []  # Ns of test examples
            logo = LeaveOneGroupOut()
            for j, (train_is, test_is) in enumerate(
                logo.split(X, y, groups=logo_groups)
            ):
                X_train: pd.DataFrame = X.iloc[lambda x: train_is, :]
                y_train: np.ndarray = y[train_is]
                X_test: pd.DataFrame = X.iloc[lambda x: test_is, :]
                y_test: np.ndarray = y[test_is]

                if deep:
                    model.fit(
                        X_train,
                        y_train,
                        validation_data=(X_test, y_test),
                        epochs=N_EPOCHS,
                        batch_size=BATCH_SIZE,
                        shuffle=SHUFFLE,
                        verbose=VERBOSE,
                    )
                else:
                    model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

                accuracy: float = accuracy_score(y_test, y_pred)
                i_accuracy_scores.append(accuracy)
                score_weights.append(len(y_test))

            accuracy: float = sum(
                score * weight
                for score, weight in zip(i_accuracy_scores, score_weights)
            ) / sum(score_weights)

            accuracy_scores.append(accuracy)
        accuracy = np.mean(accuracy_scores)
        print(
            f"{model_name} accuracy: {accuracy:.2%} (features: {feature_type}, selection: {select})"
        )

## CogLoad

### Majority

In [9]:
test_model(DummyClassifier(), "cogload")

Testing DummyClassifier
DummyClassifier accuracy: 46.83% (features: raw, selection: OFF)
DummyClassifier accuracy: 46.83% (features: raw, selection: ON)
DummyClassifier accuracy: 46.83% (features: 0-1, selection: OFF)
DummyClassifier accuracy: 46.83% (features: 0-1, selection: ON)
DummyClassifier accuracy: 46.83% (features: std, selection: OFF)
DummyClassifier accuracy: 46.83% (features: std, selection: ON)


### Random forest

In [12]:
test_model(RandomForestClassifier(), "cogload")

Testing RandomForestClassifier
RandomForestClassifier accuracy: 62.89% (features: raw, selection: OFF)
RandomForestClassifier accuracy: 61.91% (features: raw, selection: ON)
RandomForestClassifier accuracy: 63.78% (features: 0-1, selection: OFF)
RandomForestClassifier accuracy: 63.63% (features: 0-1, selection: ON)
RandomForestClassifier accuracy: 66.79% (features: std, selection: OFF)
RandomForestClassifier accuracy: 66.67% (features: std, selection: ON)


### AdaBoost

In [13]:
test_model(AdaBoostClassifier(), "cogload")

Testing AdaBoostClassifier
AdaBoostClassifier accuracy: 60.57% (features: raw, selection: OFF)
AdaBoostClassifier accuracy: 58.78% (features: raw, selection: ON)
AdaBoostClassifier accuracy: 58.78% (features: 0-1, selection: OFF)
AdaBoostClassifier accuracy: 60.93% (features: 0-1, selection: ON)
AdaBoostClassifier accuracy: 59.38% (features: std, selection: OFF)
AdaBoostClassifier accuracy: 62.37% (features: std, selection: ON)


### *k*-Nearest neighbors

In [14]:
test_model(KNeighborsClassifier(), "cogload")

Testing KNeighborsClassifier
KNeighborsClassifier accuracy: 55.91% (features: raw, selection: OFF)
KNeighborsClassifier accuracy: 55.08% (features: raw, selection: ON)
KNeighborsClassifier accuracy: 59.98% (features: 0-1, selection: OFF)
KNeighborsClassifier accuracy: 59.86% (features: 0-1, selection: ON)
KNeighborsClassifier accuracy: 59.98% (features: std, selection: OFF)
KNeighborsClassifier accuracy: 61.17% (features: std, selection: ON)


### Naïve Bayes

In [15]:
test_model(GaussianNB(), "cogload")

Testing GaussianNB
GaussianNB accuracy: 61.41% (features: raw, selection: OFF)
GaussianNB accuracy: 61.53% (features: raw, selection: ON)
GaussianNB accuracy: 62.60% (features: 0-1, selection: OFF)
GaussianNB accuracy: 62.96% (features: 0-1, selection: ON)
GaussianNB accuracy: 63.08% (features: std, selection: OFF)
GaussianNB accuracy: 63.56% (features: std, selection: ON)


### Decision tree

In [16]:
test_model(DecisionTreeClassifier(max_depth=2), "cogload")

Testing DecisionTreeClassifier(max_depth=2)
DecisionTreeClassifier(max_depth=2) accuracy: 61.89% (features: raw, selection: OFF)
DecisionTreeClassifier(max_depth=2) accuracy: 59.62% (features: raw, selection: ON)
DecisionTreeClassifier(max_depth=2) accuracy: 64.04% (features: 0-1, selection: OFF)
DecisionTreeClassifier(max_depth=2) accuracy: 64.76% (features: 0-1, selection: ON)
DecisionTreeClassifier(max_depth=2) accuracy: 62.49% (features: std, selection: OFF)
DecisionTreeClassifier(max_depth=2) accuracy: 63.44% (features: std, selection: ON)


### Logistic regression

In [17]:
test_model(LogisticRegression(), "cogload")

Testing LogisticRegression
LogisticRegression accuracy: 61.05% (features: raw, selection: OFF)
LogisticRegression accuracy: 61.41% (features: raw, selection: ON)
LogisticRegression accuracy: 62.37% (features: 0-1, selection: OFF)
LogisticRegression accuracy: 63.68% (features: 0-1, selection: ON)
LogisticRegression accuracy: 65.23% (features: std, selection: OFF)
LogisticRegression accuracy: 66.07% (features: std, selection: ON)


### Bagging

In [18]:
test_model(BaggingClassifier(), "cogload")

Testing BaggingClassifier
BaggingClassifier accuracy: 58.49% (features: raw, selection: OFF)
BaggingClassifier accuracy: 58.57% (features: raw, selection: ON)
BaggingClassifier accuracy: 61.84% (features: 0-1, selection: OFF)
BaggingClassifier accuracy: 61.08% (features: 0-1, selection: ON)
BaggingClassifier accuracy: 61.82% (features: std, selection: OFF)
BaggingClassifier accuracy: 62.96% (features: std, selection: ON)


### XGBoost

In [19]:
test_model(XGBClassifier(n_jobs=-1), "cogload")

Testing XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=-1, num_parallel_tree=None,
              predictor=None, random_state=None, ...)
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=Non

## Snake

### Majority

In [22]:
test_model(DummyClassifier(), "snake")

Testing DummyClassifier
DummyClassifier accuracy: 50.00% (features: raw, selection: OFF)
DummyClassifier accuracy: 50.00% (features: raw, selection: ON)
DummyClassifier accuracy: 50.00% (features: 0-1, selection: OFF)
DummyClassifier accuracy: 50.00% (features: 0-1, selection: ON)
DummyClassifier accuracy: 50.00% (features: std, selection: OFF)
DummyClassifier accuracy: 50.00% (features: std, selection: ON)


### Random forest

In [23]:
test_model(RandomForestClassifier(), "snake")

Testing RandomForestClassifier
RandomForestClassifier accuracy: 61.67% (features: raw, selection: OFF)
RandomForestClassifier accuracy: 73.33% (features: raw, selection: ON)
RandomForestClassifier accuracy: 74.17% (features: 0-1, selection: OFF)
RandomForestClassifier accuracy: 77.50% (features: 0-1, selection: ON)
RandomForestClassifier accuracy: 75.00% (features: std, selection: OFF)
RandomForestClassifier accuracy: 77.50% (features: std, selection: ON)


### AdaBoost

In [24]:
test_model(AdaBoostClassifier(), "snake")

Testing AdaBoostClassifier
AdaBoostClassifier accuracy: 59.17% (features: raw, selection: OFF)
AdaBoostClassifier accuracy: 58.33% (features: raw, selection: ON)
AdaBoostClassifier accuracy: 58.33% (features: 0-1, selection: OFF)
AdaBoostClassifier accuracy: 78.33% (features: 0-1, selection: ON)
AdaBoostClassifier accuracy: 65.00% (features: std, selection: OFF)
AdaBoostClassifier accuracy: 74.17% (features: std, selection: ON)


### *k*-Nearest neighbors

In [25]:
test_model(KNeighborsClassifier(), "snake")

Testing KNeighborsClassifier
KNeighborsClassifier accuracy: 54.17% (features: raw, selection: OFF)
KNeighborsClassifier accuracy: 54.17% (features: raw, selection: ON)
KNeighborsClassifier accuracy: 66.67% (features: 0-1, selection: OFF)
KNeighborsClassifier accuracy: 75.00% (features: 0-1, selection: ON)
KNeighborsClassifier accuracy: 66.67% (features: std, selection: OFF)
KNeighborsClassifier accuracy: 87.50% (features: std, selection: ON)


### Naïve Bayes

In [26]:
test_model(GaussianNB(), "snake")

Testing GaussianNB
GaussianNB accuracy: 54.17% (features: raw, selection: OFF)
GaussianNB accuracy: 62.50% (features: raw, selection: ON)
GaussianNB accuracy: 62.50% (features: 0-1, selection: OFF)
GaussianNB accuracy: 70.83% (features: 0-1, selection: ON)
GaussianNB accuracy: 66.67% (features: std, selection: OFF)
GaussianNB accuracy: 75.00% (features: std, selection: ON)


### Decision tree

In [19]:
test_model(DecisionTreeClassifier(max_depth=6), "snake")

Testing DecisionTreeClassifier(max_depth=6)
DecisionTreeClassifier(max_depth=6) accuracy: 49.17% (features: raw, selection: OFF)
DecisionTreeClassifier(max_depth=6) accuracy: 52.50% (features: raw, selection: ON)
DecisionTreeClassifier(max_depth=6) accuracy: 63.33% (features: 0-1, selection: OFF)
DecisionTreeClassifier(max_depth=6) accuracy: 62.50% (features: 0-1, selection: ON)
DecisionTreeClassifier(max_depth=6) accuracy: 52.50% (features: std, selection: OFF)
DecisionTreeClassifier(max_depth=6) accuracy: 56.67% (features: std, selection: ON)


### Logistic regression

In [28]:
test_model(LogisticRegression(), "snake")

Testing LogisticRegression
LogisticRegression accuracy: 66.67% (features: raw, selection: OFF)
LogisticRegression accuracy: 50.00% (features: raw, selection: ON)
LogisticRegression accuracy: 75.00% (features: 0-1, selection: OFF)
LogisticRegression accuracy: 83.33% (features: 0-1, selection: ON)
LogisticRegression accuracy: 75.00% (features: std, selection: OFF)
LogisticRegression accuracy: 91.67% (features: std, selection: ON)


### Bagging

In [29]:
test_model(BaggingClassifier(), "snake")

Testing BaggingClassifier
BaggingClassifier accuracy: 59.17% (features: raw, selection: OFF)
BaggingClassifier accuracy: 57.50% (features: raw, selection: ON)
BaggingClassifier accuracy: 64.17% (features: 0-1, selection: OFF)
BaggingClassifier accuracy: 70.00% (features: 0-1, selection: ON)
BaggingClassifier accuracy: 63.33% (features: std, selection: OFF)
BaggingClassifier accuracy: 68.33% (features: std, selection: ON)


### XGBoost

In [20]:
test_model(XGBClassifier(n_jobs=-1), "snake")

Testing XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=-1, num_parallel_tree=None,
              predictor=None, random_state=None, ...)
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=Non

## Deep models

In [33]:
def mlp_model(input_shape):
    learning_rate = 0.0001
    decay = 0.0001
    dropout_rate = 0.25
    n_kernels_1 = 512
    n_kernels_2 = 32
    n_classes = 2

    _input = Input(shape=input_shape, dtype="float", name="m_input")

    x1 = Dense(
        n_kernels_1, activation="relu", name="dense1", kernel_initializer=GlorotNormal()
    )(_input)

    x2 = Dropout(rate=dropout_rate)(x1)

    x3 = Dense(
        n_kernels_2, activation="relu", name="dense2", kernel_initializer=GlorotNormal()
    )(x2)

    output = Dense(
        n_classes,
        activation="sigmoid",
        name="output",
        kernel_initializer=GlorotNormal(),
    )(x3)

    model = Model(inputs=[_input], outputs=[output])

    optimizer = adam_v2.Adam(learning_rate=learning_rate, decay=decay)

    model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["acc"])

    return model

In [34]:
print(mlp_model(120).summary())

Model: "model_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 m_input (InputLayer)        [(None, 120)]             0         
                                                                 
 dense1 (Dense)              (None, 512)               61952     
                                                                 
 dropout_16 (Dropout)        (None, 512)               0         
                                                                 
 dense2 (Dense)              (None, 32)                16416     
                                                                 
 output (Dense)              (None, 2)                 66        
                                                                 
Total params: 78,434
Trainable params: 78,434
Non-trainable params: 0
_________________________________________________________________
None


In [35]:
def test_mlp(dataset: str):
    data_dir: str = COGLOAD_DIR if dataset.lower() == "cogload" else SNAKE_DIR
    logo_groups: List[int] = (
        LOGO_GROUPS_COGLOAD if dataset.lower() == "cogload" else LOGO_GROUPS_SNAKE
    )

    print(f"Testing multilayer perceptron")

    X, y = get_io(
        features_path=f"final_data/{data_dir}/segments.csv",
        labels_path=f"final_data/{data_dir}/labels.csv",
        dataset=dataset,
    )

    accuracy_scores: List[float] = []
    for i in range(N_REPETITIONS):
        i_accuracy_scores: List[float] = []
        score_weights: List[int] = []  # Ns of test examples
        logo = LeaveOneGroupOut()
        for j, (train_is, test_is) in enumerate(logo.split(X, y, groups=logo_groups)):
            X_train: pd.DataFrame = X.iloc[lambda x: train_is, :]
            y_train: np.ndarray = y[train_is]
            X_test: pd.DataFrame = X.iloc[lambda x: test_is, :]
            y_test: np.ndarray = y[test_is]

            model = mlp_model(X_train.shape[1])
            model.fit(
                X_train,
                y_train,
                validation_data=(X_test, y_test),
                epochs=N_EPOCHS,
                batch_size=BATCH_SIZE,
                shuffle=SHUFFLE,
                verbose=VERBOSE,
            )
            y_pred = np.argmax(model.predict(X_test), axis=1)
            print(y_pred)

            accuracy: float = accuracy_score(y_test, y_pred)
            i_accuracy_scores.append(accuracy)
            score_weights.append(len(y_test))

        accuracy: float = sum(
            score * weight for score, weight in zip(i_accuracy_scores, score_weights)
        ) / sum(score_weights)

        accuracy_scores.append(accuracy)

    accuracy = np.mean(accuracy_scores)
    print(f"Multilayer perceptron accuracy: {accuracy:.2%}")

### Multilayer perceptron

In [36]:
test_mlp("cogload")

Testing multilayer perceptron
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1]
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/

### Multitask learning

In [37]:
test_mlp("snake")

Testing multilayer perceptron
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[0 0]
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/5