In [44]:
from sklearn.compose import ColumnTransformer, make_column_transformer, make_column_selector
from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
import pandas as pd
from sklearn.pipeline import Pipeline

In [150]:
def normalize(df: pd.DataFrame) -> pd.DataFrame:
    """Standardize every row of ``df`` across its columns.

    For each row, the row mean is subtracted from every value and the result
    is divided by the row's sample standard deviation (pandas default,
    ``ddof=1``).

    Parameters
    ----------
    df : pd.DataFrame
        Numeric frame; each row is normalized independently of the others.

    Returns
    -------
    pd.DataFrame
        Frame with the same index and columns as ``df``.

    Notes
    -----
    A row whose values are all identical has a standard deviation of 0. The
    centered values are already 0 in that case, so the divisor is replaced by
    1 to return zeros instead of the NaN that ``0 / 0`` would produce.
    Rows with fewer than two non-missing values have an undefined sample
    standard deviation and still yield NaN.
    """
    row_mean = df.mean(axis=1)
    row_std = df.std(axis=1)
    # Guard against division by zero for constant rows (see Notes).
    safe_std = row_std.mask(row_std == 0, 1.0)
    centered = df.subtract(row_mean, axis="index")
    return centered.divide(safe_std, axis="index")

In [65]:
# The 21 hand-landmark names: the wrist, four thumb joints, then four joints
# for each of the remaining fingers, listed from the base of the finger to the tip.
# NOTE(review): naming looks like the MediaPipe Hands landmark set — confirm.
LANDMARK_NAME = [
    "WRIST",
    *(f"THUMB_{joint}" for joint in ("CMC", "MCP", "IP", "TIP")),
    *(
        f"{finger}_{joint}"
        for finger in ("INDEX_FINGER", "MIDDLE_FINGER", "RING_FINGER", "PINKY")
        for joint in ("MCP", "PIP", "DIP", "TIP")
    ),
]

# One column name per landmark coordinate, grouped by landmark:
# WRIST_X, WRIST_Y, WRIST_Z, THUMB_CMC_X, ...
LANDMARK_NAME_XYZ = [
    f"{landmark}{suffix}"
    for landmark in LANDMARK_NAME
    for suffix in ("_X", "_Y", "_Z")
]

In [124]:
def _reorder_landmark_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` restricted to the ``LANDMARK_NAME_XYZ`` columns, in that order."""
    return df[LANDMARK_NAME_XYZ]


def _landmark_feature_names(transformer, input_features):
    """Report ``LANDMARK_NAME_XYZ`` as the output feature names.

    The ``(transformer, input_features)`` signature is the one
    ``FunctionTransformer`` requires of a callable ``feature_names_out``;
    both arguments are ignored because the output columns are fixed.
    """
    return LANDMARK_NAME_XYZ


# pandas output via set_output:
# https://scikit-learn.org/stable/auto_examples/miscellaneous/plot_set_output.html#sphx-glr-auto-examples-miscellaneous-plot-set-output-py
def create_preprocessing() -> Pipeline:
    """Build the landmark preprocessing pipeline.

    Steps:

    1. ``normalize_frame`` - columns are grouped by their coordinate suffix
       (``_x``/``_X``, ``_y``/``_Y``, ``_z``/``_Z``) and ``normalize`` is
       applied to each group separately. Columns matching none of the
       suffixes are dropped.
    2. ``reorder_columns`` - the result is re-indexed to the
       ``LANDMARK_NAME_XYZ`` column order.

    The pipeline is configured to emit pandas DataFrames, which the reorder
    step relies on to select columns by name.

    Named module-level functions are used instead of lambdas so the pipeline
    can be pickled (a ``FunctionTransformer`` wrapping a lambda cannot).

    NOTE(review): the selectors accept lower-case suffixes, but the reorder
    step indexes with the upper-case names in ``LANDMARK_NAME_XYZ``, so
    lower-case column names would raise ``KeyError`` there.

    Returns
    -------
    Pipeline
        Unfitted preprocessing pipeline.
    """
    frame_normalizer = ColumnTransformer(
        [
            (
                axis,
                FunctionTransformer(normalize, feature_names_out="one-to-one"),
                # Yields "_(?:x|X)$", "_(?:y|Y)$" and "_(?:z|Z)$".
                make_column_selector(pattern=f"_(?:{axis}|{axis.upper()})$"),
            )
            for axis in ("x", "y", "z")
        ],
        remainder='drop',
        verbose_feature_names_out=False
    )

    column_reorder = FunctionTransformer(
        _reorder_landmark_columns, feature_names_out=_landmark_feature_names
    )

    return Pipeline([
        ("normalize_frame", frame_normalizer),
        ("reorder_columns", column_reorder)
    ]).set_output(transform="pandas")

## Check KNN

In [144]:
from sklearn.neighbors import KNeighborsClassifier

# Chain the landmark preprocessing with a default-parameter
# k-nearest-neighbours classifier.
preproc = create_preprocessing()
knn_steps = [
    ("preproc", preproc),
    ("knn", KNeighborsClassifier()),
]
full_pipeline = Pipeline(steps=knn_steps)

In [149]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the first dataset; rows containing any missing value are discarded.
data = pd.read_csv("raw_data/images_ds.csv").dropna()
# Every column except these three is used as a feature; TARGET is the label.
X = data.drop(columns = ["Unnamed: 0", "PATH", "TARGET"])
y = data["TARGET"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the encoder on the full label column, not just y_train: the split is not
# stratified, so a rare class can end up only in the test split, and
# LabelEncoder.transform raises ValueError on labels it did not see in fit.
# The integer codes are unchanged whenever every class is present in y_train.
le = LabelEncoder().fit(y)
y_train_encoded = le.transform(y_train)
y_test_encoded = le.transform(y_test)

full_pipeline.fit(X_train, y_train_encoded)

# Score on the training split first, then on the held-out split.
display(full_pipeline.score(X_train, y_train_encoded))
display(full_pipeline.score(X_test, y_test_encoded))

0.9670710571923743

0.9488272921108742

In [151]:
# Load the second dataset, dropping incomplete rows.
# Note: this rebinds `data`, `X` and `y` from the previous experiment.
data = pd.read_csv("raw_data/dataset-3.csv").dropna()
y = data["TARGET"]
X = data.drop(["Unnamed: 0", "PATH", "TARGET"], axis="columns")


In [152]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the encoder on the full label column, not just y_train: the split is not
# stratified, so a rare class can end up only in the test split, and
# LabelEncoder.transform raises ValueError on labels it did not see in fit.
# The integer codes are unchanged whenever every class is present in y_train.
le = LabelEncoder().fit(y)
y_train_encoded = le.transform(y_train)
y_test_encoded = le.transform(y_test)

# Re-fits the same pipeline object, replacing the previously fitted state.
full_pipeline.fit(X_train, y_train_encoded)

# Score on the training split first, then on the held-out split.
display(full_pipeline.score(X_train, y_train_encoded))
display(full_pipeline.score(X_test, y_test_encoded))

0.9795534406604031

0.9691566711434261

## Check CNN

In [None]:
# See how to plug a Keras model into a scikit-learn Pipeline (e.g. via the SciKeras wrapper linked below):
# https://towardsdatascience.com/are-you-using-the-scikit-learn-wrapper-in-your-keras-deep-learning-model-a3005696ff38
# https://www.adriangb.com/scikeras/stable/