In [1]:
import pandas as pd

In [2]:
# Load the dataset (a "pose" label column plus per-keypoint x/y coordinate
# columns) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="data/yoga_ds_Xy.csv")
df.head(n=5)

Unnamed: 0,pose,nose_x,nose_y,left_eye_x,left_eye_y,right_eye_x,right_eye_y,left_ear_x,left_ear_y,right_ear_x,...,right_hip_x,right_hip_y,left_knee_x,left_knee_y,right_knee_x,right_knee_y,left_ankle_x,left_ankle_y,right_ankle_x,right_ankle_y
0,hanumanasana,0.261698,0.512255,0.237344,0.517355,0.238441,0.486026,0.242483,0.497878,0.248089,...,0.651845,0.414223,0.678855,0.221365,0.722782,0.674401,0.68382,0.050615,0.79544,0.92466
1,hanumanasana,0.394864,0.491497,0.370693,0.487102,0.375714,0.46112,0.381811,0.450995,0.40014,...,0.807697,0.396388,0.854739,0.710447,0.805349,0.17453,0.908731,0.943022,0.702517,0.003883
2,hanumanasana,0.430713,0.75343,0.439265,0.780632,0.419011,0.763575,0.482887,0.78736,0.431683,...,0.622483,0.48675,0.743239,0.589924,0.71926,0.579058,0.728648,0.731057,0.723543,0.73271
3,hanumanasana,0.369197,0.440116,0.368485,0.4606,0.366005,0.458955,0.407975,0.471238,0.402241,...,0.778978,0.435595,0.848863,0.726469,0.786196,0.241361,0.85416,0.963011,0.808341,0.036816
4,hanumanasana,0.262922,0.54876,0.245587,0.531681,0.244322,0.53235,0.257699,0.483209,0.25808,...,0.624212,0.459428,0.69059,0.708224,0.755988,0.29893,0.741362,0.910159,0.699116,0.09235


In [3]:
# Sanity check: (number of rows, number of columns) of the loaded frame.
df.shape

(5994, 35)

In [4]:
# Name of the label column in `df`; every other column is used as a feature.
TARGET_COL: str = "pose"
# Seed passed as `random_state` so that randomized steps are repeatable.
SEED: int = 42

In [5]:
# Feature matrix: every column except the label, as a NumPy array.
X = df.drop(columns=[TARGET_COL]).to_numpy()
# Label vector: the pose name for each row.
y = df[TARGET_COL].to_numpy()

In [6]:
# Confirm that features and labels have the same number of rows.
print(f"{X.shape=},\n{y.shape=}")

X.shape=(5994, 34),
y.shape=(5994,)


In [7]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation. Stratifying on `y` keeps the pose
# proportions the same in both partitions; SEED makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=SEED,
)

In [8]:
# Report the sizes of the train and test partitions produced by the split.
print(f"{X_train.shape=},\n{y_train.shape=}\n\n{X_test.shape=},\n{y_test.shape=}")

X_train.shape=(4195, 34),
y_train.shape=(4195,)

X_test.shape=(1799, 34),
y_test.shape=(1799,)


In [9]:
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from tqdm import tqdm
import numpy as np


# Baseline models with library-default hyperparameters. Estimators whose
# fitting is randomized are pinned to SEED so that re-running the notebook
# reproduces the same metrics; the remaining estimators are left as-is.
classifiers = [
    KNeighborsClassifier(),
    SVC(),
    DecisionTreeClassifier(random_state=SEED),
    RandomForestClassifier(random_state=SEED),
    MLPClassifier(random_state=SEED),
    AdaBoostClassifier(random_state=SEED),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]

# Long-format results: one dict per (model, pose) pair, plus one "_overall"
# row per model holding the micro-averaged scores.
metrics_rows = []
for clf in tqdm(classifiers):
    clf_name = clf.__class__.__name__

    clf.fit(X_train, y_train)
    y_preds = clf.predict(X_test)

    # zero_division=0 scores labels that were never predicted as 0 instead of
    # emitting a warning for each of them.
    clf_report = classification_report(y_test, y_preds, output_dict=True, zero_division=0)

    # For single-label multiclass predictions these three micro-averaged
    # scores coincide (they all equal plain accuracy).
    micro_f1 = f1_score(y_test, y_preds, average="micro")
    micro_precision = precision_score(y_test, y_preds, average="micro")
    micro_recall = recall_score(y_test, y_preds, average="micro")

    metrics_rows.append({
        "model": clf_name,
        "pose": "_overall",
        "f1": micro_f1,
        "precision": micro_precision,
        "recall": micro_recall,
        "support": np.nan,  # no support value is recorded for the summary row
    })

    # The report also carries the "macro avg" / "weighted avg" aggregates;
    # they are stored as rows alongside the individual poses.
    for key, value in clf_report.items():
        # "accuracy" is a single number rather than a per-label dict, so it
        # cannot be unpacked like the other entries.
        if key == "accuracy":
            continue
        metrics_rows.append({
            "model": clf_name,
            "pose": key,
            "f1": value["f1-score"],
            "precision": value["precision"],
            "recall": value["recall"],
            "support": value["support"],
        })

100%|██████████| 8/8 [00:17<00:00,  2.22s/it]


In [10]:
# Assemble the collected per-model / per-pose records into a single table;
# each dict key becomes a column.
metrics_df = pd.DataFrame(metrics_rows)
metrics_df

Unnamed: 0,model,pose,f1,precision,recall,support
0,KNeighborsClassifier,_overall,0.711506,0.711506,0.711506,
1,KNeighborsClassifier,adho mukha svanasana,0.913043,0.840000,1.000000,21.0
2,KNeighborsClassifier,adho mukha vriksasana,0.652174,0.535714,0.833333,18.0
3,KNeighborsClassifier,agnistambhasana,0.631579,0.666667,0.600000,10.0
4,KNeighborsClassifier,ananda balasana,0.857143,0.750000,1.000000,18.0
...,...,...,...,...,...,...
875,QuadraticDiscriminantAnalysis,vriksasana,0.642857,1.000000,0.473684,19.0
876,QuadraticDiscriminantAnalysis,vrischikasana,0.518519,0.583333,0.466667,15.0
877,QuadraticDiscriminantAnalysis,yoganidrasana,0.000000,0.000000,0.000000,14.0
878,QuadraticDiscriminantAnalysis,macro avg,0.339639,0.394996,0.369490,1799.0


In [11]:
# Order rows by pose, then by ascending F1 within each pose. The result is
# reassigned instead of sorted with inplace=True, so the frame object built
# by the previous cell is not mutated.
metrics_df = metrics_df.sort_values(by=["pose", "f1"])

# Leaderboard: one "_overall" row per model, worst to best F1. The rows are
# selected explicitly rather than with head(len(classifiers)), which would
# only be correct while "_overall" sorts ahead of every pose name.
metrics_df[metrics_df["pose"] == "_overall"]

Unnamed: 0,model,pose,f1,precision,recall,support
550,AdaBoostClassifier,_overall,0.029461,0.029461,0.029461,
770,QuadraticDiscriminantAnalysis,_overall,0.437465,0.437465,0.437465,
220,DecisionTreeClassifier,_overall,0.496387,0.496387,0.496387,
660,GaussianNB,_overall,0.574208,0.574208,0.574208,
440,MLPClassifier,_overall,0.68149,0.68149,0.68149,
110,SVC,_overall,0.696498,0.696498,0.696498,
0,KNeighborsClassifier,_overall,0.711506,0.711506,0.711506,
330,RandomForestClassifier,_overall,0.714286,0.714286,0.714286,


In [12]:
from pathlib import Path

# Persist the sorted metrics table. The output directory is created first
# because to_csv raises an error when the parent directory does not exist,
# e.g. on a fresh checkout.
results_path = Path("results/baseline_clfs_metrics.csv")
results_path.parent.mkdir(parents=True, exist_ok=True)
metrics_df.to_csv(results_path, index=False)