In [1]:
import mediapipe as mp
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
import csv
import os

import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score, confusion_matrix

#drawing pose and landmarks
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

In [4]:
print(os.getcwd())

C:\Users\Ria\Downloads


In [13]:
def describe_dataset_points(dataset_path):
    data = pd.read_csv(dataset_path)
        #printing headers
    print(f"Headers: {list(data.columns.values)}")

    duplicate = data[data.duplicated()] #A list of duplicated data
    # .duplicated() returns a boolean of T/F for duplicate, non duplicate rows

    #print the column, axis = 1
    print(f"Duplicate Rows : {len(duplicate.sum(axis=1))}")

    remove_duplicate = pd.read_csv(dataset_path)

    remove_duplicate.drop_duplicates(keep = 'first', inplace=True)
    remove_duplicate.to_csv(f"cleaned_train.csv", sep=',', encoding='utf-8', index=False)
    return remove_duplicate
    

In [2]:
def round_up_metric_results(results) -> list:
    #Round up metrics results such as precision score, accuracy score, recall score etc.
    return list(map(lambda el: round(el, 3), results))

In [14]:
# Extract features and class
df = describe_dataset_points("train.csv")
X = df.drop("label", axis=1)
#print("x:", X)
y = df["label"].astype("int")
#print("y:", y)
sc = StandardScaler()
X = pd.DataFrame(sc.fit_transform(X))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)
y_test.head(3)

Headers: ['label', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'l

291    0
59     0
6      0
Name: label, dtype: int32

In [15]:
algorithms =[("LR", LogisticRegression()),
         ("DTC", DecisionTreeClassifier()),
         ("SVC", SVC(probability=True))]

models = {}
final_results = []

for name, model in algorithms:
    trained_model = model.fit(X_train, y_train)
    models[name] = trained_model

    # Evaluate model
    model_results = model.predict(X_test)
    #print("mr",model_results)
    #print("y", y_test)
    p_score = precision_score(y_test, model_results, average=None, labels=[0,1])
    a_score = accuracy_score(y_test, model_results)
    r_score = recall_score(y_test, model_results, average=None, labels=[0, 1])
    f1_score_result = f1_score(y_test, model_results, average=None, labels=[0, 1])
    cm = confusion_matrix(y_test, model_results, labels=[0, 1, 2])
    final_results.append(( name,  round_up_metric_results(p_score), a_score, round_up_metric_results(r_score), round_up_metric_results(f1_score_result), cm))
final_results
print(models)
pd.DataFrame(final_results, columns=["Model", "Precision Score", "Accuracy score", "Recall Score", "F1 score", "Confusion Matrix"])


{'LR': LogisticRegression(), 'DTC': DecisionTreeClassifier(), 'SVC': SVC(probability=True)}


Unnamed: 0,Model,Precision Score,Accuracy score,Recall Score,F1 score,Confusion Matrix
0,LR,"[0.933, 1.0]",0.958333,"[1.0, 0.9]","[0.966, 0.947]","[[28, 0, 0], [2, 18, 0], [0, 0, 0]]"
1,DTC,"[0.897, 0.895]",0.895833,"[0.929, 0.85]","[0.912, 0.872]","[[26, 2, 0], [3, 17, 0], [0, 0, 0]]"
2,SVC,"[0.963, 0.905]",0.9375,"[0.929, 0.95]","[0.945, 0.927]","[[26, 2, 0], [1, 19, 0], [0, 0, 0]]"


In [16]:
with open("./all_sklearn.pkl", "wb") as f:
    pickle.dump(models["LR"], f)
# Dump input scaler
with open("./input_scalerall.pkl", "wb") as f:
    pickle.dump(sc, f)