# A Classification Model Designed for Multiclass Pose Detection with Scikit-learn

## Objective: Given a dataset of landmark coordinates produced by the MediaPipe Holistic/Hands solutions, the model learns from the gestures recorded in the dataset and then detects those gestures live on a webcam feed.

## Dataset: There are two types of datasets which can be used with this model.

- Holistic Dataset: The MediaPipe Holistic solution detects the position of a person's body in a webcam/image frame with 501-point precision, with each point having x, y, z and visibility coordinates. The holistic solution detects the pose, face and both hands of the person.
  
![alt text](https://miro.medium.com/max/1400/1*yerbuR_F4PI7SKyvTneDiA.png "Pose Solution Model")

----

- Hand Dataset: The MediaPipe Hands solution detects the position of a person's hand/hands in a webcam/image frame with 21-point precision, with each point having x, y and z (depth) coordinates.

![alt text](https://miro.medium.com/max/1400/1*Ytz19eku7HLGiJZysC6Hdg.png "Hands Solution Model")


### Install and Import Dependencies

In [None]:
!pip install mediapipe opencv-python pandas scikit-learn

### Preprocessing

In [41]:
# import dependencies
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import mediapipe as mp 
import cv2

In [42]:
# Load the hand-landmark coordinate dataset into a dataframe; the 'class'
# column holds the gesture label and is used as the target further down.
df = pd.read_csv('hands-coords.csv')

In [43]:
# Non-null value count of every column per gesture class, i.e. how many
# samples each class contributes (useful for spotting class imbalance).
df.groupby(df['class']).count()


Unnamed: 0_level_0,x1,y1,z1,x2,y2,z2,x3,y3,z3,x4,...,z18,x19,y19,z19,x20,y20,z20,x21,y21,z21
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CLOSE,1430,1430,1430,1430,1430,1430,1430,1430,1430,1430,...,1430,1430,1430,1430,1430,1430,1430,1430,1430,1430
FUCK,960,960,960,960,960,960,960,960,960,960,...,960,960,960,960,960,960,960,960,960,960
OPEN,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793,...,1793,1793,1793,1793,1793,1793,1793,1793,1793,1793
WOLF,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,...,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026


In [44]:
# (rows, columns) of the loaded dataset
df.shape

(5209, 64)

In [45]:
# Target: the gesture label of every sample
y = df['class']
# Features: every remaining column (the landmark coordinates)
X = df.drop(columns='class')

In [46]:
# Hold out 30% of the samples for evaluation; the fixed random_state keeps
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1234,
)

### Model Training

In [47]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [48]:
# One candidate pipeline per classifier. Every pipeline gets its own
# StandardScaler so that no fitted state is shared between candidates.
pipelines = {
    key: make_pipeline(StandardScaler(), estimator_cls())
    for key, estimator_cls in (
        ('lr', LogisticRegression),
        ('rc', RidgeClassifier),
        ('rf', RandomForestClassifier),
        ('gb', GradientBoostingClassifier),
    )
}

In [49]:
# Train every candidate on the training split. Pipeline.fit returns the
# (now fitted) pipeline itself, which is what gets stored under its key.
fit_models = {
    algo: candidate.fit(X_train, y_train)
    for algo, candidate in pipelines.items()
}

In [50]:
# Inspect the fitted pipelines, keyed by their short algorithm name
fit_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

## Model Evaluation and Serialization

In [51]:
from sklearn.metrics import accuracy_score
import pickle

In [52]:
# Score every fitted pipeline on the held-out test split
model_accuracy = {
    algo: accuracy_score(y_test, fitted.predict(X_test))
    for algo, fitted in fit_models.items()
}

In [53]:
# Test-set accuracy of each pipeline, keyed by its short algorithm name
model_accuracy

{'lr': 0.9980806142034548,
 'rc': 0.9929622520793346,
 'rf': 0.9987204094689699,
 'gb': 0.9980806142034548}

In [54]:
# Choose the best model: the key whose accuracy is highest.
# NOTE: a plain max(model_accuracy) compares the dictionary KEYS as strings,
# so it would always return the alphabetically last name ('rf') no matter
# which pipeline actually scored best. key=model_accuracy.get ranks the keys
# by their accuracy value instead.
best_model = max(model_accuracy, key=model_accuracy.get)
best_model

'rf'

In [55]:
# Serialize the winning pipeline (scaler + classifier) to disk so the
# detection cells can reload it without retraining.
best_pipeline = fit_models[best_model]
with open('body_language_decoder.pkl', mode='wb') as model_file:
    pickle.dump(best_pipeline, model_file)

# Make Detections with the Model

In [56]:
# Reload the serialized pipeline. Only unpickle files you created yourself:
# pickle can execute arbitrary code while loading.
with open('body_language_decoder.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

In [57]:
# Confirm which pipeline was restored from disk
model

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('randomforestclassifier', RandomForestClassifier())])

#### Holistic Detections (only use with a model trained on the holistic dataset)

In [58]:
# Live gesture classification from the webcam using MediaPipe Holistic.
# Requires `model` to have been trained on the holistic dataset (pose + face
# landmarks, 4 values per landmark). Press 'q' in the video window to stop.
import traceback

mp_drawing = mp.solutions.drawing_utils # Drawing helper
mp_holistic = mp.solutions.holistic

# Newer MediaPipe releases expose the face contour edges as FACEMESH_CONTOURS;
# older releases call the same set FACE_CONNECTIONS. Support both.
face_connections = getattr(mp_holistic, 'FACEMESH_CONTOURS', None)
if face_connections is None:
    face_connections = mp_holistic.FACE_CONNECTIONS

# Column names seen during training (None when the model was fitted on a bare
# array). Reusing them keeps the live sample consistent with the training
# frame and avoids scikit-learn's "X does not have valid feature names" warning.
feature_names = getattr(model, 'feature_names_in_', None)

cap = cv2.VideoCapture(0)
try:
    # Initiate holistic model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera busy or unplugged); `frame`
                # is None here and cvtColor would raise, so stop cleanly.
                break

            # Recolor feed: MediaPipe expects RGB, OpenCV delivers BGR
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make detections
            results = holistic.process(image)

            # Recolor image back to BGR for rendering
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # 1. Draw face landmarks
            mp_drawing.draw_landmarks(image, results.face_landmarks, face_connections,
                                     mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                                     mp_drawing.DrawingSpec(color=(80,255,121), thickness=1, circle_radius=1)
                                     )

            # 2. Right hand
            mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                     )

            # 3. Left hand
            mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                     )

            # 4. Pose detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                     )

            # Classify only when both pose and face were found in this frame;
            # a frame without them is normal and is simply shown unannotated.
            pose_landmarks = results.pose_landmarks
            face_landmarks = results.face_landmarks
            if pose_landmarks is not None and face_landmarks is not None:
                try:
                    # Flatten pose then face landmarks into one feature row
                    pose_row = list(np.array([[lm.x, lm.y, lm.z, lm.visibility] for lm in pose_landmarks.landmark]).flatten())
                    face_row = list(np.array([[lm.x, lm.y, lm.z, lm.visibility] for lm in face_landmarks.landmark]).flatten())
                    row = pose_row + face_row

                    # Make detections. A separate name is used so the training
                    # feature matrix `X` is not overwritten by a single frame.
                    sample = pd.DataFrame([row], columns=feature_names)
                    body_language_class = str(model.predict(sample)[0])
                    body_language_prob = model.predict_proba(sample)[0]

                    # Landmarks are normalised to [0, 1]; scale the left-ear
                    # position by the real frame size instead of assuming 640x480.
                    frame_height, frame_width = image.shape[:2]
                    left_ear = pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR]
                    coords = (int(left_ear.x * frame_width), int(left_ear.y * frame_height))

                    # Label next to the ear
                    cv2.rectangle(image,
                                  (coords[0], coords[1]+5),
                                  (coords[0]+len(body_language_class)*20, coords[1]-30),
                                  (245, 117, 16), -1)
                    cv2.putText(image, body_language_class, coords,
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                    # Status box in the top-left corner
                    cv2.rectangle(image, (0,0), (250, 60), (245, 117, 16), -1)

                    # Display class
                    cv2.putText(image, 'CLASS'
                                , (95,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, body_language_class.split(' ')[0]
                                , (90,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                    # Display probability of the most likely class
                    cv2.putText(image, 'PROB'
                                , (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, str(round(body_language_prob[np.argmax(body_language_prob)],2))
                                , (10,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                except Exception:
                    # Keep the feed alive, but report the problem (e.g. a model
                    # trained on a different dataset rejects the feature count)
                    # instead of silently swallowing it. KeyboardInterrupt is
                    # deliberately not caught so the kernel can be interrupted.
                    traceback.print_exc()

            cv2.imshow('Raw Webcam Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even after an error
    cap.release()
    cv2.destroyAllWindows()


#### Hands Detections (only use with a model trained on the hands dataset)

In [59]:
# Live gesture classification from the webcam using MediaPipe Hands.
# Requires `model` to have been trained on the hands dataset (x, y, z for each
# hand landmark). Press 'q' in the video window to stop.
import traceback

mp_drawing = mp.solutions.drawing_utils # Drawing helper
mp_hands = mp.solutions.hands

# Column names seen during training (None when the model was fitted on a bare
# array). Reusing them keeps the live sample consistent with the training
# frame and avoids scikit-learn's "X does not have valid feature names" warning.
feature_names = getattr(model, 'feature_names_in_', None)

cap = cv2.VideoCapture(0)
try:
    # Initiate hands model (tracks at most one hand)
    with mp_hands.Hands(max_num_hands=1) as hand:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera busy or unplugged); `frame`
                # is None here and cvtColor would raise, so stop cleanly.
                break

            # Recolor the image: MediaPipe expects RGB, OpenCV delivers BGR
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False
            # Make detections
            results = hand.process(image)

            # Back to BGR for rendering
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            if results.multi_hand_landmarks:
                # Draw every detected hand
                for handLms in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(image, handLms, mp_hands.HAND_CONNECTIONS)

                try:
                    # Flatten the first hand's landmarks into one feature row
                    detected_hand = results.multi_hand_landmarks[0].landmark
                    row = list(np.array([[lm.x, lm.y, lm.z] for lm in detected_hand]).flatten())

                    # Make detections. A separate name is used so the training
                    # feature matrix `X` is not overwritten by a single frame.
                    sample = pd.DataFrame([row], columns=feature_names)
                    body_language_class = str(model.predict(sample)[0])
                    body_language_prob = model.predict_proba(sample)[0]

                    # Status box in the top-left corner
                    cv2.rectangle(image, (0,0), (250, 60), (245, 117, 16), -1)

                    # Display class
                    cv2.putText(image, 'CLASS'
                                , (95,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, body_language_class.split(' ')[0]
                                , (90,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                    # Display probability of the most likely class
                    cv2.putText(image, 'PROB'
                                , (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, str(round(body_language_prob[np.argmax(body_language_prob)],2))
                                , (10,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                except Exception:
                    # Keep the feed alive but report the problem. Catching
                    # Exception (not a bare except) lets KeyboardInterrupt
                    # through so the kernel can still be interrupted.
                    traceback.print_exc()

            cv2.imshow('Raw Webcam Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even after an error
    cap.release()
    cv2.destroyAllWindows()