# 0. Install and Import Dependencies

In [None]:
# !pip install mediapipe opencv-python pandas scikit-learn

In [1]:
import mediapipe as mp # Import mediapipe
import cv2 # Import opencv

In [2]:
mp_drawing = mp.solutions.drawing_utils # Drawing helpers
mp_holistic = mp.solutions.holistic # Mediapipe Solutions

# 1. Capture Landmarks & Export to CSV
<!--<img src="https://i.imgur.com/8bForKY.png">-->
<!--<img src="https://i.imgur.com/AzKNp7A.png">-->

In [3]:
import csv
import os
import numpy as np

In [4]:
num_coords = 75
num_coords

75

In [5]:
landmarks = ['class']
for val in range(1, num_coords + 1):
    landmarks += ['x{}'.format(val), 'y{}'.format(val), 'z{}'.format(val), 'v{}'.format(val)]

In [None]:
with open('coords.csv', mode='w', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(landmarks)

Step 1: Choose an action from the list below by rewriting class_name variable

In [6]:
actions = np.array(['raise_hand', 'thumbs_up', 'thumbs_down', 'cheer', 'cross_arms', 'clap'])
class_name = "clap" # choose from list above

Step 2: Run the code below for any amount of time, press q to quit

Step 3: Repeat Step 1 with a different action until all actions completed

In [None]:
cap = cv2.VideoCapture(0)
# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False        
        
        # Make Detections
        results = holistic.process(image)
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # 1. Draw face landmarks
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, 
                                 mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                                 mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1)
                                 )
        
        # 2. Draw right hand landmarks
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2)
                                 )

        # 3. Draw left hand landmarks
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)
                                 )

        # 4. Draw pose detection landmarks
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)
                                 )
        # Export coordinates
        try:
            # Extract Pose landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
            
            # Extract Left Hand landmarks
            lhand = results.left_hand_landmarks.landmark
            lhand_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in lhand]).flatten())
            
            # Extract Right Hand landmarks
            rhand = results.right_hand_landmarks.landmark
            rhand_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in rhand]).flatten())
            
            # Concate rows
            row = pose_row + lhand_row + rhand_row
            
            # Append class name 
            row.insert(0, class_name)
            
            # Export to CSV
            with open('coords.csv', mode='a', newline='') as f:
                csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow(row) 
            
        except:
            pass
                        
        cv2.imshow('Raw Webcam Feed', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

# 3. Train Custom Model Using Scikit Learn

## 3.1 Read in Collected Data and Process

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [12]:
df = pd.read_csv('coords.csv')

In [13]:
df.head()

Unnamed: 0,raise_hand,0.544867217540741,0.536960303783417,-0.707772016525269,0.999994218349457,0.566478848457336,0.480075299739838,-0.646440982818604,0.999987244606018,0.580689787864685,...,-0.140474811196327,0.507,0.220968365669251,0.989262938499451,-0.141931176185608,0.508,0.230266228318214,1.00255727767944,-0.141073063015938,0.509
0,raise_hand,0.546196,0.540488,-0.685029,0.999994,0.567369,0.483232,-0.617822,0.999988,0.58143,...,-0.120326,0,0.205334,0.967968,-0.125019,0,0.218042,0.992212,-0.125968,0
1,raise_hand,0.546502,0.540943,-0.696263,0.999994,0.567782,0.483546,-0.632721,0.999988,0.581814,...,-0.119759,0,0.195935,0.969464,-0.122299,0,0.202761,0.996033,-0.12193,0
2,raise_hand,0.547892,0.541529,-0.633767,0.999995,0.568902,0.484224,-0.57137,0.999989,0.582736,...,-0.10871,0,0.199505,0.968332,-0.109723,0,0.20625,0.993365,-0.108446,0
3,raise_hand,0.547707,0.541499,-0.781555,0.999994,0.568979,0.484196,-0.719829,0.999989,0.582876,...,-0.105489,0,0.210618,0.977223,-0.104734,0,0.217494,0.995953,-0.103394,0
4,raise_hand,0.549083,0.542887,-0.73064,0.999994,0.569918,0.485704,-0.669111,0.99999,0.583576,...,-0.121371,0,0.213883,0.975762,-0.124379,0,0.222438,0.992003,-0.12561,0


In [14]:
df.tail()

Unnamed: 0,raise_hand,0.544867217540741,0.536960303783417,-0.707772016525269,0.999994218349457,0.566478848457336,0.480075299739838,-0.646440982818604,0.999987244606018,0.580689787864685,...,-0.140474811196327,0.507,0.220968365669251,0.989262938499451,-0.141931176185608,0.508,0.230266228318214,1.00255727767944,-0.141073063015938,0.509
710,cheer,0.542578,0.530349,-0.67852,0.999992,0.570667,0.479476,-0.587828,0.999985,0.58694,...,0.006168,0,0.246501,0.036008,0.002295,0,0.255794,0.008575,-0.000151,0
711,cheer,0.543096,0.530347,-0.679038,0.999992,0.571286,0.479248,-0.590333,0.999985,0.587669,...,0.010461,0,0.244034,0.034173,0.007217,0,0.252861,0.007693,0.005331,0
712,cheer,0.543103,0.530316,-0.675361,0.999992,0.57145,0.479165,-0.589569,0.999985,0.587907,...,0.003937,0,0.242888,0.023611,0.000258,0,0.252928,-0.004916,-0.001621,0
713,cheer,0.5435,0.531399,-0.65174,0.999992,0.571887,0.480035,-0.570431,0.999986,0.588159,...,-0.028212,0,0.225817,0.046541,-0.035188,0,0.235268,0.014611,-0.039284,0
714,cheer,0.544668,0.531991,-0.632472,0.999992,0.572765,0.480487,-0.552969,0.999986,0.58872,...,-0.023013,0,0.224404,0.047853,-0.029372,0,0.233167,0.016232,-0.032931,0


In [15]:
df.shape

(715, 2173)

In [17]:
X = df.drop('class', axis=1) # features
y = df['class'] # target value

In [18]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1234)

## 3.2 Train Machine Learning Classification Model

In [19]:
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [20]:
pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression()),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [21]:
fit_models = {}
for algo, pipeline in pipelines.items():
    model = pipeline.fit(X_train, y_train)
    fit_models[algo] = model

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [22]:
fit_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

In [None]:
fit_models['rc'].predict(X_test)

## 3.3 Evaluate and Serialize Model 

In [23]:
from sklearn.metrics import accuracy_score # Accuracy metrics 
import pickle 

In [24]:
for algo, model in fit_models.items():
    yhat = model.predict(X_test)
    print(algo, accuracy_score(y_test, yhat))

lr 1.0
rc 1.0
rf 1.0
gb 0.9953488372093023


In [25]:
fit_models['rf'].predict(X_test)

array(['clap', 'clap', 'cheer', 'thumbs_down', 'clap', 'cross_arms',
       'thumbs_up', 'thumbs_down', 'thumbs_down', 'clap', 'clap',
       'thumbs_down', 'clap', 'thumbs_up', 'thumbs_down', 'cheer',
       'thumbs_down', 'clap', 'thumbs_up', 'thumbs_down', 'thumbs_down',
       'raise_hand', 'clap', 'clap', 'clap', 'thumbs_down', 'clap',
       'thumbs_up', 'clap', 'clap', 'thumbs_down', 'thumbs_up',
       'thumbs_down', 'thumbs_up', 'thumbs_up', 'thumbs_down',
       'raise_hand', 'thumbs_up', 'thumbs_down', 'thumbs_up', 'thumbs_up',
       'thumbs_down', 'cross_arms', 'clap', 'thumbs_down', 'thumbs_up',
       'thumbs_up', 'thumbs_down', 'cheer', 'thumbs_up', 'cheer', 'cheer',
       'clap', 'thumbs_down', 'raise_hand', 'raise_hand', 'raise_hand',
       'thumbs_down', 'cheer', 'clap', 'raise_hand', 'thumbs_up',
       'thumbs_up', 'thumbs_up', 'raise_hand', 'thumbs_down',
       'thumbs_down', 'cheer', 'thumbs_down', 'raise_hand', 'clap',
       'thumbs_down', 'thumbs_up', 'thum

In [26]:
y_test

522           clap
473           clap
646          cheer
267    thumbs_down
444           clap
          ...     
587           clap
40      raise_hand
149      thumbs_up
95      raise_hand
466           clap
Name: raise_hand, Length: 215, dtype: object

In [27]:
with open('body_language.pkl', 'wb') as f:
    pickle.dump(fit_models, f)

# 4. Make Detections with Model

In [10]:
with open('body_language.pkl', 'rb') as f:
    model = pickle.load(f)

EOFError: Ran out of input

In [28]:
model

In [31]:
cap = cv2.VideoCapture(0)
# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False        
        
        # Make Detections
        results = holistic.process(image)
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # 1. Draw face landmarks
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, 
                                 mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                                 mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1)
                                 )
        
        # 2. Draw right hand landmarks
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2)
                                 )

        # 3. Draw left hand landmarks
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)
                                 )

        # 4. Draw pose detection landmarks
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)
                                 )
        # Export coordinates
        try:
            # Extract Pose Landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
           
            # Extract Left Hand landmarks
            lhand = results.left_hand_landmarks.landmark
            lhand_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in lhand]).flatten())
            
            # Extract Right Hand landmarks
            rhand = results.right_hand_landmarks.landmark
            rhand_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in rhand]).flatten())
            
            # Concate rows
            row = pose_row + lhand_row + rhand_row

            # Make Detections
            X = pd.DataFrame([row])
            body_language_class = model.predict(X)[0]
            body_language_prob = model.predict_proba(X)[0]
            print(body_language_class, body_language_prob)
            
            # Grab ear coords
            coords = tuple(np.multiply(
                            np.array(
                                (results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].x, 
                                 results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].y))
                        , [640, 480]).astype(int))
            
            cv2.rectangle(image, 
                          (coords[0], coords[1] + 5), 
                          (coords[0] + len(body_language_class) * 20, coords[1] - 30), 
                          (245, 117, 16), -1)
            cv2.putText(image, body_language_class, coords, 
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            # Get status box
            cv2.rectangle(image, (0, 0), (250, 60), (245, 117, 16), -1)
            
            # Display Class
            cv2.putText(image, 'CLASS'
                        , (95, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, body_language_class.split(' ')[0]
                        , (90, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            # Display Probability
            cv2.putText(image, 'PROB'
                        , (15, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, str(round(body_language_prob[np.argmax(body_language_prob)],2))
                        , (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
        except:
            pass
                        
        cv2.imshow('Raw Webcam Feed', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()



In [None]:
tuple(np.multiply(np.array((results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].x, 
results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].y)), [640, 480]).astype(int))