# Body Language Decoder

## Table of Contents
0. [Install and Import Dependencies](#install)
1. [Detection using MediaPipe](#detection) 
2. [Feature Extraction](#feature-extraction) 
    1. [Write Column Headers to CSV File](#csv-header)
    2. [Extract Features of Assigned Class](#save-coordinates)
3. [Train Custom Model Using Scikit Learn](#model)
    1. [Load and Preprocess Input Data](#load-input)
    2. [Train Machine Learning Classification Models](#training)
    3. [Evaluate and Serialize Model](#evaluate)
4. [Real-time Detections with Model](#real-time-detection)

## 0. Install and Import Dependencies <a id="install"></a>

In [None]:
# !pip install mediapipe opencv-python pandas scikit-learn

In [1]:
from mediapipe import solutions as mp
import cv2
import csv
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import pickle
import os

## 1. Detection using MediaPipe <a id="detection"></a>

In [2]:
cap = cv2.VideoCapture(0)

# Drawing table, built once instead of on every frame. Each entry is:
# (attribute on the holistic result, connection set, landmark style, connection style).
# Entries are drawn in list order: face, right hand, left hand, pose.
# NOTE(review): FACE_CONNECTIONS may be exposed under a different name in newer
# MediaPipe releases -- confirm against the installed version.
draw_specs = [
    ("face_landmarks", mp.holistic.FACE_CONNECTIONS,
     mp.drawing_utils.DrawingSpec(color=(242, 216, 175), thickness=1, circle_radius=1),
     mp.drawing_utils.DrawingSpec(color=(242, 216, 175), thickness=1)),
    ("right_hand_landmarks", mp.holistic.HAND_CONNECTIONS,
     mp.drawing_utils.DrawingSpec(color=(181, 137, 82), thickness=2, circle_radius=4),
     mp.drawing_utils.DrawingSpec(color=(181, 137, 82), thickness=2)),
    ("left_hand_landmarks", mp.holistic.HAND_CONNECTIONS,
     mp.drawing_utils.DrawingSpec(color=(181, 137, 82), thickness=2, circle_radius=4),
     mp.drawing_utils.DrawingSpec(color=(181, 137, 82), thickness=2)),
    ("pose_landmarks", mp.holistic.POSE_CONNECTIONS,
     mp.drawing_utils.DrawingSpec(color=(65, 53, 31), thickness=2, circle_radius=2),
     mp.drawing_utils.DrawingSpec(color=(65, 53, 31), thickness=2)),
]

try:
    # Initiate holistic model
    with mp.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        while cap.isOpened():
            success, frame = cap.read()

            # Stop as soon as the camera fails to deliver a frame
            if not success:
                break

            # Convert the BGR camera frame to RGB before running the model
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Make detections; `results` keeps the last processed frame after the loop ends
            results = holistic.process(image)

            # Convert back to BGR for rendering with OpenCV
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Overlay every landmark group on the frame
            for attr, connections, landmark_spec, connection_spec in draw_specs:
                mp.drawing_utils.draw_landmarks(image, getattr(results, attr), connections,
                                                landmark_spec, connection_spec)

            cv2.imshow("Holistic Model Detection", image)

            # Press "q" to exit
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even if the loop
    # raised or the cell was interrupted.
    cap.release()
    cv2.destroyAllWindows()

In [3]:
# Inspect the pose landmarks of the last frame processed by the capture loop
results.pose_landmarks

landmark {
  x: 0.5267259
  y: 0.6596185
  z: -1.5341202
  visibility: 0.9998993
}
landmark {
  x: 0.55522037
  y: 0.5783175
  z: -1.457382
  visibility: 0.9998686
}
landmark {
  x: 0.5771275
  y: 0.5786438
  z: -1.457639
  visibility: 0.99986154
}
landmark {
  x: 0.5975679
  y: 0.5778677
  z: -1.4578559
  visibility: 0.99981767
}
landmark {
  x: 0.4857009
  y: 0.5732176
  z: -1.4769529
  visibility: 0.9998871
}
landmark {
  x: 0.45699057
  y: 0.57228786
  z: -1.4770635
  visibility: 0.9998968
}
landmark {
  x: 0.43023258
  y: 0.5726263
  z: -1.4769832
  visibility: 0.99987715
}
landmark {
  x: 0.6167451
  y: 0.6059618
  z: -0.9424723
  visibility: 0.99983907
}
landmark {
  x: 0.39359233
  y: 0.6017649
  z: -1.0444511
  visibility: 0.99988544
}
landmark {
  x: 0.55645
  y: 0.73150635
  z: -1.3337765
  visibility: 0.99982435
}
landmark {
  x: 0.47358027
  y: 0.7281066
  z: -1.3573754
  visibility: 0.99985576
}
landmark {
  x: 0.7116631
  y: 0.9611046
  z: -0.59685177
  visibility: 0.985

## 2. Feature Extraction <a id="feature-extraction"></a>

### Write Column Headers to CSV File <a id="csv-header"></a>

In [4]:
# Count the pose and face landmarks in the most recent detection; their sum is
# the number of landmarks that each exported row will contain.
pose_count = len(results.pose_landmarks.landmark)
face_count = len(results.face_landmarks.landmark)
num_coords = pose_count + face_count
num_coords

501

In [5]:
# CSV header: the label column first, then x/y/z/v column names for every
# landmark, numbered from 1 to num_coords.
landmarks = ["class"]
for idx in range(1, num_coords + 1):
    landmarks.extend(f"{axis}{idx}" for axis in "xyzv")

In [6]:
# open() does not create missing directories, so make sure the folder exists first.
os.makedirs("data", exist_ok=True)

# Mode "w" truncates the file: re-running this cell discards previously collected rows.
with open("data/body_language_coords.csv", mode="w", newline="") as f:
    csv_writer = csv.writer(f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(landmarks)

### Extract Features of Assigned Class <a id="save-coordinates"></a>

- Repeat this step to save features for each target class.
- Make sure your samples cover different scenarios for each target.

In [13]:
# Label stored in the first CSV field of every row recorded by the collection loop
class_name = "raise"

In [14]:
def landmarks_to_row(landmark_list):
    """Flatten landmarks into one list: x, y, z, visibility for each landmark, in order."""
    return np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility]
                     for landmark in landmark_list]).flatten().tolist()


# Drawing table, built once instead of on every frame. Each entry is:
# (attribute on the holistic result, connection set, landmark style, connection style).
# Entries are drawn in list order: face, right hand, left hand, pose.
draw_specs = [
    ("face_landmarks", mp.holistic.FACE_CONNECTIONS,
     mp.drawing_utils.DrawingSpec(color=(242, 216, 175), thickness=1, circle_radius=1),
     mp.drawing_utils.DrawingSpec(color=(242, 216, 175), thickness=1)),
    ("right_hand_landmarks", mp.holistic.HAND_CONNECTIONS,
     mp.drawing_utils.DrawingSpec(color=(181, 137, 82), thickness=2, circle_radius=4),
     mp.drawing_utils.DrawingSpec(color=(181, 137, 82), thickness=2)),
    ("left_hand_landmarks", mp.holistic.HAND_CONNECTIONS,
     mp.drawing_utils.DrawingSpec(color=(181, 137, 82), thickness=2, circle_radius=4),
     mp.drawing_utils.DrawingSpec(color=(181, 137, 82), thickness=2)),
    ("pose_landmarks", mp.holistic.POSE_CONNECTIONS,
     mp.drawing_utils.DrawingSpec(color=(65, 53, 31), thickness=2, circle_radius=2),
     mp.drawing_utils.DrawingSpec(color=(65, 53, 31), thickness=2)),
]

cap = cv2.VideoCapture(0)
try:
    # Open the CSV once for the whole recording session instead of once per frame.
    with open("data/body_language_coords.csv", mode="a", newline="") as f:
        csv_writer = csv.writer(f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)

        with mp.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

            while cap.isOpened():
                success, frame = cap.read()

                # Stop as soon as the camera fails to deliver a frame
                if not success:
                    break

                # Convert the BGR camera frame to RGB before running the model
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Make detections
                results = holistic.process(image)

                # Convert back to BGR for rendering with OpenCV
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Overlay every landmark group on the frame
                for attr, connections, landmark_spec, connection_spec in draw_specs:
                    mp.drawing_utils.draw_landmarks(image, getattr(results, attr), connections,
                                                    landmark_spec, connection_spec)

                # Export coordinates only for frames where both pose and face were
                # detected. Hand landmarks are drawn above but not exported.
                if results.pose_landmarks and results.face_landmarks:
                    pose_row = landmarks_to_row(results.pose_landmarks.landmark)
                    face_row = landmarks_to_row(results.face_landmarks.landmark)

                    # Row layout: class label, then pose values, then face values
                    csv_writer.writerow([class_name] + pose_row + face_row)

                cv2.imshow("Holistic Model Detection", image)

                # Press "q" to exit
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
finally:
    # Always free the camera and close the preview window, even if the loop
    # raised or the cell was interrupted.
    cap.release()
    cv2.destroyAllWindows()

## 3. Train Custom Model Using Scikit Learn <a id="model"></a>

### Load and Preprocess Input Data <a id="load-input"></a>

In [15]:
# Load every recorded sample (one row per captured frame) and display the table
coords_path = "data/body_language_coords.csv"
df = pd.read_csv(coords_path)
df

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
0,right cross,0.481634,0.638690,-1.223230,0.999876,0.514188,0.561472,-1.151959,0.999781,0.534799,...,-0.009828,0.0,0.570018,0.533378,0.014218,0.0,0.576039,0.522098,0.014867,0.0
1,right cross,0.481683,0.631598,-0.967897,0.999862,0.514059,0.558415,-0.884569,0.999761,0.534764,...,-0.008309,0.0,0.559442,0.532211,0.014550,0.0,0.565604,0.521478,0.015113,0.0
2,right cross,0.477395,0.630917,-1.008824,0.999828,0.509993,0.558304,-0.927587,0.999708,0.531695,...,-0.007911,0.0,0.556724,0.529427,0.016187,0.0,0.562848,0.517802,0.016920,0.0
3,right cross,0.479631,0.626713,-1.022290,0.999820,0.510909,0.554343,-0.940453,0.999696,0.533595,...,-0.007964,0.0,0.562559,0.534030,0.013845,0.0,0.568916,0.521934,0.014404,0.0
4,right cross,0.488225,0.626787,-0.959260,0.999817,0.516332,0.554345,-0.879072,0.999698,0.538385,...,-0.012381,0.0,0.565439,0.535278,0.009425,0.0,0.571785,0.523138,0.010003,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
812,raise,0.538887,0.623328,-1.119153,0.999916,0.565032,0.552536,-0.995700,0.999831,0.585589,...,-0.006075,0.0,0.609848,0.557099,0.021008,0.0,0.616279,0.545586,0.022350,0.0
813,raise,0.515638,0.626589,-1.111372,0.999920,0.543540,0.554876,-0.997859,0.999839,0.564462,...,-0.003962,0.0,0.604257,0.567091,0.025365,0.0,0.610877,0.555761,0.026917,0.0
814,raise,0.511422,0.626549,-1.162492,0.999920,0.538632,0.554402,-1.063067,0.999835,0.559955,...,-0.005118,0.0,0.597226,0.570379,0.023999,0.0,0.603683,0.558276,0.025670,0.0
815,raise,0.506121,0.626574,-1.126606,0.999920,0.534801,0.554389,-1.024530,0.999833,0.556839,...,-0.007686,0.0,0.592707,0.568795,0.019837,0.0,0.598757,0.557001,0.021329,0.0


In [16]:
# Target: the class label column. Features: every remaining column.
y = df["class"]
X = df.drop(columns="class")

In [17]:
# Number of recorded samples per class label
y.value_counts()

cross          288
left cross     222
raise          177
right cross    130
Name: class, dtype: int64

In [18]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
# NOTE(review): rows come from consecutive video frames, so a random split
# probably places near-duplicate frames in both sets -- the scores may be optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=47,
)

### Train Machine Learning Classification Models <a id="training"></a>

In [19]:
# One scaler + classifier pipeline per candidate algorithm, keyed by a short name.
pipelines = {
    # With default settings LogisticRegression stopped at its iteration limit
    # on this data; raise max_iter so the solver has room to converge.
    "lr": make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000)),
    "svc": make_pipeline(StandardScaler(), SVC()),
    "rf": make_pipeline(StandardScaler(), RandomForestClassifier()),
    "knn": make_pipeline(StandardScaler(), KNeighborsClassifier()),
}

In [20]:
# Fit every candidate pipeline on the training split and keep the fitted models by name.
fit_models = {}
for algo, pipeline in pipelines.items():
    # y_train is already one-dimensional, so it is passed as-is
    # (Series.ravel() is deprecated in recent pandas releases).
    model = pipeline.fit(X_train, y_train)
    fit_models[algo] = model

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### Evaluate and Serialize Model <a id="evaluate"></a>

In [21]:
# Report the hold-out accuracy of each fitted pipeline
for name, fitted in fit_models.items():
    score = accuracy_score(y_test, fitted.predict(X_test))
    print(name, score)

lr 1.0
svc 0.9939024390243902
rf 1.0
knn 0.9817073170731707


In [22]:
# Print the hold-out confusion matrix of each fitted pipeline, one blank line between models
for name, fitted in fit_models.items():
    matrix = confusion_matrix(y_test, fitted.predict(X_test))
    print(name, matrix, sep="\n", end="\n\n")

lr
[[57  0  0  0]
 [ 0 45  0  0]
 [ 0  0 38  0]
 [ 0  0  0 24]]

svc
[[57  0  0  0]
 [ 1 44  0  0]
 [ 0  0 38  0]
 [ 0  0  0 24]]

rf
[[57  0  0  0]
 [ 0 45  0  0]
 [ 0  0 38  0]
 [ 0  0  0 24]]

knn
[[56  1  0  0]
 [ 1 43  0  1]
 [ 0  0 38  0]
 [ 0  0  0 24]]



In [23]:
# Create the output folder if needed; no error when it already exists
os.makedirs("generated_model", exist_ok=True)

# Key in fit_models of the pipeline to serialize
model_to_save = "rf"

# Save model to file
with open("generated_model/body_language_model.pkl", "wb") as f:
    pickle.dump(fit_models[model_to_save], f)

## 4. Real-time Detections with Model <a id="real-time-detection"></a>

In [24]:
# Reload the serialized pipeline for inference.
# pickle can execute code while loading, so only open model files you created yourself.
model_path = "generated_model/body_language_model.pkl"
with open(model_path, "rb") as model_file:
    model_inference = pickle.load(model_file)

In [25]:
def landmarks_to_row(landmark_list):
    """Flatten landmarks into one list: x, y, z, visibility for each landmark, in order."""
    return np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility]
                     for landmark in landmark_list]).flatten().tolist()


# Drawing table, built once instead of on every frame. Each entry is:
# (attribute on the holistic result, connection set, landmark style, connection style).
# Entries are drawn in list order: face, right hand, left hand, pose.
draw_specs = [
    ("face_landmarks", mp.holistic.FACE_CONNECTIONS,
     mp.drawing_utils.DrawingSpec(color=(242, 216, 175), thickness=1, circle_radius=1),
     mp.drawing_utils.DrawingSpec(color=(242, 216, 175), thickness=1)),
    ("right_hand_landmarks", mp.holistic.HAND_CONNECTIONS,
     mp.drawing_utils.DrawingSpec(color=(181, 137, 82), thickness=2, circle_radius=4),
     mp.drawing_utils.DrawingSpec(color=(181, 137, 82), thickness=2)),
    ("left_hand_landmarks", mp.holistic.HAND_CONNECTIONS,
     mp.drawing_utils.DrawingSpec(color=(181, 137, 82), thickness=2, circle_radius=4),
     mp.drawing_utils.DrawingSpec(color=(181, 137, 82), thickness=2)),
    ("pose_landmarks", mp.holistic.POSE_CONNECTIONS,
     mp.drawing_utils.DrawingSpec(color=(65, 53, 31), thickness=2, circle_radius=2),
     mp.drawing_utils.DrawingSpec(color=(65, 53, 31), thickness=2)),
]

# Column names the pipeline was fitted with, when the installed scikit-learn
# exposes them; None falls back to pandas' default integer columns. Labelling
# the live row like the training data avoids the missing-feature-names warning
# that recent scikit-learn versions emit at predict time.
feature_names = getattr(model_inference, "feature_names_in_", None)

cap = cv2.VideoCapture(0)
try:
    with mp.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        while cap.isOpened():
            success, frame = cap.read()

            # Stop as soon as the camera fails to deliver a frame
            if not success:
                break

            # Convert the BGR camera frame to RGB before running the model
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Make detections
            results = holistic.process(image)

            # Convert back to BGR for rendering with OpenCV
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Overlay every landmark group on the frame
            for attr, connections, landmark_spec, connection_spec in draw_specs:
                mp.drawing_utils.draw_landmarks(image, getattr(results, attr), connections,
                                                landmark_spec, connection_spec)

            # Classify only frames where both pose and face were detected
            if results.pose_landmarks and results.face_landmarks:
                # Concatenate pose values followed by face values, as in the recorded rows
                row = (landmarks_to_row(results.pose_landmarks.landmark)
                       + landmarks_to_row(results.face_landmarks.landmark))

                # Predict using inference model. A dedicated name is used so the
                # notebook-level training matrix X is not overwritten.
                live_features = pd.DataFrame([row], columns=feature_names)
                pred = model_inference.predict(live_features)[0]
                prob = np.max(model_inference.predict_proba(live_features)[0]).round(2)
                print(pred, prob)

                # Display result: filled banner with the probability on the left
                # and the predicted class on the right
                cv2.rectangle(image, (0, 0), (250, 60), (245, 117, 16), -1)
                cv2.putText(image, "CLASS", (95, 12), cv2.FONT_HERSHEY_SIMPLEX,
                            0.5, (0, 0, 0), 1, cv2.LINE_AA)
                cv2.putText(image, pred, (90, 40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (255, 255, 255), 2, cv2.LINE_AA)
                cv2.putText(image, "PROB", (15, 12), cv2.FONT_HERSHEY_SIMPLEX,
                            0.5, (0, 0, 0), 1, cv2.LINE_AA)
                cv2.putText(image, str(prob), (10, 40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow("Holistic Model Detection", image)

            # Press "q" to exit
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even if the loop
    # raised or the cell was interrupted.
    cap.release()
    cv2.destroyAllWindows()

left cross 0.6
left cross 0.61
left cross 0.49
left cross 0.59
left cross 0.56
left cross 0.51
left cross 0.55
left cross 0.39
right cross 0.72
right cross 0.54
right cross 0.48
cross 0.45
right cross 0.42
right cross 0.47
right cross 0.75
right cross 0.71
right cross 0.83
right cross 0.89
right cross 0.9
right cross 0.88
cross 0.41
left cross 0.41
left cross 0.6
left cross 0.72
left cross 0.91
left cross 0.93
left cross 0.91
left cross 0.92
left cross 0.92
left cross 0.93
left cross 0.9
cross 0.44
cross 0.62
cross 0.96
cross 0.87
cross 0.84
cross 0.83
cross 0.85
cross 0.85
cross 0.88
cross 0.91
cross 0.95
cross 0.53
cross 0.44
cross 0.36
cross 0.35
cross 0.42
cross 0.38
cross 0.44
cross 0.34
right cross 0.27
left cross 0.27
raise 0.27
raise 0.27
raise 0.29
cross 0.27
raise 0.31
raise 0.32
raise 0.33
raise 0.36
raise 0.43
raise 0.41
raise 0.41
raise 0.45
raise 0.47
raise 0.45
raise 0.46
raise 0.48
raise 0.47
raise 0.5
raise 0.55
raise 0.59
raise 0.63
raise 0.63
raise 0.6
raise 0.6
rais