In [7]:
pip install mediapipe opencv-python

Collecting mediapipe
  Downloading mediapipe-0.10.14-cp312-cp312-win_amd64.whl.metadata (9.9 kB)
Collecting opencv-python
  Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting absl-py (from mediapipe)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting flatbuffers>=2.0 (from mediapipe)
  Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting jax (from mediapipe)
  Downloading jax-0.4.31-py3-none-any.whl.metadata (22 kB)
Collecting jaxlib (from mediapipe)
  Downloading jaxlib-0.4.31-cp312-cp312-win_amd64.whl.metadata (1.0 kB)
Collecting opencv-contrib-python (from mediapipe)
  Downloading opencv_contrib_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.0-py3-none-win_amd64.whl.metadata (1.4 kB)
Collecting ml-dtypes>=0.2.0 (from jax->mediapipe)
  Downloading ml_dtypes-0.4.0-cp312-cp312-win_amd64.whl.metadata (20 kB)



[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [25]:
import mediapipe as mp
import numpy as np
import cv2
import csv
import os
import matplotlib.pyplot as plt

In [26]:
# Shorthand handles for the two MediaPipe modules used throughout:
# the pose solution itself, and the helpers that draw landmarks onto a frame.
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

## 1. Make some detections

In [3]:
# Live preview: run pose detection on the default webcam and draw the
# detected landmarks on every frame. Press "q" in the window to stop.
cap = cv2.VideoCapture(0)

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera busy/unplugged); stop instead
                # of handing an empty frame to cvtColor.
                break

            # Recolour feed: OpenCV delivers BGR, MediaPipe Pose takes RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Read-only while processing, as in the MediaPipe sample code.
            frame.flags.writeable = False

            # Make detections
            results = pose.process(frame)

            # Recolour back for rendering
            frame.flags.writeable = True
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(117,66,117), thickness=2, circle_radius=2))

            cv2.imshow('Initial Raw webcam feed',frame)

            if cv2.waitKey(10) & 0xFF == ord("q"):
                break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()



## 1.1 Capture video for ML model training

In [14]:
# Record the default webcam to 'video.avi' until "q" is pressed or the
# camera stops delivering frames.
cap = cv2.VideoCapture(0)

height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
# cap.get() returns 0 for properties the backend does not support; a writer
# opened with 0 FPS is not usable, so fall back to a conventional 30 FPS.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
videoWriter = cv2.VideoWriter('video.avi', cv2.VideoWriter_fourcc('P', 'I', 'M', '1'), fps, (int(width), int(height)))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # Nothing was read; stop rather than writing an empty frame.
            break

        cv2.imshow('Saving Video', frame)
        videoWriter.write(frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
except cv2.error as e:
    # Still best-effort (recording simply stops), but report the reason
    # instead of swallowing it.
    print(f"Recording stopped early: {e}")
finally:
    # Releasing the writer is what finalises the file on disk.
    videoWriter.release()
    cap.release()
    cv2.destroyAllWindows()

print("Video saved")

Video saved


## 2. Capture landmarks from video and export to csv file

In [27]:
# CSV column names: a leading 'class' label, then x / y / z / v (visibility)
# for each of the 33 pose landmarks, numbered from 1.
landmarks = ['class'] + [
    f'{axis}{val}'
    for val in range(1, 33 + 1)
    for axis in ('x', 'y', 'z', 'v')
]

In [28]:
landmarks[1:]

['x1',
 'y1',
 'z1',
 'v1',
 'x2',
 'y2',
 'z2',
 'v2',
 'x3',
 'y3',
 'z3',
 'v3',
 'x4',
 'y4',
 'z4',
 'v4',
 'x5',
 'y5',
 'z5',
 'v5',
 'x6',
 'y6',
 'z6',
 'v6',
 'x7',
 'y7',
 'z7',
 'v7',
 'x8',
 'y8',
 'z8',
 'v8',
 'x9',
 'y9',
 'z9',
 'v9',
 'x10',
 'y10',
 'z10',
 'v10',
 'x11',
 'y11',
 'z11',
 'v11',
 'x12',
 'y12',
 'z12',
 'v12',
 'x13',
 'y13',
 'z13',
 'v13',
 'x14',
 'y14',
 'z14',
 'v14',
 'x15',
 'y15',
 'z15',
 'v15',
 'x16',
 'y16',
 'z16',
 'v16',
 'x17',
 'y17',
 'z17',
 'v17',
 'x18',
 'y18',
 'z18',
 'v18',
 'x19',
 'y19',
 'z19',
 'v19',
 'x20',
 'y20',
 'z20',
 'v20',
 'x21',
 'y21',
 'z21',
 'v21',
 'x22',
 'y22',
 'z22',
 'v22',
 'x23',
 'y23',
 'z23',
 'v23',
 'x24',
 'y24',
 'z24',
 'v24',
 'x25',
 'y25',
 'z25',
 'v25',
 'x26',
 'y26',
 'z26',
 'v26',
 'x27',
 'y27',
 'z27',
 'v27',
 'x28',
 'y28',
 'z28',
 'v28',
 'x29',
 'y29',
 'z29',
 'v29',
 'x30',
 'y30',
 'z30',
 'v30',
 'x31',
 'y31',
 'z31',
 'v31',
 'x32',
 'y32',
 'z32',
 'v32',
 'x33',
 'y3

In [5]:
# Start a fresh CSV containing only the header row (the column names in
# `landmarks`).
# NOTE(review): this writes 'landmarks.csv', whereas export_landmarks appends
# to 'coords_part_2.csv' and training reads 'coords.csv' — confirm which file
# is meant to carry the header.
with open('landmarks.csv', mode='w', newline='') as header_file:
    header_writer = csv.writer(header_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    header_writer.writerow(landmarks)

In [29]:
def export_landmarks(results, action, csv_path='coords_part_2.csv'):
    """Append one labelled pose sample to a CSV file.

    The written row is ``action`` followed by ``x, y, z, visibility`` for
    every entry of ``results.pose_landmarks.landmark``, in order.

    Args:
        results: Object exposing a ``pose_landmarks`` attribute (the value
            returned by ``pose.process`` in this notebook). When that
            attribute is missing or ``None`` nothing is written.
        action: Class label stored in the first column of the row.
        csv_path: File to append to. Defaults to the path this function
            previously had hard-coded.

    Returns:
        None. Skipped samples and file errors are reported with ``print``
        rather than raised, so a capture loop calling this keeps running.
    """
    pose_landmarks = getattr(results, 'pose_landmarks', None)
    if pose_landmarks is None:
        # Previously this case surfaced as a silently swallowed
        # AttributeError; make the skip visible instead.
        print(f"No pose detected; nothing exported for '{action}'")
        return

    keypoints = [action]
    for res in pose_landmarks.landmark:
        keypoints.extend([res.x, res.y, res.z, res.visibility])

    try:
        with open(csv_path, mode='a', newline='') as f:
            csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csv_writer.writerow(keypoints)
    except OSError as e:
        # Best-effort export: report the failed write but do not abort the caller.
        print(f"Could not write landmarks to {csv_path}: {e}")

In [30]:
results.pose_landmarks

landmark {
  x: 0.620779037
  y: 0.697657168
  z: -1.33596301
  visibility: 0.9993698
}
landmark {
  x: 0.650021076
  y: 0.628593683
  z: -1.26284027
  visibility: 0.999261081
}
landmark {
  x: 0.663505197
  y: 0.62894
  z: -1.26319158
  visibility: 0.999364197
}
landmark {
  x: 0.676163495
  y: 0.629597068
  z: -1.26312876
  visibility: 0.999184906
}
landmark {
  x: 0.595505238
  y: 0.618217707
  z: -1.26351595
  visibility: 0.999244
}
landmark {
  x: 0.573211551
  y: 0.611055
  z: -1.26329708
  visibility: 0.999201715
}
landmark {
  x: 0.55461973
  y: 0.605435371
  z: -1.26375258
  visibility: 0.999031067
}
landmark {
  x: 0.688717246
  y: 0.642050862
  z: -0.765923321
  visibility: 0.999248922
}
landmark {
  x: 0.528370082
  y: 0.61471653
  z: -0.767634153
  visibility: 0.999538898
}
landmark {
  x: 0.647233844
  y: 0.753465116
  z: -1.14148617
  visibility: 0.998968124
}
landmark {
  x: 0.581183255
  y: 0.741839409
  z: -1.14401782
  visibility: 0.999474466
}
landmark {
  x: 0.8040

In [31]:
export_landmarks(results, 'up')

## 2.1 Load the saved video and build the CSV file from it

In [None]:
# Play back a recorded video with pose landmarks drawn on it. While it plays,
# pressing r / l / m appends the current frame's landmarks to the CSV with the
# label "right" / "left" / "mid"; pressing q stops playback.
# NOTE(review): the recording cell writes 'video.avi'; confirm that 'ml.avi'
# is the intended input file.
cap = cv2.VideoCapture('ml.avi')

# Key code -> class label passed to export_landmarks.
label_for_key = {ord('r'): "right", ord('l'): "left", ord('m'): "mid"}

try:
    # Initiate pose model
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.2) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of the video (or unreadable file): stop cleanly instead
                # of passing an empty frame to cvtColor.
                break

            # Recolour Feed: OpenCV delivers BGR, MediaPipe Pose takes RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame.flags.writeable = False

            # Make Detections
            results = pose.process(frame)

            # Recolour frame back to BGR for rendering
            frame.flags.writeable = True
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(117,66,117), thickness=2, circle_radius=2))

            cv2.imshow('Raw webcam feed',frame)

            # Poll the keyboard once per frame. With two separate waitKey
            # calls, whichever call received the key press hid it from the
            # other, so label keys and "q" could both be missed.
            k = cv2.waitKey(1) & 0xFF
            if k == ord('q'):
                break
            if k in label_for_key:
                export_landmarks(results, label_for_key[k])
finally:
    # Release the video and close the window even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

## 3. Train Custom Model using scikit-learn

**3.1 Read in Collected Data and Process**

In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split

## Wide-narrow-neutral model

In [16]:
# Load the labelled landmark samples used for training.
# NOTE(review): the capture cells above write 'landmarks.csv' and
# 'coords_part_2.csv', not 'coords.csv' — presumably this file was assembled
# or renamed by hand; confirm its provenance.
df = pd.read_csv("coords.csv")

In [24]:
df

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z31,v31,x32,y32,z32,v32,x33,y33,z33,v33
0,wide,0.486866,0.092757,-0.459668,0.999995,0.494195,0.076068,-0.438539,0.999984,0.499258,...,0.258944,0.919970,0.622600,0.778899,0.065754,0.993746,0.341608,0.782204,0.098797,0.992899
1,wide,0.478396,0.097314,-0.479792,0.999997,0.485839,0.081843,-0.458554,0.999989,0.490650,...,0.246456,0.928028,0.619905,0.779492,0.115507,0.994298,0.341757,0.782840,0.081611,0.993506
2,wide,0.477945,0.096811,-0.440314,0.999997,0.485219,0.081407,-0.417771,0.999990,0.489971,...,0.228079,0.928139,0.619793,0.779380,0.111495,0.994229,0.342462,0.782806,0.063144,0.993581
3,wide,0.477502,0.096399,-0.438518,0.999997,0.484732,0.081033,-0.416107,0.999990,0.489472,...,0.227770,0.927914,0.619534,0.779366,0.113318,0.994144,0.343010,0.782748,0.063218,0.993615
4,wide,0.477413,0.096976,-0.483355,0.999997,0.484759,0.081420,-0.464002,0.999990,0.489531,...,0.263808,0.928478,0.619122,0.779337,0.095628,0.994187,0.343381,0.782628,0.100991,0.993585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
300,narrow,0.469571,0.053001,-0.432698,0.999967,0.477044,0.035400,-0.406841,0.999903,0.482064,...,0.327301,0.632257,0.508576,0.826273,0.041638,0.926759,0.451166,0.791333,0.155239,0.906517
301,narrow,0.476244,0.052890,-0.446210,0.999968,0.483525,0.034949,-0.420078,0.999904,0.488540,...,0.419933,0.630761,0.515934,0.835698,-0.003159,0.929198,0.450418,0.791160,0.254371,0.905866
302,narrow,0.478862,0.053017,-0.450948,0.999968,0.485813,0.034954,-0.424712,0.999906,0.490754,...,0.460707,0.629708,0.517901,0.837704,-0.086357,0.932438,0.450326,0.791047,0.298355,0.904870
303,narrow,0.483240,0.044454,-0.465028,0.999970,0.490601,0.028578,-0.438711,0.999911,0.495742,...,0.510511,0.627739,0.519048,0.855551,-0.030730,0.936262,0.450061,0.792238,0.354916,0.905323


In [17]:
# Separate the label column from the landmark feature columns.
y = df['class']                 # target variable
x = df.drop(columns=['class'])  # features: everything except the label

In [26]:
x.head()

Unnamed: 0,x1,y1,z1,v1,x2,y2,z2,v2,x3,y3,...,z31,v31,x32,y32,z32,v32,x33,y33,z33,v33
0,0.486866,0.092757,-0.459668,0.999995,0.494195,0.076068,-0.438539,0.999984,0.499258,0.076268,...,0.258944,0.91997,0.6226,0.778899,0.065754,0.993746,0.341608,0.782204,0.098797,0.992899
1,0.478396,0.097314,-0.479792,0.999997,0.485839,0.081843,-0.458554,0.999989,0.49065,0.082107,...,0.246456,0.928028,0.619905,0.779492,0.115507,0.994298,0.341757,0.78284,0.081611,0.993506
2,0.477945,0.096811,-0.440314,0.999997,0.485219,0.081407,-0.417771,0.99999,0.489971,0.081638,...,0.228079,0.928139,0.619793,0.77938,0.111495,0.994229,0.342462,0.782806,0.063144,0.993581
3,0.477502,0.096399,-0.438518,0.999997,0.484732,0.081033,-0.416107,0.99999,0.489472,0.081235,...,0.22777,0.927914,0.619534,0.779366,0.113318,0.994144,0.34301,0.782748,0.063218,0.993615
4,0.477413,0.096976,-0.483355,0.999997,0.484759,0.08142,-0.464002,0.99999,0.489531,0.081589,...,0.263808,0.928478,0.619122,0.779337,0.095628,0.994187,0.343381,0.782628,0.100991,0.993585


In [27]:
y.head()

0    wide
1    wide
2    wide
3    wide
4    wide
Name: class, dtype: object

In [28]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=42)

In [29]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((244, 132), (61, 132), (244,), (61,))

In [19]:
x_test

Unnamed: 0,x1,y1,z1,v1,x2,y2,z2,v2,x3,y3,...,z31,v31,x32,y32,z32,v32,x33,y33,z33,v33
180,0.448727,0.102483,-0.393044,0.999992,0.455323,0.086912,-0.368247,0.999975,0.459215,0.087110,...,0.231551,0.912235,0.618426,0.778729,0.133372,0.987722,0.284202,0.797476,0.069576,0.990875
154,0.453052,0.079344,-0.378657,0.999991,0.459263,0.064746,-0.357084,0.999965,0.463484,0.064879,...,0.239988,0.915370,0.531533,0.775515,0.039868,0.991776,0.381421,0.784437,0.076347,0.991204
111,0.457207,0.074549,-0.393043,0.999961,0.463214,0.059951,-0.369830,0.999872,0.467463,0.060245,...,0.223123,0.721794,0.503244,0.760572,0.099806,0.888731,0.441784,0.778926,0.059065,0.906717
247,0.464629,0.072911,-0.349310,0.999986,0.471377,0.058936,-0.325228,0.999947,0.474837,0.059447,...,0.236641,0.883556,0.533350,0.776153,0.012155,0.987830,0.421683,0.786310,0.071850,0.986373
60,0.463275,0.080088,-0.431400,0.999991,0.469274,0.064922,-0.410518,0.999968,0.473425,0.065197,...,0.203270,0.902562,0.524732,0.767881,0.002343,0.988971,0.392095,0.777691,0.046183,0.987204
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218,0.451194,0.073537,-0.359446,0.999985,0.456391,0.058348,-0.336125,0.999946,0.460640,0.058325,...,0.218233,0.864472,0.532163,0.778378,0.067912,0.987982,0.427739,0.785560,0.051775,0.985754
104,0.456374,0.075203,-0.340168,0.999971,0.462698,0.060232,-0.315059,0.999905,0.467158,0.060561,...,0.213150,0.704540,0.490334,0.762510,0.073369,0.897798,0.442502,0.779051,0.042038,0.914731
302,0.478862,0.053017,-0.450948,0.999968,0.485813,0.034954,-0.424712,0.999906,0.490754,0.035535,...,0.460707,0.629708,0.517901,0.837704,-0.086357,0.932438,0.450326,0.791047,0.298355,0.904870
194,0.455262,0.102084,-0.369946,0.999993,0.460929,0.087010,-0.344750,0.999978,0.464713,0.087341,...,0.203390,0.911345,0.617434,0.776732,0.123651,0.987866,0.285450,0.797378,0.043606,0.991645


In [20]:
y_test

180       wide
154    neutral
111     narrow
247    neutral
60     neutral
        ...   
218    neutral
104     narrow
302     narrow
194       wide
185       wide
Name: class, Length: 61, dtype: object

In [18]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [19]:
# Candidate classifiers keyed by a short name. Each pipeline standardises the
# features and then fits the estimator.
# Both estimators are randomised; seeding them (same seed as the train/test
# split) makes a refit after a kernel restart produce the same models.
pipelines = {
    'rf': make_pipeline(StandardScaler(), RandomForestClassifier(random_state=42)),
    'gb': make_pipeline(StandardScaler(), GradientBoostingClassifier(random_state=42)),
}

In [None]:
# Fit every candidate pipeline on the training split and keep the fitted
# pipelines under the same short names.
fit_models = {}
for algo, pipeline in pipelines.items():
    # The value returned by fit() is what gets stored and later queried.
    fit_models[algo] = pipeline.fit(x_train, y_train)

In [21]:
fit_models

{}

In [32]:
fit_models['gb'].predict(x_test)

array(['wide', 'neutral', 'narrow', 'neutral', 'neutral', 'wide',
       'narrow', 'neutral', 'neutral', 'wide', 'wide', 'narrow',
       'neutral', 'wide', 'narrow', 'neutral', 'narrow', 'wide',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'wide',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'narrow',
       'neutral', 'narrow', 'wide', 'neutral', 'wide', 'narrow',
       'neutral', 'narrow', 'wide', 'neutral', 'narrow', 'neutral',
       'wide', 'narrow', 'wide', 'narrow', 'narrow', 'narrow', 'narrow',
       'neutral', 'neutral', 'neutral', 'neutral', 'narrow', 'narrow',
       'neutral', 'neutral', 'narrow', 'narrow', 'wide', 'wide'],
      dtype=object)

## 3.2 Evaluate and serialize the model

In [32]:
from sklearn.metrics import accuracy_score, precision_score, recall_score
import pickle

In [33]:
# For each fitted pipeline print: name, accuracy, weighted precision and
# weighted recall on the held-out test split.
y_true = y_test.values
for algo, model in fit_models.items():
    yhat = model.predict(x_test)
    accuracy = accuracy_score(y_true, yhat)
    precision = precision_score(y_true, yhat, average='weighted')
    recall = recall_score(y_true, yhat, average="weighted")
    print(algo, accuracy, precision, recall)

In [32]:
# Persist the fitted random-forest pipeline ('rf') to disk; the detection
# section loads it back from this same file.
with open("wide_narrow_neutral.pkl", "wb") as f:
    pickle.dump(fit_models['rf'], f)

## 4. Make detections with model

In [34]:
# Load the serialized pipeline back into `model` for live prediction.
# NOTE: pickle.load can execute arbitrary code from the file — only load
# pickles you created yourself or otherwise trust.
with open("wide_narrow_neutral.pkl", "rb") as f:
    model = pickle.load(f)

In [38]:
# Live classification: detect the pose on each webcam frame, classify it with
# the loaded `model`, and overlay the predicted class and its probability.
# Press "q" in the window to stop.
cap = cv2.VideoCapture(0)
counter = 0
current_stage = ''

# Minimum top-class probability for a prediction to drive the stage logic.
min_confidence = 0.7

try:
    # Initiate pose model
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame delivered; stop instead of passing an empty frame on.
                break

            # Recolour feed: OpenCV delivers BGR, MediaPipe Pose takes RGB.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            # Recolour image back to BGR for rendering
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(245, 117,66), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(117,66,117), thickness=2, circle_radius=2))

            # Reset per frame. Previously these were only bound inside the
            # try block, so a first frame without a pose raised NameError in
            # the overlay code, and later misses displayed a stale prediction.
            body_language_class = None
            body_language_prob = None
            top_prob = None

            if results.pose_landmarks is not None:
                try:
                    row = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten().tolist()
                    # Same column names (minus 'class') as the training data.
                    X = pd.DataFrame([row], columns=landmarks[1:])
                    body_language_class = model.predict(X)[0]
                    body_language_prob = model.predict_proba(X)[0]
                    top_prob = float(np.max(body_language_prob))
                    print(body_language_class, body_language_prob)
                except Exception as e:
                    # Best effort: keep the feed alive, but say what failed
                    # and make sure no half-set prediction is displayed.
                    print("Executing except condition:", e)
                    body_language_class = None
                    body_language_prob = None
                    top_prob = None

            if body_language_class is not None:
                # NOTE(review): these branches test for 'down' / 'up', but the
                # loaded model's labels are wide / narrow / neutral, so
                # `counter` presumably never changes — confirm intended labels.
                if body_language_class == 'down' and top_prob >= min_confidence:
                    current_stage = 'down'
                elif current_stage == 'down' and body_language_class == 'up' and top_prob >= min_confidence:
                    current_stage = 'up'
                    counter += 1
                    print(current_stage)

            # Get status box
            cv2.rectangle(image, (0,0), (250,60), (245, 117, 16), -1)

            # Static captions
            cv2.putText(image, 'CLASS',
                        (95, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, 'PROB',
                        (15, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2, cv2.LINE_AA)

            # Values are drawn only when this frame produced a prediction.
            if body_language_class is not None:
                # Display class
                cv2.putText(image, str(body_language_class).split(' ')[0],
                            (90, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                # Display probability
                cv2.putText(image, str(round(top_prob, 2)),
                            (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow("detection frame", image)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()



narrow [0.79 0.12 0.09]
narrow [0.79 0.12 0.09]
narrow [0.77 0.11 0.12]
narrow [0.77 0.11 0.12]
narrow [0.73 0.14 0.13]




narrow [0.77 0.11 0.12]
narrow [0.77 0.11 0.12]
narrow [0.73 0.13 0.14]
narrow [0.7  0.15 0.15]
narrow [0.7  0.15 0.15]




narrow [0.7  0.15 0.15]
narrow [0.7  0.14 0.16]
narrow [0.69 0.14 0.17]
narrow [0.69 0.14 0.17]
narrow [0.69 0.13 0.18]




narrow [0.69 0.13 0.18]
narrow [0.65 0.14 0.21]
narrow [0.63 0.14 0.23]
narrow [0.63 0.14 0.23]
narrow [0.63 0.14 0.23]




narrow [0.63 0.14 0.23]
narrow [0.63 0.14 0.23]
narrow [0.63 0.14 0.23]
narrow [0.63 0.14 0.23]
narrow [0.63 0.14 0.23]




narrow [0.63 0.14 0.23]
narrow [0.63 0.14 0.23]
narrow [0.63 0.14 0.23]
narrow [0.63 0.14 0.23]
narrow [0.56 0.2  0.24]
narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]




narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]




narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]




narrow [0.55 0.21 0.24]
narrow [0.55 0.21 0.24]
