In [None]:
!pip install mouse
!pip install mediapipe

In [None]:
import mouse

import cv2
import csv
import mediapipe as mp
import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Short aliases for the MediaPipe drawing helpers and the pose solution module;
# every capture cell below refers to these two names.
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Mediapipe test

In [None]:
# Smoke test: show the webcam feed with the detected pose skeleton drawn on
# top until 'q' is pressed.
cap = cv2.VideoCapture(0)
try:
    # Setup mediapipe instance
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera unavailable or stream ended);
                # passing the empty frame to cvtColor would raise.
                break

            # Recolor image to RGB
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make detection
            results = pose.process(image)

            # Recolor back to BGR
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Extract landmarks (pose_landmarks is None when no pose is found)
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark

            # Render detections
            mp_drawing.draw_landmarks(
                image,
                results.pose_landmarks,
                mp_pose.POSE_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=2),
                mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
            )

            cv2.imshow('Mediapipe Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even on error.
    cap.release()
    cv2.destroyAllWindows()

# Collecting data from arms

In [None]:
# CSV header for arms.csv: the label column followed by x/y/z for each of the
# 12 recorded arm landmarks. The collection and inference loops emit only
# x, y and z per landmark, so the header must not declare a fourth column;
# otherwise the header and the data rows have different widths.
base_arms = ['class_name']
for i in range(12):
    base_arms += [f'x{i}', f'y{i}', f'z{i}']

In [None]:
# Create (or overwrite) arms.csv and write the column header as its first row.
with open('arms.csv', mode='w', newline='') as header_file:
    csv.writer(
        header_file,
        delimiter=',',
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,
    ).writerow(base_arms)

In [None]:
# Label stored with every sample captured in this run; re-run the cell with a
# different value to record another gesture.
class_name = 'block'

# Inclusive [first, last] ranges of pose landmark indices to record.
arms_range = [[11, 22]]

cap = cv2.VideoCapture(0)
try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        # Open the CSV once for the whole session instead of once per frame.
        with open('arms.csv', mode='a', newline='') as f:
            csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # No frame was delivered (camera unavailable or stream
                    # ended); passing the empty frame to cvtColor would raise.
                    break

                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image.flags.writeable = False

                results = pose.process(image)

                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Only frames in which a pose was detected produce a sample.
                if results.pose_landmarks is not None:
                    # landmark collecting: label first, then x/y/z per landmark
                    landmarks = results.pose_landmarks.landmark
                    arms_landmarks = [class_name]
                    for first, last in arms_range:
                        for landmark_index in range(first, last + 1):
                            point = landmarks[landmark_index]
                            arms_landmarks += [point.x, point.y, point.z]

                    # landmark saving
                    csv_writer.writerow(arms_landmarks)

                mp_drawing.draw_landmarks(
                    image,
                    results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=2),
                    mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
                )

                cv2.imshow('Mediapipe Feed', image)

                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
finally:
    # Always free the camera and close the preview window, even on error.
    cap.release()
    cv2.destroyAllWindows()

# Collecting data from body

In [None]:
# CSV header for body.csv: the label column followed by x/y/z for each
# recorded body landmark. The ranges mirror the body_range used by the body
# collection cell (inclusive index ranges 9-12 and 23-24, six landmarks).
body_header = ['class_name']
body_landmark_count = sum(last - first + 1 for first, last in [[9, 12], [23, 24]])
for i in range(body_landmark_count):
    body_header += [f'x{i}', f'y{i}', f'z{i}']

# Create (or overwrite) body.csv with the column names as its first row.
with open('body.csv', mode='w', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(body_header)

In [None]:
# Label stored with every sample captured in this run; re-run the cell with a
# different value to record another class.
class_name = 'stay'

# Inclusive [first, last] ranges of pose landmark indices to record.
body_range = [[9, 12], [23, 24]]

cap = cv2.VideoCapture(0)
try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        # Open the CSV once for the whole session instead of once per frame.
        with open('body.csv', mode='a', newline='') as f:
            csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # No frame was delivered (camera unavailable or stream
                    # ended); passing the empty frame to cvtColor would raise.
                    break

                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image.flags.writeable = False

                results = pose.process(image)

                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Only frames in which a pose was detected produce a sample.
                if results.pose_landmarks is not None:
                    # Label first, then x/y/z for every selected landmark.
                    landmarks = results.pose_landmarks.landmark
                    body_landmarks = [class_name]
                    for first, last in body_range:
                        for landmark_index in range(first, last + 1):
                            point = landmarks[landmark_index]
                            body_landmarks += [point.x, point.y, point.z]

                    csv_writer.writerow(body_landmarks)

                mp_drawing.draw_landmarks(
                    image,
                    results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=2),
                    mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
                )

                cv2.imshow('Mediapipe Feed', image)

                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
finally:
    # Always free the camera and close the preview window, even on error.
    cap.release()
    cv2.destroyAllWindows()

# Model training

In [None]:
# Load the arm-landmark samples recorded into arms.csv by the collection cell.
df = pd.read_csv('arms.csv')

In [None]:
# Show the distinct gesture labels present in the dataset.
df['class_name'].unique()

array(['stay', 'right_punch', 'left_punch', 'block'], dtype=object)

In [None]:
# Separate the gesture label (target) from the landmark coordinates (features).
y = df['class_name']
X = df.drop(columns='class_name')

In [None]:
# Hold out 30% of the samples for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=228,
)

In [None]:
# Candidate pipelines: each one standardizes the features and then fits a
# standard classifier. All of them are trained and the best one is chosen by
# the accuracy metric.
pipelines = {
    # max_iter is raised above the default because fitting with the default
    # stopped at the iteration limit before converging.
    'lr': make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000)),
    'rc': make_pipeline(StandardScaler(), RidgeClassifier()),
    # Fixed seeds so that re-running the notebook reproduces the same models.
    'rf': make_pipeline(StandardScaler(), RandomForestClassifier(random_state=228)),
    'gb': make_pipeline(StandardScaler(), GradientBoostingClassifier(random_state=228)),
}

In [None]:
# Train every candidate pipeline on the training split; fit() returns the
# fitted pipeline, which is stored under the same short key.
fit_models = {
    algo: pipeline.fit(X_train, y_train)
    for algo, pipeline in pipelines.items()
}

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Display the fitted pipelines, keyed by their short algorithm name.
fit_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

In [None]:
# Report the held-out accuracy of every fitted pipeline.
for algo, model in fit_models.items():
    test_accuracy = accuracy_score(y_test, model.predict(X_test))
    print(algo, test_accuracy)

lr 0.9983333333333333
rc 0.9966666666666667
rf 0.9991666666666666
gb 0.9991666666666666


In [None]:
# Persist each fitted pipeline to '<key>_arms.pkl' in the working directory.
for model_name, fitted_pipeline in fit_models.items():
    with open(f'{model_name}_arms.pkl', 'wb') as f:
        pickle.dump(fitted_pipeline, f)

In [None]:
# Reload the gradient-boosting pipeline that the previous cell saved as
# 'gb_arms.pkl'; it is the model used for live prediction below.
# NOTE: pickle.load can execute arbitrary code, so only load files you created.
with open('gb_arms.pkl', 'rb') as f:
    model = pickle.load(f)

# Final testing

In [None]:
# Live demo: classify the arm pose in every webcam frame with `model` and
# translate the predicted gesture into mouse actions. Press 'q' to stop.
# (`mouse` is already imported in the notebook's import cell.)

# Inclusive [first, last] ranges of pose landmark indices fed to the model;
# must match the ranges used when the training data was collected.
arms_range = [[11, 22]]

cap = cv2.VideoCapture(0)
try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera unavailable or stream ended);
                # passing the empty frame to cvtColor would raise.
                break

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            results = pose.process(image)

            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Only frames in which a pose was detected are classified.
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark
                arms_landmarks = []
                for first, last in arms_range:
                    for landmark_index in range(first, last + 1):
                        point = landmarks[landmark_index]
                        arms_landmarks += [point.x, point.y, point.z]

                try:
                    # gesture prediction (kept in its own name so the training
                    # matrix X is not overwritten)
                    features = pd.DataFrame([arms_landmarks])
                    pred = model.predict(features)[0]
                    proba = model.predict_proba(features)[0]
                    print(pred)

                    predict = pred.split(' ')[0]
                    confidence = proba[np.argmax(proba)]

                    # Overlay: confidence on the left, predicted label next to it.
                    cv2.rectangle(image, (0, 0), (250, 60), (245, 117, 16), -1)
                    cv2.putText(image, predict, (95, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                    cv2.putText(image, str(round(confidence, 2)), (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                    # setting mouse actions for each of the gestures
                    if predict == 'right_punch':
                        mouse.move(100, 100, absolute=False, duration=0.05)
                        mouse.click('left')
                        mouse.move(-100, -100, absolute=False, duration=0.05)
                    elif predict == 'left_punch':
                        mouse.move(-100, 100, absolute=False, duration=0.05)
                        mouse.click('left')
                        mouse.move(100, -100, absolute=False, duration=0.05)
                    elif predict == 'block':
                        mouse.click('right')
                except Exception as exc:
                    # Keep the feed alive on a failed prediction or mouse
                    # action, but report the problem instead of hiding it.
                    print(f'Gesture handling failed: {exc}')

            mp_drawing.draw_landmarks(
                image,
                results.pose_landmarks,
                mp_pose.POSE_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=2),
                mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
            )

            cv2.imshow('Mediapipe Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even on error.
    cap.release()
    cv2.destroyAllWindows()