In [None]:
!pip install PyDirectInput
!pip install keyboard

In [None]:
import pyautogui as pag
import keyboard
import pydirectinput as py
import time

import cv2
import csv
import mediapipe as mp
import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Short aliases for the MediaPipe modules used by every capture loop below:
# mp_drawing renders the detected landmarks (draw_landmarks / DrawingSpec),
# mp_hands provides the Hands detector and the HAND_CONNECTIONS topology.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Data collection

In [None]:
def which_hand_is(results, frame_width=640, frame_height=480):
    """Label the first two detected hands as 'Left'/'Right' by horizontal position.

    The two hands are compared through landmark 0 (the wrist, per MediaPipe's
    hand landmark model): the hand whose landmark 0 has the larger normalised
    ``x`` is labelled ``'Right'``, the other one ``'Left'``. On a tie the
    first hand is labelled ``'Left'``.

    Args:
        results: Detection result exposing ``multi_hand_landmarks``; each
            entry must expose ``landmark`` items with ``x`` and ``y``.
        frame_width: Width in pixels used to scale the normalised ``x``.
        frame_height: Height in pixels used to scale the normalised ``y``.

    Returns:
        ``(hands_labels, hands_coords)``. Both lists are ordered like
        ``results.multi_hand_landmarks``: ``hands_labels[i]`` is the label of
        hand ``i`` and ``hands_coords[i]`` its landmark-0 position as an
        ``(x, y)`` tuple of integer pixels.

    Raises:
        IndexError: If fewer than two hands were detected, including the case
            where ``multi_hand_landmarks`` is ``None``. A single exception
            type lets callers handle "not enough hands" uniformly.
    """
    detected = results.multi_hand_landmarks
    if not detected or len(detected) < 2:
        raise IndexError('two detected hands are required')

    fst_hand = detected[0].landmark[0]
    sec_hand = detected[1].landmark[0]
    hands_coords = [
        (int(fst_hand.x * frame_width), int(fst_hand.y * frame_height)),
        (int(sec_hand.x * frame_width), int(sec_hand.y * frame_height)),
    ]
    hands_labels = ['Right', 'Left'] if fst_hand.x > sec_hand.x else ['Left', 'Right']
    return hands_labels, hands_coords

In [None]:
# Gesture labels in recording order, each paired with the hand that performs
# it. The pairs are split into two parallel lists indexed by the same position.
list_of_classes, hand_for_class = (
    list(column)
    for column in zip(
        ('l_stay', 'Left'),
        ('forward', 'Left'),
        ('back', 'Left'),
        ('r_stay', 'Right'),
        ('jump', 'Right'),
    )
)

# Seconds spent on each gesture, and the leading part of that window during
# which nothing is written yet (time to get the hand into position).
class_dur = 60
prerec_time = 5

# One CSV file per hand.
datafile_names = dict(Left='left_mario_data1.csv', Right='right_mario_data1.csv')

In [None]:
# Header row: the class label followed by x/y/z for each of the 21 landmarks.
base = ['class_name'] + [f'{axis}{i}' for i in range(21) for axis in 'xyz']

# (Re)create every data file so that it holds only the header row.
# Opening with mode='w' truncates a file that already exists.
for name in datafile_names.values():
    with open(name, mode='w', newline='') as f:
        # csv.writer defaults: ',' delimiter, '"' quotechar, QUOTE_MINIMAL.
        csv_writer = csv.writer(f)
        csv_writer.writerow(base)

# Record landmark rows for every gesture in list_of_classes, one gesture after
# another. Each gesture gets class_dur seconds; the first prerec_time seconds
# of that window are a "get ready" phase in which nothing is written.
start_time = time.time()
class_iter = 0
acc_class = list_of_classes[class_iter]
acc_hand = hand_for_class[class_iter]

cap = cv2.VideoCapture(0)
try:
    with mp_hands.Hands(min_detection_confidence=0.6, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop instead of failing inside cvtColor.
                print('Camera frame could not be read')
                break

            # BGR 2 RGB, then flip on horizontal
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = cv2.flip(image, 1)

            # Detections (the frame is marked read-only while it is processed)
            image.flags.writeable = False
            results = hands.process(image)
            image.flags.writeable = True

            # RGB 2 BGR
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Class initialization
            time_dist = time.time() - start_time
            if time_dist > class_dur:
                if class_iter < len(list_of_classes) - 1:
                    class_iter += 1
                    start_time = time.time()
                    # Restart the elapsed time as well: otherwise this frame would
                    # skip the "get ready" phase and be stored under the new label.
                    time_dist = 0.0
                else:
                    print('End of recognition')
                    break

            acc_class = list_of_classes[class_iter]
            acc_hand = hand_for_class[class_iter]

            try:
                # Detect right and left hands
                hands_labels, hands_coords = which_hand_is(results)
                hand_idx = hands_labels.index(acc_hand)
                hand_landmarks = results.multi_hand_landmarks[hand_idx].landmark

                if time_dist > prerec_time:
                    row = [acc_class]
                    for landmark in hand_landmarks:
                        row += [landmark.x, landmark.y, landmark.z]

                    # Append row by row so that collected data survives an interruption.
                    with open(datafile_names[acc_hand], mode='a', newline='') as f:
                        csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                        csv_writer.writerow(row)

                    w = 'To: ' + str(datafile_names[acc_hand])
                    cv2.putText(image, w, (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                    cv2.putText(image, 'recording', hands_coords[hand_idx], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                else:
                    cv2.putText(image, 'get ready', hands_coords[hand_idx], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            except (IndexError, TypeError):
                # Fewer than two hands in the frame (multi_hand_landmarks is None
                # or too short). Other errors, including a kernel interrupt, propagate.
                print('Detection error')

            # Rendering results
            if results.multi_hand_landmarks:
                for hand in results.multi_hand_landmarks:

                    cv2.rectangle(image, (0,0), (250, 60), (245, 117, 16), -1)
                    cv2.putText(image, acc_class, (95, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                    cv2.putText(image, str(round(class_dur - time_dist, 1)), (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                    mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                            mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                            mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                            )

            cv2.imshow('Hand Tracking', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview, even after an error.
    cap.release()
    cv2.destroyAllWindows()

End of recognition


# Pipeline training for each hand

In [None]:
# Candidate classifiers, keyed by a short tag that is also used in the pickle
# file names. Every candidate is preceded by a StandardScaler step.
# Linear candidates currently switched off:
#   'lr': LogisticRegression()
#   'rc': RidgeClassifier()
pipelines = {
    tag: make_pipeline(StandardScaler(), estimator)
    for tag, estimator in (
        ('rf', RandomForestClassifier()),
        ('gb', GradientBoostingClassifier()),
    )
}

In [None]:
# Load the rows recorded for the right hand and show them.
df_right = pd.read_csv(datafile_names['Right'])
display(df_right)

print('Action space:', df_right['class_name'].unique())

# Target: the gesture label. Features: every remaining (landmark) column.
y = df_right['class_name']
X = df_right.drop(columns='class_name')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=228)

# Fit every candidate pipeline on the training split.
fit_models = {}
for algo in pipelines:
    fit_models[algo] = pipelines[algo].fit(X_train, y_train)

# Accuracy of each candidate on the held-out split.
print('Models scores:')
for algo, model in fit_models.items():
    print(algo, accuracy_score(y_test, model.predict(X_test)))

# Persist each fitted pipeline under a name that carries its tag.
for model_name, fitted in fit_models.items():
    with open(f'right_{model_name}_mario1.pkl', 'wb') as f:
        pickle.dump(fitted, f)

# The gradient-boosting pipeline is the one used for the right hand. It is
# read back from the file written just above.
with open('right_gb_mario1.pkl', 'rb') as f:
    model_right = pickle.load(f)

Unnamed: 0,class_name,x0,y0,z0,x1,y1,z1,x2,y2,z2,...,z17,x18,y18,z18,x19,y19,z19,x20,y20,z20
0,r_stay,0.755740,0.497416,-1.250225e-08,0.727697,0.479330,-0.014310,0.705854,0.455027,-0.025845,...,-0.024698,0.808153,0.371209,-0.039167,0.811530,0.354112,-0.045588,0.812785,0.347894,-0.046599
1,r_stay,0.760775,0.481312,-3.194136e-07,0.734424,0.463627,-0.015161,0.709454,0.427370,-0.022169,...,-0.013587,0.787292,0.402644,-0.023173,0.780758,0.426260,-0.021147,0.779138,0.420534,-0.016859
2,r_stay,0.760119,0.481560,-3.245356e-07,0.733748,0.463630,-0.015194,0.708817,0.427064,-0.021995,...,-0.013660,0.786875,0.403599,-0.023051,0.780257,0.427583,-0.020863,0.778869,0.421788,-0.016445
3,r_stay,0.760625,0.481665,-3.208379e-07,0.734106,0.463444,-0.014649,0.709226,0.427723,-0.021580,...,-0.013946,0.786487,0.403711,-0.023401,0.779754,0.428272,-0.021264,0.778198,0.422749,-0.016971
4,r_stay,0.761060,0.480936,-3.276879e-07,0.734570,0.463791,-0.015168,0.709631,0.427746,-0.022032,...,-0.013543,0.786510,0.404930,-0.022877,0.779675,0.428914,-0.020915,0.778132,0.423090,-0.016741
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1755,jump,0.719600,0.502863,2.048131e-07,0.679323,0.467716,-0.012106,0.659798,0.413244,-0.016088,...,-0.016724,0.744507,0.330563,-0.025535,0.740690,0.298937,-0.029189,0.736056,0.268567,-0.031738
1756,jump,0.718655,0.506891,2.044510e-07,0.678524,0.471562,-0.012264,0.659028,0.416875,-0.016182,...,-0.016082,0.743691,0.334757,-0.024942,0.739972,0.303269,-0.028540,0.735525,0.272669,-0.031063
1757,jump,0.718524,0.517838,1.787943e-07,0.678143,0.484189,-0.012025,0.657735,0.430068,-0.016357,...,-0.019839,0.743452,0.344787,-0.029154,0.739274,0.313954,-0.033112,0.734060,0.284933,-0.035869
1758,jump,0.720475,0.520486,2.134870e-07,0.679615,0.485723,-0.011738,0.659534,0.431820,-0.015462,...,-0.015876,0.744327,0.347484,-0.023959,0.740573,0.316067,-0.026921,0.736256,0.286196,-0.028911


Action space: ['r_stay' 'jump']
Models scores:
rf 0.9981060606060606
gb 0.9981060606060606


In [None]:
# Load the rows recorded for the left hand and show them.
df_left = pd.read_csv(datafile_names['Left'])
display(df_left)

print('Action space:', df_left['class_name'].unique())

# Target: the gesture label. Features: every remaining (landmark) column.
y = df_left['class_name']
X = df_left.drop(columns='class_name')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=228)

# Fit every candidate pipeline on the training split.
fit_models = {}
for algo in pipelines:
    fit_models[algo] = pipelines[algo].fit(X_train, y_train)

# Accuracy of each candidate on the held-out split.
print('Models scores:')
for algo, model in fit_models.items():
    print(algo, accuracy_score(y_test, model.predict(X_test)))

# Persist each fitted pipeline under a name that carries its tag.
for model_name, fitted in fit_models.items():
    with open(f'left_{model_name}_mario1.pkl', 'wb') as f:
        pickle.dump(fitted, f)

# The gradient-boosting pipeline is the one used for the left hand. It is
# read back from the file written just above.
with open('left_gb_mario1.pkl', 'rb') as f:
    model_left = pickle.load(f)

Unnamed: 0,class_name,x0,y0,z0,x1,y1,z1,x2,y2,z2,...,z17,x18,y18,z18,x19,y19,z19,x20,y20,z20
0,l_stay,0.223635,0.471478,-6.122481e-08,0.247801,0.471344,-0.018701,0.269967,0.462820,-0.032576,...,-0.029183,0.195987,0.456673,-0.038522,0.201836,0.456981,-0.039289,0.201250,0.439074,-0.039191
1,l_stay,0.224032,0.469967,-5.923387e-08,0.249079,0.468944,-0.017214,0.270625,0.460151,-0.030439,...,-0.028346,0.195126,0.451520,-0.036834,0.201693,0.451812,-0.037019,0.202189,0.434247,-0.036339
2,l_stay,0.223413,0.468628,-6.518906e-08,0.247705,0.467896,-0.017766,0.270279,0.459719,-0.031371,...,-0.029443,0.196109,0.451704,-0.038220,0.202406,0.453405,-0.038062,0.202424,0.436684,-0.037026
3,l_stay,0.222917,0.473343,-7.957876e-08,0.247960,0.472086,-0.018271,0.271313,0.462737,-0.032007,...,-0.029146,0.196584,0.453191,-0.038465,0.203064,0.454840,-0.038816,0.202901,0.437364,-0.038122
4,l_stay,0.222266,0.477582,-6.407320e-08,0.246370,0.474621,-0.017972,0.268090,0.463882,-0.031463,...,-0.028674,0.194541,0.454812,-0.037579,0.200760,0.457731,-0.037764,0.200530,0.441463,-0.037090
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2636,back,0.195870,0.493704,-1.533852e-07,0.230653,0.471376,-0.009936,0.263631,0.445762,-0.019967,...,-0.023278,0.195116,0.382369,-0.027603,0.197490,0.407589,-0.021428,0.189191,0.411809,-0.015723
2637,back,0.196966,0.491439,-1.432743e-07,0.231253,0.469638,-0.009231,0.263431,0.445903,-0.019602,...,-0.023473,0.194427,0.382057,-0.027952,0.196631,0.407258,-0.021179,0.188797,0.412036,-0.014834
2638,back,0.196549,0.491152,-1.256572e-07,0.230711,0.471632,-0.011648,0.264365,0.448567,-0.022819,...,-0.022631,0.192292,0.383449,-0.027272,0.195276,0.409216,-0.021014,0.187301,0.413924,-0.015212
2639,back,0.195215,0.491097,-1.353169e-07,0.229298,0.472901,-0.013359,0.264406,0.449277,-0.024942,...,-0.021740,0.189553,0.384038,-0.026206,0.192463,0.409148,-0.019933,0.186021,0.413418,-0.014076


Action space: ['l_stay' 'forward' 'back']
Models scores:
rf 0.9987389659520807
gb 0.9987389659520807


In [None]:
# Classifier to use for each hand label returned by which_hand_is.
hand_models = dict(Left=model_left, Right=model_right)

# Testing of trained models

In [None]:
# Live check of the trained models: classify the gesture of each hand on every
# webcam frame and draw "<probability> <label>" next to that hand.
cap = cv2.VideoCapture(0)
try:
    with mp_hands.Hands(min_detection_confidence=0.6, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop instead of failing inside cvtColor.
                print('Camera frame could not be read')
                break

            # BGR 2 RGB, then flip on horizontal
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = cv2.flip(image, 1)

            # Detections (the frame is marked read-only while it is processed)
            image.flags.writeable = False
            results = hands.process(image)
            image.flags.writeable = True

            # RGB 2 BGR
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            try:
                # The left/right assignment is the same for both hands of a frame.
                hands_labels, hands_coords = which_hand_is(results)
                for acc_hand in set(hand_for_class):
                    model = hand_models[acc_hand]
                    hand_idx = hands_labels.index(acc_hand)
                    hand_landmarks = results.multi_hand_landmarks[hand_idx].landmark

                    row = []
                    for landmark in hand_landmarks:
                        row += [landmark.x, landmark.y, landmark.z]
                    # Reuse the column names the model was fitted with (when it
                    # exposes them) so scikit-learn does not warn on every frame.
                    X = pd.DataFrame([row], columns=getattr(model, 'feature_names_in_', None))

                    pred = model.predict(X)[0]
                    proba = round(max(model.predict_proba(X)[0]),2)
                    cv2.putText(image, f'{proba} {pred}', hands_coords[hand_idx], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            except (IndexError, TypeError):
                # Fewer than two hands in the frame. multi_hand_landmarks is None
                # when nothing is detected, which surfaces as TypeError.
                print('Detection Error')

            # Rendering results
            if results.multi_hand_landmarks:
                for hand in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                            mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                            mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                            )

            cv2.imshow('Hand Tracking', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview, even after an error.
    cap.release()
    cv2.destroyAllWindows()

# Final testing of trained models inside the game

In [None]:
# Input actions triggered by each recognised gesture. Every action is a
# [device, command, button] triple, executed in the listed order:
#   device  - 'kb' (keyboard) or 'ms' (mouse)
#   command - 'keyUp' / 'keyDown' / 'press' for 'kb', 'click' for 'ms'
# The movement gestures first release the opposite direction key.
bind_settings = dict(
    l_stay=[['kb', 'keyUp', 'a'], ['kb', 'keyUp', 'd']],
    forward=[['kb', 'keyUp', 'a'], ['kb', 'keyDown', 'd']],
    back=[['kb', 'keyUp', 'd'], ['kb', 'keyDown', 'a']],
    r_stay=[['kb', 'keyUp', 'space']],
    jump=[['kb', 'keyDown', 'space']],
)

In [None]:
def emulator_request_decoder(predict, binds):
    """Execute the input actions bound to a recognised gesture.

    Args:
        predict: Gesture label used as the key into ``binds``.
        binds: Mapping from gesture label to a sequence of
            ``[device, command, button]`` triples. ``device`` is ``'kb'``
            (commands ``'keyUp'``, ``'keyDown'``, ``'press'``) or ``'ms'``
            (command ``'click'``); ``button`` is passed to the matching
            ``py`` function.

    Returns:
        None. Unknown devices or commands, and failures of the input backend,
        are reported with ``print`` and the remaining actions still run.

    Raises:
        KeyError: If ``binds`` is a dict and has no entry for ``predict``.
    """
    keyboard_commands = ('keyUp', 'keyDown', 'press')

    for device, command, button in binds[predict]:
        if device == 'kb':
            if command not in keyboard_commands:
                print(f'Unknown Command Error: {command}. Only "keyUp", "keyDown", "press" commands are available for {device}')
                continue
            try:
                # keyUp / keyDown / press share the same one-argument call shape.
                getattr(py, command)(button)
            except Exception:
                # Exception rather than a bare except, so that KeyboardInterrupt
                # and SystemExit still stop the notebook.
                print(f'Unknown Button Error: {button}')
        elif device == 'ms':
            if command != 'click':
                print(f'Unknown Command Error: {command}. Only "click" commands are available for {device}')
                continue
            try:
                py.click(button=button)
            except Exception:
                print(f'Unknown Button Error: {button}')
        else:
            print(f'Unknown Device Error: {device}. Only "kb", "ms" devices are available')

In [None]:
# Game control loop: classify the gesture of each hand on every webcam frame
# and forward the result to the input emulator via bind_settings.
cap = cv2.VideoCapture(0)
try:
    with mp_hands.Hands(min_detection_confidence=0.6, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop instead of failing inside cvtColor.
                print('Camera frame could not be read')
                break

            # BGR 2 RGB, then flip on horizontal
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = cv2.flip(image, 1)

            # Detections (the frame is marked read-only while it is processed)
            image.flags.writeable = False
            results = hands.process(image)
            image.flags.writeable = True

            # RGB 2 BGR
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            try:
                # The left/right assignment is the same for both hands of a frame.
                hands_labels, hands_coords = which_hand_is(results)
                for acc_hand in set(hand_for_class):
                    model = hand_models[acc_hand]
                    hand_idx = hands_labels.index(acc_hand)
                    hand_landmarks = results.multi_hand_landmarks[hand_idx].landmark

                    row = []
                    for landmark in hand_landmarks:
                        row += [landmark.x, landmark.y, landmark.z]
                    # Reuse the column names the model was fitted with (when it
                    # exposes them) so scikit-learn does not warn on every frame.
                    X = pd.DataFrame([row], columns=getattr(model, 'feature_names_in_', None))

                    pred = model.predict(X)[0]
                    proba = round(max(model.predict_proba(X)[0]),2)
                    cv2.putText(image, f'{proba} {pred}', hands_coords[hand_idx], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                    emulator_request_decoder(pred, bind_settings)
            except (IndexError, TypeError):
                # Fewer than two hands in the frame (multi_hand_landmarks is None
                # or too short). Other errors, including a kernel interrupt, propagate.
                print('Detection Error')

            # Rendering results
            if results.multi_hand_landmarks:
                for hand in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                            mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                            mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                            )

            cv2.imshow('Hand Tracking', image)

            # Keep only the low byte of the key code before comparing with 'q'.
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview, even after an error.
    cap.release()
    cv2.destroyAllWindows()

    # Release every key that a binding can hold down, so that no key stays
    # pressed once the loop has ended.
    held_buttons = {
        button
        for request in bind_settings.values()
        for device, command, button in request
        if device == 'kb' and command == 'keyDown'
    }
    for button in held_buttons:
        py.keyUp(button)