In [3]:
import os
import openpifpaf
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import PIL
import numpy as np
import seaborn as sns
import cv2
from time import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.utils.data as data
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import pickle
import os

In [4]:
def unit_vector(vector):
    """ Returns the unit vector of the vector.  """
    return vector / np.linalg.norm(vector)

def compute_angle(depart, arriv1, arriv2):
    """ Returns the angle in radiants between of the arm defined by its shoudler, hip and elbow"""
    v1 = unit_vector(arriv1 - depart)
    v2 = unit_vector(arriv2 - depart)
    return np.arccos(np.clip(np.dot(v1, v2),-1.0,1.0))*360/(2*np.pi)

def compute_angles(predictions):
    """ Returns the angle of left and right arm wrt to the torso (return nan if the arm is not seen) """
    
    pos_dict = {
        "nose": 0,
        "left_eye": 1,
        "right_eye": 2,
        "left_ear": 3,
        "right_ear": 4,
        "left_shoudler": 5,
        "right_shoudler": 6,
        "left_elbow": 7,
        "right_elbow": 8,
        "left_wrist": 9,
        "right_wrist": 10,
        "left_hip": 11,
        "right_hip": 12,
        "left_knee": 13,
        "right_knee": 14,
        "left_ankle": 15,
        "right_ankle": 16}
    
    # we suppose only on person on the image (index = 0)
    keypoints = predictions[0].data

    # left shoulder
    left_shoulder = keypoints[pos_dict["left_shoudler"],0:2]
    left_hip = keypoints[pos_dict["left_hip"],0:2]
    left_elbow = keypoints[pos_dict["left_elbow"],0:2]

    # right shoulder
    right_shoulder = keypoints[pos_dict["right_shoudler"],0:2]
    right_hip = keypoints[pos_dict["right_hip"],0:2]
    right_elbow = keypoints[pos_dict["right_elbow"],0:2]

    # left elbow
    left_wrist = keypoints[pos_dict["left_wrist"],0:2]

    # right elbow
    right_wrist = keypoints[pos_dict["right_wrist"],0:2]

    return({"left_shoulder": compute_angle(left_shoulder, left_hip, left_elbow), 
            "right_shoulder": compute_angle(right_shoulder, right_hip, right_elbow), 
            "left_elbow": compute_angle(left_elbow, left_shoulder, left_wrist), 
            "right_elbow": compute_angle(right_elbow, right_shoulder, right_wrist)})

def predict_angles(cv2_img, predictor=openpifpaf.Predictor(checkpoint='shufflenetv2k16'), resize_x = 0.3,  resize_y = 0.3):
    """ Returns the angles of the left and right arm wrt to the torso and left and right forearm with resepct to the elbow (return nan if the arm is not seen)"""
    resized_img = cv2.resize(cv2_img, None, fx=resize_x, fy=resize_y, interpolation=cv2.INTER_AREA)
    recolored_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
    pil_img = PIL.Image.fromarray(recolored_img)
    predictions, _, _ = predictor.pil_image(pil_img)
    return compute_angles(predictions)

In [5]:
RED = (0, 0, 255)
GREEN = (0, 255, 0)
BLUE = (255, 0, 0)
YELLOW = (0, 255, 255)

pos_dict = {
        "nose": 0,
        "left_eye": 1,
        "right_eye": 2,
        "left_ear": 3,
        "right_ear": 4,
        "left_shoudler": 5,
        "right_shoudler": 6,
        "left_elbow": 7,
        "right_elbow": 8,
        "left_wrist": 9,
        "right_wrist": 10,
        "left_hip": 11,
        "right_hip": 12,
        "left_knee": 13,
        "right_knee": 14,
        "left_ankle": 15,
        "right_ankle": 16}

In [28]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=0.5)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.batch_norm = nn.BatchNorm1d(input_size)

    def forward(self, x):
        # h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        # c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        # = self.batch_norm(x)
        out, _ = self.lstm(x)
        out = torch.nn.functional.softmax(self.fc(out[:, -1, :]))
        return out

    def predict(self, x):
        out, _ = self.lstm(x)
        out = torch.nn.functional.softmax(self.fc(out[-1, :]))
        return out

In [7]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(device)

cuda


In [30]:
input_size = 4 # Size of each time step in the input window
hidden_size = 64 # Number of features in the hidden state of the LSTM
num_layers = 2 # Number of LSTM layers
num_classes = 5 # Number of output classes (i.e. number of possible labels)
model = LSTMModel(input_size, hidden_size, num_layers, num_classes).to(device)

WEIGHTS_PATH = "pytorch_weights/LSTM/"
model.load_state_dict(torch.load(WEIGHTS_PATH+"lstm_model_v1.pt"))

cap = cv2.VideoCapture(0)
debug = False 

# Check if the webcam is opened correctly
if not cap.isOpened():
    raise IOError("Cannot open webcam")

frames_angles_tensor = torch.zeros((5, 4)).to(device)
frames = 0

while True:
    frames += 1
    start = time()
    ret, frame = cap.read()
    frame = cv2.resize(frame, None, fx=1, fy=1, interpolation=cv2.INTER_AREA) #tweak those values to get better performance

    c = cv2.waitKey(1)
    if c == 27: # press escape to quit
        break

    #TODO : add a normalization step
    angles_list = list(predict_angles(frame).values())

    for i in range (5):
        if i < 4:
            frames_angles_tensor[i, :] = frames_angles_tensor[i+1, :]
        else:
            frames_angles_tensor[i, :] = torch.tensor(angles_list)

    # writes desired command
    if frames > 10:
        command = model.predict(frames_angles_tensor).argmax(dim=-1)
    else:
        command = "no command"

    sys.stdout.write(f"\r{frames_angles_tensor}")

    cv2.putText(img = frame, text=f"{command}", org = (0,90), fontFace=cv2.FONT_HERSHEY_TRIPLEX, fontScale=1, color=GREEN,thickness=2)

    stop = time()

    # writes fps
    fps = 1/(stop-start)
    cv2.putText(img = frame, text=f"{fps:.2f} fps", org = (0,30), fontFace=cv2.FONT_HERSHEY_TRIPLEX, fontScale=1, color=GREEN,thickness=2)

    # Naming a window
    cv2.namedWindow('Resized_Window', cv2.WINDOW_NORMAL)
    
    cv2.imshow('Resized_Window', frame)

cap.release()
cv2.destroyAllWindows()

  return vector / np.linalg.norm(vector)


tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
tensor([[0., 0., 0., 0.],]], device='cuda:0')
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., nan, nan],
tensor([[0.0000, 0.0000, 0.0000, 0.0000],:0')
        [0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000,    nan,    nan],
        [0.0000, 0.0000,    nan,    nan],
tensor([[0.0000, 0.0000, 0.0000, 0.0000],, device='cuda:0')
        [0.0000, 0.0000,    nan,    nan],
        [0.0000, 0.0000,    nan,    nan],
        [0.0343, 0.0280,    nan,    nan],
tensor([[0.0000, 0.0000,    nan,    nan],, device='cuda:0')
        [0.0000, 0.0000,    nan,    nan],
        [0.0343, 0.0280,    nan,    nan],
        [0.0000, 0.0000,    nan,    nan],
tensor([[0.0000, 0.0000,    nan,    nan],, device='cuda:0')
        [0.0343, 0.0280,    nan,    nan],
        [0.0000, 0.0000,    nan,    nan],
        [0.0000, 0.0198,    nan,    nan],
tensor([[0.0343, 0.0280,    nan,    nan]

  out = torch.nn.functional.softmax(self.fc(out[-1, :]))


tensor([[0.0280, 0.0000,    nan,    nan],
        [0.0000, 0.0000,    nan,    nan],
        [0.0280, 0.0000,    nan,    nan],
        [0.0000, 0.0000,    nan,    nan],
tensor([[0.0000, 0.0000,    nan,    nan],, device='cuda:0')
        [0.0280, 0.0000,    nan,    nan],
        [0.0000, 0.0000,    nan,    nan],
        [0.0000, 0.0000,    nan,    nan],
tensor([[0.0280, 0.0000,    nan,    nan],, device='cuda:0')
        [0.0000, 0.0000,    nan,    nan],
        [0.0000, 0.0000,    nan,    nan],
        [0.0000, 0.0343,    nan,    nan],
tensor([[0.0000, 0.0000,    nan,    nan],, device='cuda:0')
        [0.0000, 0.0000,    nan,    nan],
        [0.0000, 0.0343,    nan,    nan],
        [0.0343, 0.0198,    nan,    nan],
tensor([[0.0000, 0.0000,    nan,    nan],, device='cuda:0')
        [0.0000, 0.0343,    nan,    nan],
        [0.0343, 0.0198,    nan,    nan],
        [0.0280, 0.0000,    nan,    nan],
tensor([[0.0000, 0.0343,    nan,    nan],, device='cuda:0')
        [0.0343, 0.0198,    