In [None]:
import json
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg 
import trt_pose.coco
import math
import os
import numpy as np
import traitlets
import pickle 


In [None]:
# After running this code, scroll back up to find the game window.

In [None]:
# Build (once) and load the TensorRT-optimized pose network.
import json
import os

import torch
import torch2trt
import trt_pose.coco
import trt_pose.models
from torch2trt import TRTModule

# Checkpoint locations and the network input size.
MODEL_WEIGHTS = 'model/resnet18_baseline_att_224x224_A_epoch_249.pth'
OPTIMIZED_MODEL = 'model/resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
WIDTH = 224
HEIGHT = 224

# Keypoint/skeleton description; also read by draw_joints() for the links.
with open('preprocess/human_pose.json', 'r') as f:
    human_pose = json.load(f)

topology = trt_pose.coco.coco_category_to_topology(human_pose)

num_parts = len(human_pose['keypoints'])
num_links = len(human_pose['skeleton'])

# The torch2trt conversion is expensive, so it only runs when no optimized
# checkpoint exists yet. Delete OPTIMIZED_MODEL to force a rebuild (for
# example after replacing MODEL_WEIGHTS).
# `model` and `data` are only defined when the conversion actually runs.
if not os.path.exists(OPTIMIZED_MODEL):
    model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()
    model.load_state_dict(torch.load(MODEL_WEIGHTS))

    # Example input that fixes the input shape used for the conversion.
    data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()

    model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)
    torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)

# Always serve inference from the saved, optimized checkpoint.
model_trt = TRTModule()
model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

In [None]:
from trt_pose.draw_objects import DrawObjects
from trt_pose.parse_objects import ParseObjects

# Post-processing helpers built from the keypoint topology.
# parse_objects is called on the network's (cmap, paf) outputs and yields
# (counts, objects, peaks) in data_preprocess() and execute().
parse_objects = ParseObjects(topology)
# draw_objects is only referenced from a commented-out line in execute().
draw_objects = DrawObjects(topology)

In [None]:

import torchvision.transforms as transforms
import PIL.Image

# Per-channel mean and standard deviation that preprocess() subtracts and
# divides by (presumably the ImageNet channel statistics - confirm against
# the checkpoint's training setup). Both live on the GPU.
mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
# preprocess() rebinds this global to the same CUDA device on every call.
device = torch.device('cuda')

def preprocess(image):
    """Convert one BGR frame into a normalized, batched CUDA tensor.

    Args:
        image: frame in BGR channel order, as accepted by ``cv2.cvtColor``.

    Returns:
        The normalized tensor with one extra leading (batch) dimension.

    Side effects:
        Rebinds the module-level ``device`` to the CUDA device.
    """
    global device
    device = torch.device('cuda')

    # BGR -> RGB -> PIL image -> tensor on the GPU.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pil_frame = PIL.Image.fromarray(rgb_frame)
    tensor = transforms.functional.to_tensor(pil_frame)
    tensor = tensor.to(device)

    # In-place per-channel normalization; mean/std are broadcast over H and W.
    tensor.sub_(mean[:, None, None])
    tensor.div_(std[:, None, None])

    # Prepend the batch axis.
    return tensor[None, ...]

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
# Gesture classifier: features are standardized, then passed to an RBF-kernel SVM.
# A later cell either trains this pipeline or replaces `clf` with an unpickled model.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto', kernel='rbf'))

In [None]:
from preprocessdata import preprocessdata
# NOTE: this rebinds the imported class name `preprocessdata` to an instance.
# Every later cell uses the instance; re-running only the second line would
# call the instance instead of the class, so always run the whole cell.
preprocessdata = preprocessdata(topology, num_parts)

In [None]:
from dataloader import dataloader
# NOTE(review): absolute, machine-specific path - the notebook will not run on
# another machine without editing it.
path = "/home/ceng/trt_pose_hand-main/data_collection/"
label_file = "pose_dataset_train.json"
test_label = "pose_dataset_test.json"
# Later cells read human.train_images, human.test_images, human.labels_train
# and human.labels_test from this object.
human = dataloader(path, label_file, test_label)

In [None]:
def data_preprocess(images):
    """Compute the joint-distance feature vector for every image.

    Each image is flipped along its second axis, run through the pose
    network, parsed into joints, and reduced to the distances returned by
    ``preprocessdata.find_distance``.

    Args:
        images: iterable of frames accepted by ``preprocess``.

    Returns:
        A list with one feature vector per input image, in input order.
    """
    def _features_for(frame):
        # Flip along axis 1 before inference (same as the original pipeline).
        mirrored = frame[:, ::-1, :]
        cmap, paf = model_trt(preprocess(mirrored))
        cmap = cmap.detach().cpu()
        paf = paf.detach().cpu()
        counts, objects, peaks = parse_objects(cmap, paf)
        joints = preprocessdata.joints_inference(mirrored, counts, objects, peaks)
        return preprocessdata.find_distance(joints)

    return [_features_for(frame) for frame in images]

In [None]:
def smaller_dataset(dataset, no_samples_per_class, no_of_classes, total_samples_per_class=10):
    """Take the first few samples of every class from a class-ordered dataset.

    The dataset is expected to be laid out as consecutive runs of
    ``total_samples_per_class`` samples, one run per class.

    Args:
        dataset: sliceable sequence of samples, grouped by class.
        no_samples_per_class: how many leading samples to keep from each class.
        no_of_classes: number of class runs to read.
        total_samples_per_class: stride between the starts of two consecutive
            classes in ``dataset`` (defaults to 10, the previously hard-coded
            value).

    Returns:
        ``(new_dataset, labels)``: the selected samples and their 1-based
        class labels. Both lists always have the same length, even when
        ``dataset`` is too short to supply every requested sample.
    """
    new_dataset = []
    labels = []
    for class_index in range(no_of_classes):
        start = class_index * total_samples_per_class
        picked = dataset[start:start + no_samples_per_class]
        new_dataset.extend(picked)
        # Label only the samples actually taken so data and labels stay aligned.
        labels.extend([class_index + 1] * len(picked))
    return new_dataset, labels

In [None]:
# Calls the dataloader's own smaller_dataset method, not the module-level
# function defined above.
# NOTE(review): train_images / labels_train are not referenced by any later
# visible cell (they use human.train_images / human.labels_train) - confirm
# whether this call is relied on for a side effect on `human`.
train_images, labels_train = human.smaller_dataset(human.train_images,10,7)

In [None]:
# Feature vectors for the train and test images. These are only consumed by
# the SVM cell below when svm_train is True, yet the inference runs regardless.
joints_train = data_preprocess(human.train_images)
joints_test = data_preprocess(human.test_images)

In [None]:
# Set to True to retrain the SVM and overwrite the saved model;
# leave False to load the previously saved model instead.
svm_train = False
filename = 'svmmodel_new.sav'
if svm_train:
    clf, predicted = preprocessdata.trainsvm(clf, joints_train, joints_test, human.labels_train, human.labels_test)
    # Context manager guarantees the file is flushed and closed.
    with open(filename, 'wb') as model_file:
        pickle.dump(clf, model_file)
else:
    # SECURITY: unpickling executes arbitrary code from the file - only load
    # a model file you created yourself or otherwise trust.
    with open(filename, 'rb') as model_file:
        clf = pickle.load(model_file)

In [None]:
# Load the gesture class list from the project's JSON description.
# NOTE: gesture_type is not referenced by any later visible cell, and
# execute() uses a local variable that is also named `gesture`.
with open('preprocess/ceng.json', 'r') as f:
    gesture = json.load(f)
gesture_type = gesture["classes"]


In [None]:
from jetcam.usb_camera import USBCamera
from jetcam.csi_camera import CSICamera
from jetcam.utils import bgr8_to_jpeg

# Camera source sized to the network input. Swap the two lines below to use a
# USB camera instead of the CSI camera.
#camera = USBCamera(width=WIDTH, height=HEIGHT, capture_fps=30, capture_device=1)
camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=30)

# Start continuous capture; a later cell observes `camera.value` for new frames.
camera.running = True

In [None]:
def draw_joints(image, joints):
    """Draw the detected joints and skeleton links onto ``image`` in place.

    A joint equal to ``[0, 0]`` is treated as "not detected". Nothing is
    drawn when ``joints`` is empty or when three or more joints are missing.

    Args:
        image: frame passed to ``cv2.circle`` / ``cv2.line``; modified in place.
        joints: sequence of ``[x, y]`` pixel coordinates, indexed so that
            skeleton entry ``k`` (1-based in ``human_pose['skeleton']``)
            refers to ``joints[k - 1]``.

    Returns:
        None.
    """
    if len(joints) == 0:
        return

    undetected = [0, 0]
    missing = sum(1 for joint in joints if joint == undetected)
    if missing >= 3:
        return

    # Red dot on every detected joint; undetected joints would otherwise show
    # up as a spurious dot in the image corner.
    for joint in joints:
        if joint == undetected:
            continue
        cv2.circle(image, (joint[0], joint[1]), 2, (0,0,255), 1)

    # Highlight the first joint in magenta.
    root = joints[0]
    if root != undetected:
        cv2.circle(image, (root[0], root[1]), 2, (255,0,255), 1)

    for link in human_pose['skeleton']:
        start = joints[link[0]-1]
        end = joints[link[1]-1]
        # Skip only the links that touch a missing joint; the remaining links
        # are still drawn (the previous `break` dropped all of them).
        if start[0] == 0 or end[0] == 0:
            continue
        cv2.line(image, (start[0], start[1]), (end[0], end[1]), (0,255,0), 1)

In [None]:
import ipywidgets
from IPython.display import display


# The game window: execute() writes each annotated frame to image_w.value as JPEG bytes.
image_w = ipywidgets.Image(format='jpeg', width=224, height=224)
display(image_w)

In [None]:
# NOTE(review): none of the *_pose names are referenced by any other visible
# cell (execute() uses the *_flag names defined in the next cell), and `score`
# is assigned again there - this cell looks like a leftover.
first_pose = False
second_pose = False
third_pose = False
fourth_pose = False
fifth_pose = False
sixth_pose = False
score = 0


In [None]:
# Game state shared with execute() through its `global` statement.
# Re-run this cell to reset the game.
# `time` is compared against thresholds in execute() to pick the game phase.
# NOTE: as a global integer it would shadow a module-level `import time`.
time = 0
# Points awarded by execute() for correctly shown gestures.
score = 0
# One flag per round; execute() sets a flag when the requested gesture is seen.
first_flag = False
second_flag = False
third_flag = False
fourth_flag = False
fifth_flag = False
sixth_flag = False

In [None]:
# Text overlay settings shared by every cv2.putText call made from execute().
# Adjust to taste; the values are sized for the 224x224 game window.
_OVERLAY_FONT = cv2.FONT_HERSHEY_SIMPLEX
_OVERLAY_ORG = (50, 50)          # bottom-left corner of the text, in pixels
_OVERLAY_SCALE = 0.5
_OVERLAY_THICKNESS = 2
_OVERLAY_COLOR = (255, 0, 0)     # BGR: prompts, "Ready" and the final score
_CORRECT_COLOR = (0, 255, 0)     # BGR: "Correct!"
_WRONG_COLOR = (0, 0, 255)       # BGR: "Wrong!"

# Game timeline, measured in processed frames (the global `time` counter).
_READY_UNTIL = 60                # "Ready" is shown while time <= 60
_SCORE_FROM = 270                # the final score is shown while time > 270

# One entry per round:
#   (requested label, prompt window, feedback window, name of the global flag)
# Each window (lo, hi) is active while lo < time <= hi.
_ROUNDS = (
    ("4", (120, 133), (133, 138), "first_flag"),
    ("5", (142, 155), (155, 160), "second_flag"),
    ("6", (168, 178), (180, 185), "third_flag"),
    ("1", (200, 210), (210, 215), "fourth_flag"),
    ("2", (220, 230), (230, 235), "fifth_flag"),
    ("3", (245, 255), (255, 260), "sixth_flag"),
)


def _put_text(image, text, color=_OVERLAY_COLOR):
    """Draw ``text`` on ``image`` with the shared overlay settings."""
    return cv2.putText(image, text, _OVERLAY_ORG, _OVERLAY_FONT, _OVERLAY_SCALE,
                       color, _OVERLAY_THICKNESS, cv2.LINE_AA)


def execute(change):
    """Process one camera frame: run pose inference and advance the game.

    Intended as a traitlets observer for ``camera.value``; it can also be
    called directly with ``{'new': frame}``.

    For every frame the function
      1. runs the pose network and draws the detected joints,
      2. classifies the gesture and pushes it into ``preprocessdata.prev_queue``
         (fixed-length sliding window: append one, drop the oldest),
      3. overlays the game text for the current phase, and
      4. publishes the annotated frame to ``image_w`` and advances ``time``.

    Game state lives in module globals: ``time`` (frame counter), ``score``
    and the six ``*_flag`` booleans. Re-run the state cell to restart.
    Each correct round is worth exactly one point.

    NOTE(review): a round is marked correct when ``preprocessdata.text``
    equals the requested label, but nothing in this function updates
    ``preprocessdata.text`` - verify that it is refreshed elsewhere from
    ``prev_queue``.

    Args:
        change: mapping whose ``'new'`` entry is the BGR frame to process.

    Returns:
        None.
    """
    global time, score
    # The per-round flags are addressed by name through the module namespace
    # so the rounds can be table-driven while staying plain globals.
    flags = globals()

    image = change['new']
    tensor = preprocess(image)
    cmap, paf = model_trt(tensor)
    cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
    counts, objects, peaks = parse_objects(cmap, paf)
    joints = preprocessdata.joints_inference(image, counts, objects, peaks)
    draw_joints(image, joints)

    dist_bn_joints = preprocessdata.find_distance(joints)
    # Two rows are passed to predict(); only the first prediction is used.
    gesture = clf.predict([dist_bn_joints, [0] * num_parts * num_parts])
    preprocessdata.prev_queue.append(gesture[0])
    preprocessdata.prev_queue.pop(0)

    if time <= _READY_UNTIL:
        image = _put_text(image, "Ready")

    for label, (prompt_lo, prompt_hi), (result_lo, result_hi), flag_name in _ROUNDS:
        if prompt_lo < time <= prompt_hi:
            image = _put_text(image, label + "!")
            if preprocessdata.text == label:
                flags[flag_name] = True

        if result_lo < time <= result_hi:
            if flags[flag_name]:
                image = _put_text(image, "Correct!", _CORRECT_COLOR)
                # Award the point once, on the first feedback frame, instead
                # of once per displayed frame.
                if time == result_lo + 1:
                    score += 1
            else:
                image = _put_text(image, "Wrong!", _WRONG_COLOR)

    if time > _SCORE_FROM:
        image = _put_text(image, "Score: " + str(score))

    time += 1
    image_w.value = bgr8_to_jpeg(image)


In [None]:
def face(wait_seconds=10, threshold=0.5):
    """Block until a person is detected on the CSI camera, then pause.

    Opens the CSI camera and an on-screen display, runs SSD-MobileNet-v2
    detection on each captured frame and renders the frame. As soon as any
    detection is classified as "person", a message is printed, the function
    sleeps for ``wait_seconds`` and returns. It also returns when the display
    stops streaming. The camera and display are always closed on exit.

    NOTE(review): the notebook also opens the CSI camera through jetcam in an
    earlier cell - confirm both can hold the sensor at the same time.

    Args:
        wait_seconds: pause after a person is detected, in seconds.
        threshold: minimum detection confidence passed to ``detectNet``.

    Returns:
        None.
    """
    # Imported locally on purpose: the notebook uses a global integer named
    # `time` as its frame counter, which would shadow a module-level import.
    import jetson.inference
    import jetson.utils
    import time

    net = jetson.inference.detectNet("ssd-mobilenet-v2", threshold=threshold)
    camera = jetson.utils.videoSource("csi://0")      # '/dev/video0' for V4L2
    display = jetson.utils.videoOutput("display://0") # 'my_video.mp4' for file

    try:
        while display.IsStreaming():
            img = camera.Capture()
            if img is None:
                # No frame delivered this round (e.g. capture timeout); retry.
                continue
            detections = net.Detect(img)
            if any(net.GetClassDesc(detection.ClassID) == "person" for detection in detections):
                print("Game window will start soon...\nPlease go to the game window cell now.")
                time.sleep(wait_seconds)
                return
            display.Render(img)
            display.SetStatus("Object Detection | Network {:.0f} FPS".format(net.GetNetworkFPS()))
    finally:
        # Release the camera and the display window on every exit path.
        camera.Close()
        display.Close()

In [None]:
# Blocks until face() returns, i.e. until a person is detected or the display stops streaming.
face()

In [None]:
# Process the camera's current frame once, by calling the handler directly.
execute({'new': camera.value})

In [None]:
# Start the live game: execute() is now called whenever camera.value changes.
camera.observe(execute, names='value')

In [None]:
# Uncomment and run to detach execute() from the camera and stop the live game:
#camera.unobserve_all()

In [None]:
# Uncomment and run to stop the camera capture:
#camera.running = False