In [1]:
import torch
import time

def time_synchronized():
    """Return a timestamp in seconds for timing PyTorch code.

    If CUDA is available, ``torch.cuda.synchronize()`` is called before the
    clock is read so that GPU work queued so far has finished and is
    included in the measurement.

    The value is read from ``time.perf_counter()``, a monotonic
    high-resolution clock. It is therefore only meaningful as the
    difference between two calls (elapsed seconds), not as an absolute
    wall-clock time, and it is not affected by system clock adjustments.

    Returns:
        float: seconds on the performance-counter clock.
    """
    if torch.cuda.is_available():
        # Block until pending CUDA work completes before reading the clock.
        torch.cuda.synchronize()
    return time.perf_counter()

In [2]:
import argparse
from tqdm import tqdm

from modules.target_person_detection import target_person_detection
from modules.crop_target_box import crop_target_box
from modules.gesture_recognition import gesture_recognition
from modules.gesture_synthesis import gesture_synthesis
from select_device import select_device
from load_model import load_targets_model, load_gestures_model
from LoadImages import LoadImages
from inference_prep import img_prep, warmup

def main(opt):
    """Run target-person detection and gesture classification on a set of images.

    For every image yielded by ``LoadImages`` the function:

    1. calls ``target_person_detection`` with the targets model,
    2. passes the resulting boxes to ``crop_target_box``,
    3. if a crop was produced, runs ``gesture_recognition`` on it and feeds
       the predictions to ``gesture_synthesis``.

    The outcome per image and the elapsed time of steps 1-3 (milliseconds)
    are collected and printed once all images have been processed.

    Args:
        opt: namespace-like object. Attributes read here:
            ``target``     - forwarded to ``load_targets_model``;
            ``img_source`` - forwarded to ``LoadImages``;
            ``conf_thres`` - forwarded to both detection calls.
            ``show_plots`` and ``save`` may be present but are not acted
            upon yet (plotting and saving are not implemented).

    Returns:
        None. Two objects are printed: a dict mapping image path to the
        ``gesture_synthesis`` output (or ``-1`` when ``crop_target_box``
        returned ``-1``), and the list of per-image timings in ms.
    """
    img_size = 1024  # single source for the loader size and the initial warmup H/W
    results = {}     # image path -> gesture_synthesis output, or -1 if nothing was cropped
    speed = []       # per-image elapsed time in milliseconds

    # Initialize
    # NOTE(review): '0' is presumably the first CUDA device - confirm against select_device.
    device = select_device('0')
    # True on any non-CPU device; presumably enables FP16 in the helpers - confirm.
    half = device.type != 'cpu'

    # Load both models up front so the loop only does inference
    targets_model, stride = load_targets_model(opt.target, device, half)
    gestures_model, stride_g = load_gestures_model(device, half)

    # Load DataLoader
    dataset = LoadImages(opt.img_source, img_size=img_size, stride=stride)
    print(dataset)

    old_img_w = old_img_h = img_size
    old_img_b = 1

    # Pure inference: no gradients are needed, so do not record an autograd graph.
    with torch.no_grad():
        for path, img, im0s in tqdm(dataset):
            img = img_prep(img, device, half)
            # NOTE(review): old_img_* are never updated here, so warmup() receives
            # the same reference shape on every iteration - confirm this is
            # intended. The call is deliberately outside the timed section.
            warmup(targets_model, device, old_img_b, old_img_h, old_img_w, img)

            # Start timer
            t1 = time_synchronized()

            # Detect target person on the image
            person_bboxs = target_person_detection(targets_model, path, img, im0s, opt.conf_thres)

            # Extract the target person from the original image
            person_extracted_img = crop_target_box(person_bboxs, im0s)

            # crop_target_box signals "nothing to crop" with the integer -1
            if isinstance(person_extracted_img, int) and person_extracted_img == -1:
                p = -1
            else:
                # Detect gestures on the cropped image
                img = img_prep(person_extracted_img, device, half, cropped_img=True, stride=stride_g)
                gestures_preds = gesture_recognition(gestures_model, path, img, person_extracted_img, opt.conf_thres)

                # Classify
                p = gesture_synthesis(gestures_preds)

            # Only the end-to-end time (detection -> classification) is recorded
            t2 = time_synchronized()
            speed.append(1E3 * (t2 - t1))
            results[path] = p

    # Print final results
    print(results)
    print(speed)
    
# TODO (in main, inside the per-image loop): if opt.show_plots is set, display one plot row per image:
#       | original | target detected | cut | gesture detected | prediction p |
# TODO (in main, after the loop): if opt.save is set, save the results.

In [3]:
import argparse

# Options for this run; main() reads them as attributes of the namespace.
opt = argparse.Namespace(**{
    "target": 0,                                        # forwarded to load_targets_model
    "img_source": "./../datasets/SIGGI/full/0_small",   # image source handed to LoadImages
    "conf_thres": 0.231,                                # threshold passed to both detection calls
    "show_plots": False,
    "save": False,
})

# Run the whole pipeline; results and timings are printed by main().
main(opt)

Fusing layers... 
IDetect.fuse


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
  return F.conv2d(input, weight, bias, self.stride,


Fusing layers... 
IDetect.fuse
<LoadImages.LoadImages object at 0x7fc5fd7165f0>


10it [00:10,  1.03s/it]

{'/home/jovyan/Person-Tailored Gesture Classification/../datasets/SIGGI/full/0_small/075823f53f.jpg': 0, '/home/jovyan/Person-Tailored Gesture Classification/../datasets/SIGGI/full/0_small/185fcae3be.jpg': 2.0, '/home/jovyan/Person-Tailored Gesture Classification/../datasets/SIGGI/full/0_small/236ba0daed.jpg': 0, '/home/jovyan/Person-Tailored Gesture Classification/../datasets/SIGGI/full/0_small/2b70b73c73.jpg': 4.0, '/home/jovyan/Person-Tailored Gesture Classification/../datasets/SIGGI/full/0_small/474d3ae1fe.jpg': 0, '/home/jovyan/Person-Tailored Gesture Classification/../datasets/SIGGI/full/0_small/4a8512a9e3.jpg': 0.0, '/home/jovyan/Person-Tailored Gesture Classification/../datasets/SIGGI/full/0_small/523fe5470b.jpg': 4.0, '/home/jovyan/Person-Tailored Gesture Classification/../datasets/SIGGI/full/0_small/5cc8062e8e.jpg': 0.0, '/home/jovyan/Person-Tailored Gesture Classification/../datasets/SIGGI/full/0_small/62f5f46069.jpg': 0, '/home/jovyan/Person-Tailored Gesture Classification/


