# In [23]:
import cv2
import imageio
import torch
from cvzone.HandTrackingModule import HandDetector
from facenet_pytorch import MTCNN
import numpy as np
from torchvision import models
import torchvision.transforms as tt

import matplotlib.pyplot as plt
from PIL import Image



# Margin, in pixels, added on every side of the detected hand bounding box
# before the crop is handed to the gesture classifier.
offset = 30

# Preprocessing transform bundled with the ImageNet ResNet-18 weights; in the
# code visible here ``weights`` is used only to obtain this transform.
weights = models.ResNet18_Weights.IMAGENET1K_V1
preprocess = weights.transforms()

# Hand detector limited to a single hand, and the capture device at index 0.
detector = HandDetector(maxHands=1)
cap = cv2.VideoCapture(0)


class FaceDetector(object):
    """
    Webcam demo: detect faces with MTCNN and label them with a hand gesture.

    The instance owns the face detector passed in and a gesture classifier
    loaded from ``./model___.pth``.  ``run`` relies on the module-level
    ``cap`` (video capture), ``detector`` (hand detector), ``offset`` (crop
    margin in pixels) and ``preprocess`` (classifier input transform).
    """

    # Classifier output index -> gesture name.
    _CLASS_NAMES = {
        0: 'palm',
        1: 'l',
        2: 'fist',
        3: 'fist_moved',
        4: 'thumb',
        5: 'index',
        6: 'ok',
        7: 'palm_moved',
        8: 'c',
        9: 'down',
    }

    def __init__(self, mtcnn):
        """
        Store the face detector and load the gesture model in eval mode.

        Args:
            mtcnn: object exposing ``detect(image, landmarks=True)``.
        """
        self.mtcnn = mtcnn
        self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        # NOTE(review): torch.load unpickles the file, so only load a
        # checkpoint you trust.  ``map_location`` lets a model that was saved
        # on a GPU be loaded on a CPU-only machine.
        self.gesture_model = torch.load(
            './model___.pth', map_location=self.device
        ).to(self.device)
        self.gesture_model.eval()

    def _draw(self, frame, boxes, probs, landmarks, gestures):
        """
        Draw the box, gesture label and landmarks for each face detected.

        Args:
            frame: image to draw on (modified in place).
            boxes: per-face ``[x1, y1, x2, y2]`` corners, or None.
            probs: per-face detection probabilities (accepted for interface
                compatibility; not rendered).
            landmarks: per-face sequences of ``(x, y)`` points, or None.
            gestures: a single label applied to every face, or a sequence
                with one label per face.  ``None`` labels are not drawn.

        Returns:
            The same ``frame`` object.
        """
        if boxes is None or landmarks is None:
            return frame

        if gestures is None or isinstance(gestures, str):
            # One gesture per frame: show it next to every detected face.
            labels = [gestures] * len(boxes)
        else:
            labels = list(gestures)

        red = (0, 0, 255)
        for box, ld, label in zip(boxes, landmarks, labels):
            try:
                x1, y1, x2, y2 = (int(v) for v in box[:4])
                cv2.rectangle(frame, (x1, y1), (x2, y2), red, thickness=2)

                if label is not None:
                    # Anchor the text just above the top-left corner, kept
                    # inside the frame when the face touches the top edge.
                    cv2.putText(frame, label,
                                (x1, max(y1 - 5, 10)),
                                cv2.FONT_HERSHEY_COMPLEX,
                                0.5, (0, 255, 128), 1,
                                cv2.LINE_AA)

                # The original drew exactly the first five landmark points.
                for point in ld[:5]:
                    cv2.circle(frame, (int(point[0]), int(point[1])), 1, red, -1)
            except (TypeError, ValueError, IndexError, cv2.error):
                # Drawing is best-effort: a malformed detection must not
                # stop the video loop, so skip just this face.
                continue

        return frame

    @staticmethod
    def digit_to_classname(digit):
        """
        Map a class index to its gesture name.

        Args:
            digit: integer index, or a one-element tensor/array scalar
                (anything exposing ``.item()``).

        Returns:
            The gesture name, or None when the index is not in 0..9.
        """
        if hasattr(digit, 'item'):
            digit = digit.item()
        try:
            return FaceDetector._CLASS_NAMES.get(digit)
        except TypeError:
            # Unhashable input cannot be a valid class index.
            return None

    @staticmethod
    def _crop_with_margin(image, bbox, margin):
        """Return the ``bbox`` region of ``image`` grown by ``margin`` and clamped to the frame."""
        x, y, w, h = (int(v) for v in bbox)
        height, width = image.shape[:2]
        # Clamp to the frame: a negative start index would wrap around and
        # yield an empty or unrelated crop when the hand is near an edge.
        top = max(y - margin, 0)
        left = max(x - margin, 0)
        bottom = max(min(y + h + margin, height), top)
        right = max(min(x + w + margin, width), left)
        return image[top:bottom, left:right]

    def _classify_gesture(self, image):
        """Return the gesture name for the first detected hand, ``'No hands'`` if none, or None for an unknown class."""
        try:
            found = detector.findHands(image, draw=False)
            # NOTE(review): some cvzone releases return ``(hands, img)`` even
            # with draw=False - unwrap so both shapes work; confirm against
            # the installed version.
            hands = found[0] if isinstance(found, tuple) else found
            if not hands:
                return 'No hands'
            crop = self._crop_with_margin(image, hands[0]['bbox'], offset)
            if crop.size == 0:
                return 'No hands'
            cv2.imshow('HAND', crop)
        except (KeyError, IndexError, TypeError, ValueError, cv2.error):
            # Unusable hand detection: fall back to the "no hands" label.
            return 'No hands'

        batch = preprocess(tt.ToTensor()(crop)).to(self.device)
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            scores = self.gesture_model(batch[None, ...])
        return self.digit_to_classname(scores.argmax().item())

    @staticmethod
    def _save_gif(frames, path="final.gif"):
        """Write the collected BGR frames to ``path`` as a GIF; do nothing when there are no frames."""
        if not frames:
            return
        with imageio.get_writer(path, mode='I') as writer:
            for idx, frame in enumerate(frames, start=1):
                print("Adding frame to GIF file: ", idx)
                writer.append_data(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    def run(self):
        """
        Run the interactive loop until Esc is pressed or the camera stops.

        Each frame is checked for faces; when at least one is found the hand
        gesture is classified and the annotations are drawn.  Pressing ``s``
        queues the current frame for the GIF, Esc (key code 27) writes
        ``final.gif`` and exits.  The capture device and windows are released
        even if an error occurs.
        """
        gif = []

        try:
            while True:
                success, image = cap.read()
                if not success or image is None:
                    # Camera unavailable or stream ended: nothing to process.
                    break

                # Face detection.
                # NOTE(review): the frame is passed in OpenCV's BGR order;
                # confirm whether the detector expects RGB.
                boxes, probs, landmarks = self.mtcnn.detect(image, landmarks=True)

                # Only look for hands when a face was detected.
                if boxes is not None and len(boxes) > 0:
                    gesture = self._classify_gesture(image)
                    self._draw(image, boxes, probs, landmarks, gesture)

                # Show the frame and poll the keyboard exactly once, so a
                # key press cannot be consumed by a discarded waitKey call.
                cv2.imshow('Image', image)
                key = cv2.waitKey(1) & 0xFF

                if key == ord('s'):
                    print('append image to gif')
                    gif.append(image)
                elif key == 27:
                    self._save_gif(gif)
                    break
        finally:
            cap.release()
            cv2.destroyAllWindows()
        
        
# Build the face detector with default MTCNN settings and start the
# interactive webcam loop.
mtcnn = MTCNN()
fcd = FaceDetector(mtcnn=mtcnn)
fcd.run()