In [1]:
import cv2
import os
from time import time
from PIL import Image
import matplotlib.pyplot as plt
from pynput import keyboard

In [2]:
import torch as tr
from model_vgg16 import VGG16
from torchvision import transforms


In [3]:
_FACE_CLASSIFIER = None  # lazily-created Haar cascade, shared by every call


def _get_face_classifier():
    """Return the frontal-face Haar cascade, loading the XML file on first use only."""
    global _FACE_CLASSIFIER
    if _FACE_CLASSIFIER is None:
        _FACE_CLASSIFIER = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
    return _FACE_CLASSIFIER


def face_detection(frame):
    """Detect faces in ``frame``, outline them in green, and return resized crops.

    Args:
        frame: image array to search, or ``None`` (e.g. after a failed camera
            read). The array is modified in place: a green rectangle is drawn
            around every detected face.

    Returns:
        A ``(faces_area, face)`` pair. ``faces_area`` is a list with one
        224x224 crop per detection (each crop is inset 5 px from the detected
        box on every side). ``face`` is the sequence of ``(x, y, w, h)`` boxes
        exactly as returned by ``detectMultiScale``; it is an empty tuple when
        ``frame`` is ``None``.
    """
    # A failed capture yields None; report "no faces" instead of crashing in OpenCV.
    if frame is None:
        return [], ()

    # Apply the classifier to the image. The cascade is cached because this
    # function is called once per video frame and reloading the XML is wasteful.
    face = _get_face_classifier().detectMultiScale(
        frame, scaleFactor=1.1, minNeighbors=5, minSize=(40, 40)
    )

    # Crop every face BEFORE drawing anything, so the outline drawn for one
    # face can never end up inside the crop of another (overlapping) face.
    faces_area = []
    for (x, y, w, h) in face:
        face_surface = frame[y + 5:y + h - 5, x + 5:x + w - 5]
        faces_area.append(
            cv2.resize(face_surface, (224, 224), interpolation=cv2.INTER_AREA)
        )

    # Annotate the caller's frame in place.
    for (x, y, w, h) in face:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 4)

    return faces_area, face

In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if tr.cuda.is_available() else 'cpu'

# NOTE: this label -> class-index mapping is valid only for this model;
# remember to update the dataset so that it uses this same dict.
labels = {'surprise': 0, 'fear': 1, 'angry': 2, 'neutral': 3, 'sad': 4, 'disgust': 5, 'happy': 6}

# Load the model. The constructor argument is presumably the number of output
# classes (TODO confirm in model_vgg16); derive it from `labels` so the two
# cannot drift apart.
model = VGG16(len(labels)).to(device)

# map_location lets a checkpoint saved on another device load onto `device`.
# NOTE(review): tr.load unpickles the file -- only load checkpoints from a
# trusted source (consider weights_only=True if the installed torch supports it).
checkpoint_sd = tr.load('./model2.pth', map_location=device)
model.load_state_dict(checkpoint_sd)

# Switch to evaluation mode for inference. Kept as the last expression so the
# notebook still displays the model architecture as the cell output.
model.eval()


VGG16(
  (layer1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer4): Sequential(
    (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=

In [8]:
def doSomeMagic(image: Image.Image, model):
    """Predict the label of ``image`` with ``model``.

    Args:
        image: PIL image to classify. It is converted to RGB, resized to
            224x224 and turned into a tensor before inference.
        model: network to run. The input batch is moved to the module-level
            ``device``, so the model is expected to live there as well.

    Returns:
        The first key of the module-level ``labels`` dict whose value equals
        the predicted class index, or ``None`` if no label has that index.
    """
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    # Force three channels: the VGG16 loaded in this notebook starts with a
    # Conv2d that takes 3 input channels, so a grayscale or RGBA image would
    # otherwise fail with a channel mismatch.
    rgb_image = image.convert("RGB")

    # unsqueeze(0) adds the batch dimension the model call expects.
    batch = preprocess(rgb_image).unsqueeze(0).to(device)

    # Inference only: no gradients are needed.
    with tr.no_grad():
        output = model(batch)

    # Index of the highest score along dim 1, as a plain Python int.
    predicted_class = tr.argmax(output, dim=1).item()

    # Reverse lookup: class index -> label name.
    return next(
        (name for name, index in labels.items() if index == predicted_class),
        None,
    )

In [10]:
# Live demo loop: show the camera feed with detected faces outlined.
#   's' -> save each detected face to ./camera_output and classify it
#   'q' -> quit
video_captor = cv2.VideoCapture(0)
predicts = []
try:
    while True:
        ret, frame = video_captor.read()
        if not ret:
            # No frame could be grabbed; stop instead of handing None to OpenCV.
            break

        face_img, face_coor = face_detection(frame)

        # Poll the keyboard exactly once per frame. Calling waitKey twice lets
        # the first call consume a key press that the second check was meant
        # to see, so 's' presses were randomly lost.
        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            break

        if key == ord('s'):
            # Reuse the detection already made for this frame; re-running it
            # would analyse a frame that already has outlines drawn on it.
            predicts = []
            os.makedirs("./camera_output", exist_ok=True)
            for img in face_img:
                img_path = os.path.join("./camera_output", f"face_image{time()}.jpg")
                cv2.imwrite(img_path, img)
                # Context manager guarantees the image file handle is closed.
                with Image.open(img_path) as saved_face:
                    predicts.append(doSomeMagic(saved_face, model))

        # Pair the latest predictions with the faces found in this frame
        # (matched by position in the two sequences).
        for (x, y, w, h), predict in zip(face_coor, predicts):
            if predict:
                cv2.putText(
                    frame, predict, (x + w // 2, y - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2
                )
        cv2.imshow("Sorridi!", frame)
finally:
    # Always free the camera and close the preview window, including when the
    # loop ends through an exception or a kernel interrupt.
    video_captor.release()
    cv2.destroyAllWindows()