In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import torch
import tensorflow as tf

from tensorflow.keras.models import Model,Sequential, load_model,model_from_json
from tensorflow.compat.v1.keras.backend import set_session 
from PIL import Image
from PIL import Image
from torchvision import transforms

# Importar módulo facial_analysis.py
from facial_analysis import FacialImageProcessing

from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM
from pytorch_grad_cam.utils.image import show_cam_on_image

# IPython/Jupyter line magic: render matplotlib figures inline in the notebook
# output. This line is only valid when the file is executed by IPython.
%matplotlib inline

# Suppress every Python warning for the rest of the session so that library
# warnings do not clutter the notebook output.
import warnings
warnings.filterwarnings("ignore")

class FacialEmotionRecognition:
    """Detect the faces in a frame and classify each one into a basic emotion.

    The constructor configures the TensorFlow session, builds the face
    detector, and loads the PyTorch emotion classifier (``enet_b2_7.pt``,
    seven emotion classes). ``FramePrediction`` then runs detection and
    classification on a single BGR frame and plots the results.

    Attributes:
        use_cuda: ``True`` when ``torch.cuda.is_available()`` reports a GPU.
        device: ``'cuda'`` or ``'cpu'``, derived from ``use_cuda``.
        imgProcessing: ``FacialImageProcessing`` instance used to detect faces.
        IMG_SIZE: Side length (pixels) the face crops are resized to.
        idx_to_class: Mapping from class index to emotion label.
        test_transforms: torchvision pipeline turning a PIL crop into a tensor.
        model: The loaded emotion classifier, in evaluation mode.
    """

    def __init__(self):
        """Set up the GPU environment, the face detector and the classifier."""
        # Portable equivalent of the IPython magic ``%env CUDA_VISIBLE_DEVICES=0``
        # (the magic is a syntax error outside IPython).
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"

        # TensorFlow session configured with ``allow_growth`` so GPU memory is
        # allocated on demand rather than reserved up front.
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        set_session(tf.compat.v1.Session(config=config))

        # Use CUDA when available, otherwise fall back to the CPU.
        self.use_cuda = torch.cuda.is_available()
        print("Cuda: ", self.use_cuda)
        self.device = 'cuda' if self.use_cuda else 'cpu'

        # Face detector. NOTE(review): the meaning of the positional ``False``
        # is defined in facial_analysis.py -- confirm there.
        self.imgProcessing = FacialImageProcessing(False)

        # The chosen model is enet_b2_7.pt, which predicts 7 basic emotions.
        self.IMG_SIZE = 260
        try:
            models_path = os.path.dirname(os.path.realpath(__file__))
        except NameError:
            # ``__file__`` is not defined inside a notebook kernel; resolve the
            # models directory relative to the working directory instead.
            models_path = os.getcwd()
        PATH = os.path.join(models_path, 'models', 'affectnet_emotions', 'enet_b2_7.pt')
        self.idx_to_class = {0: 'Anger', 1: 'Disgust', 2: 'Fear', 3: 'Happiness', 4: 'Neutral', 5: 'Sadness', 6: 'Surprise'}

        # Convert a face crop into a normalized tensor for inference.
        self.test_transforms = transforms.Compose(
            [
                transforms.Resize((self.IMG_SIZE, self.IMG_SIZE)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ]
        )

        # SECURITY NOTE: ``torch.load`` unpickles the file, which can execute
        # arbitrary code. Only load checkpoints from a trusted source.
        self.model = torch.load(PATH, map_location=torch.device(self.device))
        self.model = self.model.to(self.device)
        # Inference must run in evaluation mode so that layers such as dropout
        # and batch normalization behave deterministically.
        self.model.eval()

    @staticmethod
    def _clip_box(bbox, height, width):
        """Return the first four box values as ints clamped to the image bounds.

        Args:
            bbox: Sequence whose first four values are ``x1, y1, x2, y2``.
            height: Image height in pixels (upper bound for ``y``).
            width: Image width in pixels (upper bound for ``x``).

        Returns:
            Tuple ``(x1, y1, x2, y2)`` of Python ints. Negative coordinates are
            raised to 0 because a negative slice index would wrap around and
            select the wrong region.
        """
        x1, y1, x2, y2 = (int(v) for v in np.asarray(bbox)[:4].astype(int))
        x1 = min(max(x1, 0), width)
        x2 = min(max(x2, 0), width)
        y1 = min(max(y1, 0), height)
        y2 = min(max(y2, 0), height)
        return x1, y1, x2, y2

    def _show_heat_map(self, face_img, img_tensor, title):
        """Plot the Grad-CAM heat map of one face crop over the crop itself.

        Grad-CAM highlights the regions the network considered important for
        its prediction.

        Args:
            face_img: RGB face crop (``uint8`` array) taken from the frame.
            img_tensor: The preprocessed batch tensor fed to the classifier.
            title: Text shown above the plot (the predicted emotion).
        """
        target_layers = [self.model.blocks[-1][-1]]
        try:
            cam_ctx = GradCAM(model=self.model, target_layers=target_layers, use_cuda=self.use_cuda)
        except TypeError:
            # NOTE(review): newer pytorch-grad-cam releases appear to have
            # dropped ``use_cuda`` and infer the device from the model --
            # confirm against the installed version.
            cam_ctx = GradCAM(model=self.model, target_layers=target_layers)

        # The context manager releases the hooks GradCAM registers on the
        # model; without it they pile up with every processed face.
        with cam_ctx as cam:
            grayscale_cam = cam(input_tensor=img_tensor)[0, :]

        resized = cv2.resize(face_img, (self.IMG_SIZE, self.IMG_SIZE))
        rgb_img = np.float32(resized) / 255
        visualization = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)

        plt.figure(figsize=(3, 3))
        plt.axis('off')
        plt.imshow(visualization)
        plt.title(title)
        plt.show()

    def FramePrediction(self, frame=None, heat_map=False):
        """Predict and plot the emotion of every face found in ``frame``.

        Args:
            frame: Image in BGR channel order (it is converted with
                ``cv2.COLOR_BGR2RGB``), or ``None``.
            heat_map: When truthy, also plot a Grad-CAM heat map per face.

        Returns:
            Tuple ``(scores, max_score)`` for the LAST face processed:
            ``scores`` is the NumPy array of raw model outputs and
            ``max_score`` the label of its highest entry. Both are empty
            lists when there is no frame or no usable face.
        """
        scores = []
        max_score = []

        # No image at all.
        if frame is None:
            print("There is no image!")
            return scores, max_score

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        bounding_boxes, _ = self.imgProcessing.detect_faces(frame)

        # No faces detected. Test the number of boxes, not their values.
        if bounding_boxes is None or len(bounding_boxes) == 0:
            print("There is no faces!")
            return scores, max_score

        height, width = frame.shape[:2]
        cont_faces = 1

        for bbox in bounding_boxes:
            x1, y1, x2, y2 = self._clip_box(bbox, height, width)
            if x2 <= x1 or y2 <= y1:
                # The box lies outside the frame or is degenerate: there is
                # nothing to crop, and resizing an empty image would fail.
                continue
            face_img = frame[y1:y2, x1:x2, :]

            img_tensor = self.test_transforms(Image.fromarray(face_img))
            img_tensor = img_tensor.unsqueeze(0).to(self.device)
            # Plain inference: no gradients are needed for the prediction.
            with torch.no_grad():
                output = self.model(img_tensor)
            scores = output[0].cpu().numpy()
            max_score = self.idx_to_class[int(np.argmax(scores))]

            plt.figure(figsize=(3, 3))
            plt.axis('off')
            plt.imshow(face_img)
            plt.title(max_score)
            print("Score cara {}: ".format(cont_faces), scores)
            plt.show()

            cont_faces += 1

            if heat_map:
                self._show_heat_map(face_img, img_tensor, max_score)

        return scores, max_score