In [1]:
import cv2
import os
import numpy as np
from numpy import expand_dims

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array


2023-12-08 09:51:04.498029: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
#Defino función de predicción
def prediccion(modelo,imagen,treshold):

    # Reshape para que coincida con el formato de entrada del modelo

     nueva_altura = 224
     nuevo_ancho = 224

     h, w = imagen.shape[:2]

    # Calcular el nuevo tamaño manteniendo la relación de aspecto original
     if h < w:
          nueva_altura_temp = int(nueva_altura * (h / w))
          nuevo_ancho_temp = nuevo_ancho
     else:
          nueva_altura_temp = nueva_altura
          nuevo_ancho_temp = int(nuevo_ancho * (w / h))

     imagen_redimensionada = cv2.resize(imagen,(nuevo_ancho_temp , nueva_altura_temp))

     imagen_redimensionada = imagen_redimensionada / 255.0

     # Crear una imagen en blanco del tamaño objetivo
     imagen_final = np.zeros((nueva_altura, nuevo_ancho, 3), dtype=np.uint8)

     # Calcular las coordenadas para copiar la imagen redimensionada en el centro
     y_offset = (nueva_altura - nueva_altura_temp) // 2
     x_offset = (nuevo_ancho - nuevo_ancho_temp) // 2

     # Copiar la región redimensionada en la ubicación calculada
     imagen_final[y_offset:y_offset + nueva_altura_temp, x_offset:x_offset + nuevo_ancho_temp] = imagen_redimensionada

     imagen_final = np.expand_dims(imagen_final, axis=0)


     # Realizar predicciones
     prediccion_imagen = modelo.predict(imagen_final)

     if prediccion_imagen > treshold:
          print(prediccion_imagen)
          return f"La persona tiene sueño:[{prediccion_imagen}]"
     else:
          print(prediccion_imagen)
          return f"La persona no tiene sueño:[{prediccion_imagen}]"



In [3]:
class BoundBox:
    def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.objness = objness
        self.classes = classes
        self.label = -1
        self.score = -1
        
    def get_label(self):
        if self.label == -1:
            self.label = np.argmax(self.classes)
        
        return self.label
    
    def get_score(self):
        if self.score == -1:
            self.score = self.classes[self.get_label()]
        
        return self.score


def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

In [4]:
def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    grid_h, grid_w = netout.shape[:2] # 0 and 1 is row and column 13*13
    nb_box = 3 # 3 anchor boxes
    netout = netout.reshape((grid_h, grid_w, nb_box, -1)) #13*13*3 ,-1
    nb_class = netout.shape[-1] - 5
    boxes = []
    netout[..., :2]  = _sigmoid(netout[..., :2])
    netout[..., 4:]  = _sigmoid(netout[..., 4:])
    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_thresh
    
    for i in range(grid_h*grid_w):
        row = i / grid_w
        col = i % grid_w
        for b in range(nb_box):
            # 4th element is objectness score
            objectness = netout[int(row)][int(col)][b][4]
            if(objectness.all() <= obj_thresh): continue
            # first 4 elements are x, y, w, and h
            x, y, w, h = netout[int(row)][int(col)][b][:4]
            x = (col + x) / grid_w # center position, unit: image width
            y = (row + y) / grid_h # center position, unit: image height
            w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
            h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height
            # last elements are class probabilities
            classes = netout[int(row)][col][b][5:]
            box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
            boxes.append(box)
    return boxes


def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    new_w, new_h = net_w, net_h
    for i in range(len(boxes)):
        x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
        y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
        boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
        boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
        boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
        boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)

In [5]:
def _interval_overlap(interval_a, interval_b):
    x1, x2 = interval_a
    x3, x4 = interval_b
    if x3 < x1:
        if x4 < x1:
            return 0
        else:
            return min(x2,x4) - x1
    else:
        if x2 < x3:
            return 0
        else:
            return min(x2,x4) - x3

#intersection over union        
def bbox_iou(box1, box2):
    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
    intersect = intersect_w * intersect_h
    
    
    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin  
    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
    
    #Union(A,B) = A + B - Inter(A,B)
    union = w1*h1 + w2*h2 - intersect
    return float(intersect) / union

In [6]:
def do_nms(boxes, nms_thresh):    #boxes from correct_yolo_boxes and  decode_netout
    if len(boxes) > 0:
        nb_class = len(boxes[0].classes)
    else:
        return
    for c in range(nb_class):
        sorted_indices = np.argsort([-box.classes[c] for box in boxes])
        for i in range(len(sorted_indices)):
            index_i = sorted_indices[i]
            if boxes[index_i].classes[c] == 0: continue
            for j in range(i+1, len(sorted_indices)):
                index_j = sorted_indices[j]
                if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
                    boxes[index_j].classes[c] = 0

In [7]:
# load and prepare an image
def load_image_pixels(image, shape):
    width, height = image.size
    # load the image with the required size
    image = load_img(filename, target_size=shape) # target_size argument to resize the image after loading
    # convert to numpy array
    image = img_to_array(image)
    # scale pixel values to [0, 1]
    image = image.astype('float32')
    image /= 255.0  #rescale the pixel values from 0-255 to 0-1 32-bit floating point values.
    # add a dimension so that we have one sample
    image = expand_dims(image, 0)
    return image, width, height

In [8]:
# draw all results
#def draw_boxes(image, v_boxes, v_labels, v_scores):
def draw_boxes(image, v_boxes, label):
    #load the image
    img = image
    for i in range(len(v_boxes)):
        # retrieving the coordinates from each bounding box
        box = v_boxes[i]
        # get coordinates
        y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
        start_point = (x1, y1) 
        # Ending coordinate
        # represents the bottom right corner of rectangle 
        end_point = (x2, y2) 
        # Red color in BGR 
        color = (0, 0, 255) 
        # Line thickness of 2 px 
        thickness = 2
        # font 
        font = cv2.FONT_HERSHEY_PLAIN 
        # fontScale 
        fontScale = 1.5
        #create the shape
        img = cv2.rectangle(img, start_point, end_point, color, thickness) 
        # draw text and score in top left corner
        #label = "%s (%.3f)" % (v_labels[i], v_scores[i])
        img = cv2.putText(img, label, (x1,y1), font,  
                   fontScale, color, thickness, cv2.LINE_AA)
    return img
    # show the plot
    #output = "outputs/"+filename.rsplit("/")[1].rsplit(".")[0]+'_yolov3.jpg'
    #save the image
    #cv2.imwrite(output,img)
    #cv2.imshow("yolov3",img)
    #cv2.waitKey(0)
    #cv2.destroyAllWindows()

In [9]:
# get all of the results above a threshold
def get_boxes(boxes, labels, thresh):
    v_boxes, v_labels, v_scores = list(), list(), list()
    # enumerate all boxes
    for box in boxes:
        # enumerate all possible labels
        for i in range(len(labels)):
            # check if the threshold for this label is high enough
            if box.classes[i] > thresh:
                v_boxes.append(box)
                v_labels.append(labels[i])
                v_scores.append(box.classes[i]*100)
    
    return v_boxes, v_labels, v_scores

In [10]:
def img_crop(img, v_boxes):
    # Create a directory to save cropped images
    #output_dir = "outputs/"
    #os.makedirs(output_dir, exist_ok=True)

    for i, box in enumerate(v_boxes):
        y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax

        # Crop the region inside the bounding box
        cropped_region = img[y1:y2, x1:x2]
        # Save the cropped region as a new image
        #output_path = os.path.join(output_dir, f"box_{i+1}.jpg")
        #cv2.imwrite(output_path, cropped_region)
    return cropped_region

In [11]:
def extractFace(yhat,frame):
    input_w, input_h = 416, 416
    image_w, image_h = frame.shape[0],frame.shape[1]
    # define the anchors
    anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]]  

    # define the probability threshold for detected objects
    class_threshold = 0.6
    labels = ["face"]
    boxes = list()
    for i in range(len(yhat)):
        # decode the output of the network
        boxes += decode_netout(yhat[i][0], anchors[i], class_threshold, input_h, input_w)
        
    # correct the sizes of the bounding boxes for the shape of the image
    correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)

    # suppress non-maximal boxes
    do_nms(boxes, 0.5)  #Discard all boxes with pc less or equal to 0.5

    # get the details of the detected objects
    v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)


    # draw what we found
    # draw_boxes(frame, v_boxes, v_labels, v_scores)
    extractedFace = img_crop(frame,v_boxes)
    return extractedFace,v_boxes

In [12]:
def preprocessFrame(frame):
    YOLO_INPUT_WIDTH = 416
    YOLO_INPUT_HEIGHT = 416
    # Resize the frame to match the input size expected by the YOLO model
    resized_frame = cv2.resize(frame, (YOLO_INPUT_WIDTH, YOLO_INPUT_HEIGHT))

    # Normalize pixel values to be in the range [0, 1]
    normalized_frame = resized_frame / 255.0

    # Expand dimensions to add batch dimension
    input_frame = np.expand_dims(normalized_frame, axis=0)

    return input_frame    

In [13]:
projectPath=os.getcwd()
#modelo_cargado = load_model('VGGModel.keras')
modelo_cargado = load_model(os.path.join(projectPath,'Saved_model_vgg'))
YoloModel = load_model('YoloModel.h5')



In [None]:
try:
    # Open the webcam (default camera index is usually 0)
    cap = cv2.VideoCapture(0)
    # Check if the webcam is opened successfully
    if not cap.isOpened():
        print("Error: Could not open webcam.")
        exit()

    while True:
        # Read a frame from the webcam
        ret, frame = cap.read()
        #frame = cv2.imread('despierta.jpg')
        #frame = cv2.imread('con_sueno.jpg')
        preprocessedFrame=preprocessFrame(frame)
        facePrep = YoloModel.predict(preprocessedFrame)
        try:
            extractedFace, b_boxes = extractFace(facePrep,frame)
            pred = prediccion(modelo_cargado, extractedFace, 0.30)
            draw_boxes(frame, b_boxes, pred)
            #print(pred)
        except UnboundLocalError as e:
            # Handle the exception
            print(f"No se encuentra el rostro...")
        # Additional handling or logging if needed
        # Display the frame
        cv2.imshow('Webcam', frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release the webcam and close the window
    cap.release()
    cv2.destroyAllWindows()
except Exception as e:
    # Release the webcam and close the window
    cap.release()
    cv2.destroyAllWindows()
    print(f"{e}")




[[0.71483433]]
[[0.4847006]]
[[0.47060916]]
[[0.5198218]]
[[0.43321314]]
[[0.40692422]]
[[0.43521482]]
[[0.44485804]]
[[0.46086264]]
[[0.44649208]]
[[0.43192425]]
[[0.50981086]]
[[0.52744895]]
[[0.57972217]]
[[0.50340515]]
[[0.42782634]]
[[0.5220216]]
[[0.5364653]]
[[0.5316587]]
[[0.5389854]]
[[0.48444596]]
[[0.48090154]]
[[0.5052455]]
[[0.38513172]]
[[0.4479109]]
[[0.40359777]]
[[0.4909784]]
[[0.4667524]]
[[0.5210157]]
[[0.48775986]]
[[0.51241475]]
[[0.48091495]]
[[0.53586715]]
[[0.4806508]]
[[0.45762974]]
[[0.5117499]]
[[0.4642168]]
[[0.4791865]]
[[0.5530231]]
[[0.63025904]]
[[0.57688576]]
[[0.6024304]]
[[0.58514184]]
[[0.6004601]]
[[0.5938042]]
[[0.9249848]]
[[0.6689504]]
[[0.71483433]]
[[0.7272364]]
[[0.5969177]]
[[0.71483433]]
[[0.71483433]]
[[0.71483433]]
[[0.71483433]]
[[0.71483433]]
[[0.71483433]]
[[0.71483433]]
[[0.71483433]]
[[0.71483433]]
[[0.71483433]]
[[0.71483433]]
[[0.71483433]]
[[0.71483433]]
[[0.628651]]
[[0.6853669]]
[[0.6408761]]
[[0.65062094]]
OpenCV(4.8.1) /Users/r

: 