# Interfacing the CNN Model with the Webcam:

Installing all the Dependencies:

In [None]:
! pip install tensorflow
! pip install opencv-python

In this section, we will interface the previously trained CNN model with the webcam of our computer.

In [1]:
# Importing the dependencies

import numpy as np
import cv2 as cv
import tensorflow as tf
import tensorflow.keras as keras

* First, we will write a function that loads and returns the trained CNN model
* The function signature will be: `def load_model(model_path)`

In [2]:
def load_model(model_path):
    """
    Loads a saved Keras model and hands it back to the caller.

    Parameters:
        model_path: Location of the saved model, passed unchanged to
                    `keras.models.load_model`.

    Returns:
        The model object returned by `keras.models.load_model`.
    """
    return keras.models.load_model(model_path)

* Next, we will write a function that takes a grayscale image of shape (50,50,1), feeds it into the CNN model, and returns the prediction tuple
* The function signature will be of the format `def model_predict(model,image)` and will return a tuple of the format `(non_drowsy_percent,drowsy_percent)`

In [3]:
def model_predict(model,image):
    """
    Runs the model on a single image and returns its two output scores.

    Parameters:
        model: Object exposing a Keras-style `predict` method.
        image: A single preprocessed image (no batch dimension).

    Returns:
        A tuple of the format `(non_drowsy, drowsy)`, taken from the first
        (and only) row of the prediction output.
    """
    # The model is fed a batch, so a leading batch axis is added to the single image:
    batch = tf.expand_dims(image, axis=0)

    # verbose = 0 keeps Keras from printing a progress bar for every call:
    scores = model.predict(batch, verbose=0)

    # Only one image was sent, so only the first row is of interest:
    first_row = scores[0]
    non_drowsy, drowsy = first_row

    return (non_drowsy, drowsy)

* Loading the Frontal Face Cascade Model which is used to detect human faces and is maintained by OpenCV
* This model returns the coordinates of the human face, which we will use accordingly:

In [4]:
# Trying the cascade file located in the working directory first:
face_detector = cv.CascadeClassifier('haarcascade_frontalface_default.xml')

# A cascade that could not be loaded leaves the classifier empty instead of failing here,
# so falling back to the copy of the file that is bundled with the opencv-python package:
if face_detector.empty():
    face_detector = cv.CascadeClassifier(cv.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Failing early with a clear message rather than with an obscure error inside detectMultiScale later on:
if face_detector.empty():
    raise FileNotFoundError("Could not load 'haarcascade_frontalface_default.xml'")

* Now we will be writing a function, that will preprocess the frame that is read from the webcam in order to make it fit for our CNN model:

In [5]:
def frame_preprocess(frame):
    """
    Prepares an image for the CNN model.

    The image is converted from BGR to grayscale, resized to 50x50 and then
    min-max normalized to the range [0.0, 1.0] as 32-bit floats.

    Parameters:
        frame: BGR image (for example a face cropped out of a webcam frame).

    Returns:
        The grayscale, resized and normalized image.
    """
    target_size = (50,50)

    # Dropping the colour information:
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)

    # Bringing the image down to the target dimension:
    resized = cv.resize(gray, target_size)

    # Min-max scaling of the pixel values into [0.0, 1.0]:
    return cv.normalize(resized, None, alpha=0.0, beta=1.0, norm_type=cv.NORM_MINMAX, dtype=cv.CV_32F)

Code cell to get metadata about webcam video:

In [6]:
webcam_video = cv.VideoCapture(0)

# The finally clause guarantees that the webcam is released even if something below fails:
try:
    original_fps = webcam_video.get(cv.CAP_PROP_FPS)
    print("Original FPS = ",original_fps)

    # Displaying the total frame count:
    total_frame = webcam_video.get(cv.CAP_PROP_FRAME_COUNT)
    print("Total Frames = ",total_frame)

    # Reading a single frame to find out its dimensions:
    flag, frame = webcam_video.read()

    # When the read fails, flag is False and frame is None, so the shape must not be accessed:
    if flag:
        print(frame.shape)
    else:
        print("Could not read a frame from the webcam")
finally:
    webcam_video.release()

Original FPS =  30.0
Total Frames =  -1.0
(480, 640, 3)


* On my machine, the webcam video is read at a rate of 30 FPS
* Now we will be writing the driver function of our application:

In [30]:
def webcam_driver(model_path=r"model", video_source=0, predict_every=60):
    """
    Runs the drowsiness detection loop on a video stream.

    Every frame is read and displayed in a window named 'Drowsiness Detection'.
    On every `predict_every`-th frame the face detector is run, the first
    detected face is cropped, preprocessed and sent to the CNN model. The
    resulting text and the face rectangles are drawn on every frame until the
    next prediction replaces them. The loop ends when "q" is pressed or when
    no further frame can be read.

    Parameters:
        model_path: Path handed to `load_model`. Defaults to "model".
        video_source: Argument handed to `cv.VideoCapture`: a device index
                      (0 by default) or the path of a video file.
        predict_every: Number of frames between two predictions. Defaults to 60.

    Raises:
        ValueError: If `predict_every` is smaller than 1.
        RuntimeError: If the video source cannot be opened.
    """
    if predict_every < 1:
        raise ValueError("predict_every must be at least 1")

    # Loading the model and storing it in a variable:
    model = load_model(model_path)

    # Now, let us access the webcam of our computer (or the given video file):
    webcam_video = cv.VideoCapture(video_source)

    # Values that are kept between two predictions so that every displayed frame can be annotated:
    metrics_text = "Face Not Aligned With Camera"
    face_coordinates = ()

    # Frame counter (Python integers do not overflow, so it never has to be reset):
    i = 0

    # The finally clause makes sure that the camera and the window are freed on every exit path:
    try:
        if not webcam_video.isOpened():
            raise RuntimeError(f"Could not open the video source: {video_source!r}")

        while True:

            # Reading the frame:
            flag, frame = webcam_video.read()

            # Stopping when no frame is delivered (camera unavailable or end of a video file),
            # because frame is None in that case and cannot be processed:
            if not flag:
                break

            if i % predict_every == 0:

                # Text that is displayed when no face is found:
                metrics_text = "Face Not Aligned With Camera"

                # Detecting the Face using the Haar Frontal Face Detection Model:
                face_coordinates = face_detector.detectMultiScale(frame, scaleFactor=1.1, minNeighbors=9,minSize=(45,45))

                if len(face_coordinates)>0:

                    # Slicing the frame to extract only the first detected face (with 10 extra rows below it):
                    x,y,w,h = face_coordinates[0]
                    face_image = frame[y:y+h+10,x:x+w]

                    if np.size(face_image):

                        # Preprocessing the face to feed it to the neural network:
                        CNN_frame = frame_preprocess(face_image)

                        # Sending the processed face to the model_predict function for predictions:
                        predictions = list(model_predict(model,CNN_frame))

                        # Converting the prediction values to rounded percentages:
                        predictions[0] = round(predictions[0]*100,4)
                        predictions[1] = round(predictions[1]*100,4)

                        # Making changes to the output text:
                        metrics_text = f"Drowsy: {predictions[1]}% Non-Drowsy: {predictions[0]}%"

            # Writing the metrics onto image:
            frame = cv.putText(frame, metrics_text, (280, 50), cv.FONT_HERSHEY_SIMPLEX, 0.50, (0,0,255), 2)

            # Also for visual understanding, constructing a rectangle around every detected face:
            for (x, y, w, h) in face_coordinates:
                cv.rectangle(frame, (x, y), (x+w, y+h+10), (0, 0, 255), 1)

            # Displaying the frame:
            cv.imshow('Drowsiness Detection', frame)

            # This condition will stop the application when "q" is pressed:
            if cv.waitKey(20) & 0xFF == ord('q'):
                break

            # Increasing the frame counter:
            i+=1

    finally:
        # Releasing the video capture object:
        webcam_video.release()

        # Destroying all created windows:
        cv.destroyAllWindows()


Driver Code:

In [32]:
# Starting the detection loop; press "q" while the video window is focused to stop it:
webcam_driver()