In [1]:
import numpy as np
import cv2
import face_recognition
import os
import dialogflow
import time

#For Emotion Detection
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten,Conv2D,MaxPooling2D

#For speech recognition and audio conversion
import speech_recognition as sr
from playsound import playsound
import win32com.client

In [2]:
#Reading all the input images for training from the images directory (the file name is used as the name of the person)
path = 'C://Users//pc//Desktop//Data Science & Kaggle//Baymax//images//'


def load_known_faces(images_dir):
    """Build the list of known people from the ``.jpg`` files under ``images_dir``.

    The directory tree is walked with ``os.walk``; every file whose extension
    is ``.jpg`` (case-insensitive) is loaded from the directory it was found
    in, and the first face encoding detected in it is kept.

    Parameters
    ----------
    images_dir : str
        Root directory to scan.

    Returns
    -------
    (names, encodings) : tuple of two lists
        ``names[i]`` is the file name without its extension and
        ``encodings[i]`` is the matching face encoding.  Both lists are empty
        when the directory does not exist or holds no usable image.
    """
    names = []
    encodings = []
    for root, _dirs, files in os.walk(images_dir):
        for file_name in files:
            # splitext keeps dots inside the person's name ("john.doe.jpg")
            # and rejects look-alikes such as "photo.jpg.txt".
            stem, ext = os.path.splitext(file_name)
            if ext.lower() != '.jpg':
                continue
            # Load from where the file was actually found instead of a
            # separate relative "images/" folder.
            image = face_recognition.load_image_file(os.path.join(root, file_name))
            found = face_recognition.face_encodings(image)
            if len(found) == 0:
                # An image without a detectable face cannot be matched against.
                print("Skipping " + file_name + ": no face detected")
                continue
            names.append(stem)
            encodings.append(found[0])
    return names, encodings


#Loading each image file and calculating its face encoding
known_face_names, known_face_encodings = load_known_faces(path)

In [3]:
# Module-level placeholders for the most recent detection results; each one
# starts out as its own empty list.
face_locations, face_encodings, face_names = [], [], []

def face_recognize(frame, tolerance=0.6):
    """Recognize all the known persons in a camera frame.

    Parameters
    ----------
    frame : numpy.ndarray or None
        Image whose channel order is reversed before detection (the capture
        loop passes OpenCV frames).  ``None`` is treated as "nothing seen".
    tolerance : float, optional
        Largest face distance still accepted as a match.  The default of 0.6
        is intended to match the default used by
        ``face_recognition.compare_faces``.

    Returns
    -------
    (face_locations, face_names, Name) : tuple
        ``face_locations`` are the boxes found in the quarter-size frame (they
        are not scaled back to the input size), ``face_names`` holds one label
        per detected face ("Friend" when nobody known matches) and ``Name`` is
        the label of the last face processed, or ``None`` when no face was
        found.
    """
    if frame is None:
        return ([], [], None)

    Name = None

    #resizing to a quarter of the size to speed up detection
    small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

    # Reverse the channel axis; the slice is only a negative-stride view, so
    # hand the detector a contiguous copy.
    rgb_small_frame = np.ascontiguousarray(small_frame[:, :, ::-1])

    #Finding the face encodings for the faces in the image
    face_locations = face_recognition.face_locations(rgb_small_frame)
    face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)

    #Comparing with the trained images and finding the best match
    face_names = []
    for face_encoding in face_encodings:
        name = "Friend"

        # argmin raises on an empty array, so only search when at least one
        # known encoding was loaded.
        if len(known_face_encodings) > 0:
            face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
            best_match_index = int(np.argmin(face_distances))
            if face_distances[best_match_index] <= tolerance:
                name = known_face_names[best_match_index]

        face_names.append(name)
        Name = name

    return (face_locations, face_names, Name)

In [4]:
#Convolutional Neural Network model for emotion detection
# The layer stack is declared in one list: two convolutions and a pooling
# step, then two convolution + pooling stages, with dropout in between, and a
# dense classifier with 7 softmax outputs on top.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48,48,1)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(7, activation='softmax'),
])

# Weights are read from the file next to the notebook.
model.load_weights('model.h5')

# Maps the index of the strongest output to the label used in the conversation.
emotion_dict = dict(enumerate(
    ["Angry", "Disgusted", "Fearful", "Happy", "unexcited", "Sad", "Surprised"]
))

In [5]:
#Function for finding the emotion for the detected faces.
_face_cascade_cache = {}


def _get_face_cascade(cascade_file='haarcascade_frontalface_default.xml'):
    """Return the Haar cascade for ``cascade_file``, loading it only once."""
    if cascade_file not in _face_cascade_cache:
        cascade = cv2.CascadeClassifier(cascade_file)
        # OpenCV does not raise when the file is missing; it hands back an
        # empty classifier, so report the problem where it happens.
        if cascade.empty():
            raise FileNotFoundError("Could not load Haar cascade: " + cascade_file)
        _face_cascade_cache[cascade_file] = cascade
    return _face_cascade_cache[cascade_file]


def emotion(frame):
    """Return the emotion label of the first face detected in ``frame``.

    Parameters
    ----------
    frame : numpy.ndarray or None
        Colour image converted with ``cv2.COLOR_BGR2GRAY``.  A rectangle is
        drawn on it around the face that gets classified, so pass a copy if
        the original must stay untouched.

    Returns
    -------
    str or None
        The ``emotion_dict`` entry for the strongest model output, or ``None``
        when ``frame`` is ``None`` or no face is detected.

    Raises
    ------
    FileNotFoundError
        If the Haar cascade file cannot be loaded.
    """
    if frame is None:
        return None

    facecasc = _get_face_cascade()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = facecasc.detectMultiScale(gray,scaleFactor=1.3, minNeighbors=5)
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
        roi_gray = gray[y:y + h, x:x + w]
        # The face crop is resized to 48x48 and given a leading batch axis and
        # a trailing channel axis, i.e. shape (1, 48, 48, 1).
        cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray, (48, 48)), -1), 0)
        prediction = model.predict(cropped_img)
        maxindex = int(np.argmax(prediction))
        # Only the first detected face is classified.
        return emotion_dict[maxindex]

    return None

In [6]:
#Text-to-audio conversion (using the Microsoft Speech API)
# One SAPI voice object is created up front and shared by every call.
speaker = win32com.client.Dispatch("SAPI.SpVoice")


def audio(mytext):
    """Speak ``mytext`` aloud through the shared SAPI voice."""
    speaker.Speak(mytext)

In [7]:
#function for conversion of audio to text (using google speech api)
def audtotext():
    """Record one utterance from the microphone and return it as text.

    The ambient-noise calibration runs first, then ``notification.mp3`` is
    played as the cue to start speaking, then one phrase is recorded and sent
    to ``recognize_google``.

    Returns
    -------
    str
        The recognized sentence, or the sentinel ``"xxxxx"`` when recognition
        fails for any reason.
    """
    r = sr.Recognizer()
    with sr.Microphone() as source:
        # Calibrate before the cue so the user is not already talking while
        # the noise level is being measured.
        r.adjust_for_ambient_noise(source)
        playsound("notification.mp3")
        audi = r.listen(source)

    # The microphone is closed by now; recognition only needs the recording.
    try:
        return r.recognize_google(audi)
    except Exception:
        # Best effort: any recognition failure maps to the sentinel, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return "xxxxx"

In [8]:
#Dialogflow connection details
DIALOGFLOW_PROJECT_ID = 'baymax-dkmasl'
DIALOGFLOW_LANGUAGE_CODE = 'en'
SESSION_ID = 'me'

# The key-file location is exported before the client is created
# (presumably the client reads it for authentication - confirm).
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = 'Baymax-f894a3f33511.json'

# A single client and session path are shared by every request in the notebook.
session_client = dialogflow.SessionsClient()
session = session_client.session_path(DIALOGFLOW_PROJECT_ID, SESSION_ID)

In [9]:
def fun(name_emot, default="unexcited"):
    """Return the first ``(name, emotion)`` pair of ``name_emot``.

    Parameters
    ----------
    name_emot : dict
        Maps a person's name to the detected emotion, or to ``None`` when no
        emotion could be detected.
    default : str, optional
        Emotion reported in place of ``None``.

    Returns
    -------
    tuple
        ``(name, emotion)`` for the first entry of the dictionary.

    Raises
    ------
    IndexError
        If ``name_emot`` is empty.
    """
    name, detected = list(name_emot.items())[0]
    if detected is None:
        return (name, default)
    return (name, detected)


#Chatbot Function for the chat
def speek():
    """Run the voice chat loop until the user says goodbye.

    Each turn listens for a sentence, lets the user confirm or retype it on
    the keyboard, sends it to Dialogflow and speaks/prints the answer.  The
    loop ends after the answer to 'bye', 'goodbye' or 'bye bye' has been
    delivered.

    Errors raised by ``detect_intent`` propagate to the caller unchanged.
    """
    while(True):
        text_to_be_analyzed = audtotext()
        if text_to_be_analyzed=="xxxxx":
            audio("Sorry could not recognize your voice please give text input")
            text_to_be_analyzed = input()

        else:
            t = input("you have said "+text_to_be_analyzed+" is this correct? (yes/no)")
            if t.strip().lower() in ("no", "wrong", "incorrect"):
                text_to_be_analyzed=input("Enter the correct sentence")

        # Nothing to send; an empty query is presumably rejected by Dialogflow,
        # so start a new turn instead.
        if not text_to_be_analyzed.strip():
            continue

        text_input = dialogflow.types.TextInput(text=text_to_be_analyzed, language_code=DIALOGFLOW_LANGUAGE_CODE)
        query_input = dialogflow.types.QueryInput(text=text_input)
        # No try/except here: the previous handler only re-raised, and it
        # named an exception class that was never imported.
        response = session_client.detect_intent(session=session, query_input=query_input)

        reply = response.query_result.fulfillment_text
        audio(reply)

        print("Baymax: ",reply)

        res = text_to_be_analyzed.strip().lower()

        if res in ('bye', 'goodbye', 'bye bye'):
            return

In [10]:
# Main loop: wait until a face is visible, greet the person according to the
# detected emotion, then hand over to the voice chat.
video_capture = cv2.VideoCapture(0)
try:
    while True:
        #capturing the frame from the camera
        ret, frame = video_capture.read()
        if not ret or frame is None:
            # A failed read yields no image; stop instead of crashing on it.
            print("Baymax: ","I could not read from the camera")
            break

        #finding all the faces
        face_locations, face_names,name = face_recognize(frame.copy())

        if name is None:
            audio("Hello I can't see you please come closer")
            print("Baymax: ","Hello I can't see you please come closer")
            time.sleep(2)
            continue

        #Detecting the expression of the face
        emt = emotion(frame.copy())
        name_emot = {name: emt}
        name,expression = fun(name_emot)

        #get the response for the person from the dialogflow server
        text_input = dialogflow.types.TextInput(text=expression, language_code=DIALOGFLOW_LANGUAGE_CODE)
        query_input = dialogflow.types.QueryInput(text=text_input)

        try:
            response = session_client.detect_intent(session=session, query_input=query_input)
            reply = response.query_result.fulfillment_text
        except Exception as err:
            # Keep greeting the user even without an answer from the server;
            # previously the next line failed on the undefined response.
            print("Some Error", err)
            reply = ""

        greeting = "Hi "+name+" Looks Like you are "+expression+" "+reply
        audio(greeting)
        print("Baymax: ",greeting)

        #After detecting start the chat
        speek()

        break
finally:
    # Always give the camera back, also when something above raised.
    video_capture.release()
    cv2.destroyAllWindows()

Baymax:  Hello I can't see you please come closer
Baymax:  Hello I can't see you please come closer
Baymax:  Hello I can't see you please come closer
Baymax:  Hello I can't see you please come closer
Baymax:  Hello I can't see you please come closer
Baymax:  Hello I can't see you please come closer
Baymax:  Hi Aakash Looks Like you are Angry Do you feel disappointed ? Because Anger and diappoinment ups your stroke risk. So try to be happy
you have said fever is this correct? (yes/no)yes
Baymax:  Stay in bed rest. Remove extra layers of clothing and blankets, unless you have the chills.Take hot water for Bath.You will feel better
you have said bhai is this correct? (yes/no)no
Enter the correct sentencebye
Baymax:  Bye bye  Love you


<b>Reference:</b><br>
https://github.com/ageitgey/face_recognition

https://github.com/ageitgey/face_recognition/issues/175#issue-257710508

<b>Requirements</b><br>
https://pypi.org/project/cmake/

https://pypi.org/project/dlib/

https://pypi.org/project/face-recognition/

https://pypi.org/project/SpeechRecognition/

https://pypi.org/project/playsound/

https://pypi.org/project/pywin32/