In [None]:
from time import sleep
from furhat_remote_api import FurhatRemoteAPI
import speech_recognition as sr
import cv2
import opencv_jupyter_ui as jcv2
from feat.utils import FEAT_EMOTION_COLUMNS
from feat import Detector
from train_model import train_model

# Detector choice
# Call the project-local train_model() and keep its result. `model` is not
# referenced again in the visible part of this file.
model = train_model()
# py-feat detector used by detect_emotion() for face, landmark and emotion
# detection.
# NOTE(review): device="cuda" presumably requires a CUDA-capable GPU — confirm
# the target machine has one before running this cell.
detector = Detector(device="cuda")

In [None]:
# Furhat IP address
# NOTE(review): 127.0.1.1 is a loopback-range address, so this presumably
# targets a Furhat instance running on the same machine — confirm.
FURHAT_IP = "127.0.1.1"

# Connect to Furhat
# The connection is created at import/cell-execution time and shared by every
# function below through the module-level `furhat` name.
furhat = FurhatRemoteAPI(FURHAT_IP)
furhat.set_led(red=100, green=50, blue=50)

# Furhat faces and voices
# Both tables are keyed by persona name; interaction() looks up 'Bartender'.
FACES = {'Bartender': 'Brooklyn'}
VOICES_EN = {'Bartender': 'GregoryNeural'}

# Module-level switch read by interaction() on every loop iteration and
# toggled by start_emotion_detection() / stop_emotion_detection().
emotion_detection_active = False

# Emotion-detection on/off switches
def _set_emotion_detection(active):
    """Store *active* in the module-level ``emotion_detection_active`` flag."""
    global emotion_detection_active
    emotion_detection_active = active


def start_emotion_detection():
    """Turn emotion detection on by setting the module flag to ``True``."""
    _set_emotion_detection(True)


def stop_emotion_detection():
    """Turn emotion detection off by setting the module flag to ``False``."""
    _set_emotion_detection(False)

# Furhat speech
def bsay(line, pause=1):
    """Have Furhat speak *line*, then pause briefly.

    The ``furhat.say`` call is made with ``blocking=True``, so the pause
    starts only after that call returns.

    Args:
        line: Text passed to ``furhat.say``.
        pause: Seconds to sleep after speaking. Defaults to 1, the delay that
            was previously hard-coded; pass 0 to skip the pause entirely.
    """
    furhat.say(text=line, blocking=True)
    # Skip the sleep for non-positive values; time.sleep rejects negatives.
    if pause > 0:
        sleep(pause)

# Speech recognition setup
# Single shared recognizer; recognize_speech() uses it both to capture audio
# from the microphone and to transcribe that audio.
recognizer = sr.Recognizer()

# Function to recognize speech
def recognize_speech():
    """Listen on the default microphone and return the transcribed text.

    Returns:
        The recognized text, or ``None`` when nothing usable was obtained:
        no speech started within the 5-second timeout, the audio could not
        be understood, or the recognition request itself failed.
    """
    with sr.Microphone() as source:
        print("Say something!")
        try:
            audio = recognizer.listen(source, timeout=5)
        except sr.WaitTimeoutError:
            # listen() raises when no phrase starts within `timeout`. Silence
            # is an expected outcome in the interaction loop, not a failure.
            print("No speech detected before the timeout.")
            return None

    try:
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Speech Recognition could not understand audio.")
        return None
    except sr.RequestError as err:
        # Raised when the recognition service cannot be reached or rejects
        # the request; report it and let the caller carry on.
        print(f"Speech Recognition request failed: {err}")
        return None

    print("You said:", text)
    return text
    

# Function for emotion detection
def detect_emotion(frame):
    """Detect faces in *frame*, annotate each one, and report one emotion.

    Every detected face gets a bounding box and its strongest emotion label
    drawn directly onto *frame* (the image is modified in place).

    Args:
        frame: Image passed to the detector and to the OpenCV drawing calls.

    Returns:
        The emotion label (an entry of ``FEAT_EMOTION_COLUMNS``) of the first
        detected face, or ``None`` when no face was found.
    """
    faces = detector.detect_faces(frame)
    # Results are read at index 0 below — presumably the entry for this single
    # frame. Bail out early when it holds no faces so the landmark and emotion
    # models are never run on an empty detection.
    if len(faces) == 0 or len(faces[0]) == 0:
        return None

    landmarks = detector.detect_landmarks(frame, faces)
    emotions = detector.detect_emotions(frame, faces, landmarks)

    frame_faces = faces[0]
    frame_emotions = emotions[0]

    # Index of the highest-scoring emotion for each face.
    strongest_emotion = frame_emotions.argmax(axis=1)

    detected = None
    for face, top_emo in zip(frame_faces, strongest_emotion):
        # The fifth value (presumably a detection score) is not used.
        x0, y0, x1, y1, _score = face
        label = FEAT_EMOTION_COLUMNS[top_emo]
        cv2.rectangle(frame, (int(x0), int(y0)), (int(x1), int(y1)), (255, 0, 0), 3)
        cv2.putText(frame, label, (int(x0), int(y0 - 10)),
                    cv2.FONT_HERSHEY_PLAIN, 1.5, (255, 0, 0), 2)

        # Only the first face drives the returned value; the remaining faces
        # are still annotated instead of being skipped by an early return.
        if detected is None:
            detected = label
            print(f"Detected emotion: {detected}")

    return detected


# Function to react to speech
def speech(text):
    """React to recognized speech containing the word "hello".

    Does nothing when *text* is empty/``None`` or does not contain "hello"
    (compared case-insensitively). Otherwise Furhat speaks a fixed reply and
    performs the 'Surprise' gesture.
    """
    if not text:
        return
    if "hello" not in text.lower():
        return

    bsay("very tasty")
    furhat.gesture(name='Surprise')

# Function to react to emotion
def emotion(detected_emotion):
    """Print the detected emotion and have Furhat react to known ones.

    For "happiness", "sadness" and "neutral" Furhat speaks a matching line
    and then performs a gesture; any other value is only printed.
    """
    print(f"Detected emotion: {detected_emotion}")

    # (emotion label, spoken line, gesture name)
    reactions = (
        ("happiness", "You are happy", "Smile"),
        ("sadness", "You are sad", "Oh"),
        ("neutral", "You are neutral", "Wink"),
    )
    for label, spoken_line, gesture_name in reactions:
        if detected_emotion == label:
            bsay(spoken_line)
            furhat.gesture(name=gesture_name)
            break

# Interaction function
def interaction():
    """Run the camera/speech interaction loop with Furhat.

    Sets the Bartender face and voice, greets the user, then repeats:
    grab a camera frame, optionally detect and react to the emotion in it,
    listen for a spoken command, and display the frame. Saying something
    containing "start" or "stop" switches emotion detection on or off. The
    loop ends when a frame cannot be read or ESC is pressed.
    """
    furhat.set_face(character=FACES['Bartender'], mask="Adult")
    furhat.set_voice(name=VOICES_EN['Bartender'])
    bsay("Hi")
    furhat.gesture(name='BigSmile')

    esc_key = 27

    while True:
        ok, frame = cam.read()
        if not ok:
            print("Error: failed to capture image")
            break

        # The flag is only read here, so no `global` declaration is needed.
        if emotion_detection_active:
            emotion(detect_emotion(frame))

        heard = recognize_speech()
        if heard:
            print(f"Recognized: {heard}")
            command = heard.lower()
            if "start" in command:
                start_emotion_detection()
                bsay("Emotion detection started")
            elif "stop" in command:
                stop_emotion_detection()
                bsay("Emotion detection stopped")

        jcv2.imshow("Emotion Detection", frame)
        # Mask to the low byte before comparing against the ESC key code.
        if (jcv2.waitKey(33) & 0xFF) == esc_key:
            break

# Set up camera
# Opened at module level because interaction() reads frames from the
# module-level `cam` name.
cam = cv2.VideoCapture(0)
cam.set(cv2.CAP_PROP_BUFFERSIZE, 1)

# main function
# Release the camera and close the display windows even when interaction()
# raises; otherwise the capture device stays held after an error.
try:
    if __name__ == '__main__':
        interaction()
finally:
    cam.release()
    jcv2.destroyAllWindows()