In [11]:
import tensorflow as tf
import tkinter as tk
from tkinter import filedialog
import speech_recognition as sr
from langdetect import detect, LangDetectException
from genderize import Genderize
import librosa
import numpy as np
import soundfile as sf
import spacy

class EmotionClassifier(tf.Module):
    """Thin wrapper around a TensorFlow SavedModel used for emotion prediction.

    The SavedModel is loaded once, at construction time, and queried through
    its ``serving_default`` signature.
    """

    def __init__(self, model_path="Speech_Emotion_Model"):
        """Load the SavedModel.

        Args:
            model_path: Directory containing the exported SavedModel. Defaults
                to ``"Speech_Emotion_Model"`` (resolved relative to the
                current working directory), which was previously hard-coded.
        """
        super().__init__()
        self.model = tf.saved_model.load(model_path)

    def predict(self, features):
        """Run the model's ``serving_default`` signature on ``features``.

        Args:
            features: Input tensor forwarded unchanged to the signature.

        Returns:
            Whatever the signature returns; callers in this file index the
            result with the ``'output_0'`` key.
        """
        return self.model.signatures['serving_default'](features)

# Capture one utterance from the microphone and persist it as a WAV file
def record_voice():
    """Record speech from the default microphone and save it to ``audio.wav``.

    Prompts the user on stdout, lets the recognizer adjust for ambient noise,
    then listens on the microphone and writes the captured audio as WAV data.

    Returns:
        The path of the written file, always ``"audio.wav"`` (relative to the
        current working directory).
    """
    output_path = "audio.wav"
    recognizer = sr.Recognizer()

    with sr.Microphone() as mic:
        print("Please speak...")
        recognizer.adjust_for_ambient_noise(mic)
        captured = recognizer.listen(mic)

    with open(output_path, "wb") as wav_file:
        wav_file.write(captured.get_wav_data())

    return output_path

# Let the user pick an existing recording from disk
def upload_voice():
    """Open a file-selection dialog and return the chosen path.

    Returns:
        Whatever ``filedialog.askopenfilename()`` returns; the caller in this
        file treats a falsy value as "nothing selected".
    """
    return filedialog.askopenfilename()

# Detect the spoken language by transcribing the audio, then classifying the text
def detect_language(audio_path):
    """Return the language code of the speech in ``audio_path``, or ``None``.

    ``langdetect`` identifies the language of *text*. The previous version
    decoded the raw audio samples as UTF-8 and classified the resulting
    byte noise, so its answer was unrelated to what was actually spoken.
    This version first transcribes the recording and then runs ``langdetect``
    on the transcript.

    Args:
        audio_path: Path to an audio file readable by
            ``speech_recognition.AudioFile``.

    Returns:
        The language code reported by ``langdetect`` (e.g. ``'en'``), or
        ``None`` when the audio cannot be read, transcribed, or classified.
        Failures are reported on stdout rather than raised, matching the
        original best-effort behaviour.
    """
    recognizer = sr.Recognizer()
    try:
        # Load the whole file into an AudioData object for transcription.
        with sr.AudioFile(audio_path) as source:
            recording = recognizer.record(source)

        # NOTE(review): recognize_google needs network access and, with its
        # default settings, transcribes assuming English speech — confirm
        # this is acceptable for rejecting non-English recordings.
        transcript = recognizer.recognize_google(recording)

        # Use langdetect on the transcript (text), not on the audio bytes.
        return detect(transcript)

    except sr.UnknownValueError:
        # The recognizer could not make out any speech in the recording.
        print("Error: speech in the recording could not be understood")
        return None
    except sr.RequestError as e:
        # The transcription service was unreachable or rejected the request.
        print(f"Error: {e}")
        return None
    except LangDetectException as e:
        # Raised by langdetect, e.g. when the transcript has no usable text.
        print(f"Error: {e}")
        return None
    except Exception as e:
        # Best-effort boundary: unreadable or unsupported files also yield None.
        print(f"Error: {e}")
        return None

# Estimate the speaker's gender from the pitch of the recording
def detect_gender(audio_path, pitch_threshold_hz=165.0):
    """Guess the speaker's gender from the median fundamental frequency.

    The previous version sent the file *path* to Genderize, which predicts
    gender from first names and therefore cannot say anything about the
    voice in the recording. This version analyses the audio itself: it
    estimates the fundamental frequency (F0) of the voiced frames and
    compares the median against a threshold.

    This is a heuristic, not a trained classifier: speakers whose pitch is
    close to the threshold can be misclassified.

    Args:
        audio_path: Path to an audio file readable by ``soundfile``.
        pitch_threshold_hz: Median F0 at or above which the speaker is
            reported as ``'female'``. The 165 Hz default is a commonly cited
            boundary between typical adult male and female speaking pitch
            — TODO confirm it suits the target recordings.

    Returns:
        ``'female'`` or ``'male'``, or ``None`` when the file is empty or
        contains no voiced frames to measure.
    """
    audio, sample_rate = sf.read(audio_path)

    # soundfile returns (frames, channels) for multi-channel files; pitch
    # tracking needs a single mono signal.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)
    if audio.size == 0:
        return None

    # Search range wide enough for adult speech; pyin marks unvoiced frames
    # with NaN in f0 and False in voiced_flag.
    f0, voiced_flag, _ = librosa.pyin(
        audio, fmin=65.0, fmax=400.0, sr=sample_rate
    )
    voiced_f0 = f0[voiced_flag]
    voiced_f0 = voiced_f0[~np.isnan(voiced_f0)]
    if voiced_f0.size == 0:
        return None

    # Median is robust to occasional octave errors in the pitch track.
    if np.median(voiced_f0) >= pitch_threshold_hz:
        return 'female'
    return 'male'

# Classifier instance shared by every predict_emotion() call; created lazily
# so that importing this module does not load the SavedModel.
_EMOTION_CLASSIFIER = None


def _get_emotion_classifier():
    """Return the shared EmotionClassifier, loading the model on first use."""
    global _EMOTION_CLASSIFIER
    if _EMOTION_CLASSIFIER is None:
        _EMOTION_CLASSIFIER = EmotionClassifier()
    return _EMOTION_CLASSIFIER


# Function to preprocess audio and predict emotion
def predict_emotion(audio_path):
    """Predict the emotion expressed in the recording at ``audio_path``.

    The audio is summarised as the time-average of 40 MFCC coefficients,
    wrapped in a batch of one, and passed to the emotion model. The label
    with the highest score in the model's ``'output_0'`` output is returned.

    Compared with the earlier version, the SavedModel is loaded once and
    reused instead of being reloaded from disk on every call, and
    multi-channel recordings are downmixed to mono before feature extraction.

    Args:
        audio_path: Path to an audio file readable by ``soundfile``.

    Returns:
        One of ``'Anger'``, ``'Disgust'``, ``'Fear'``, ``'Happiness'``,
        ``'Neutral'``, ``'Sad'`` or ``'Surprise'``.

    Raises:
        ValueError: If the file contains no audio samples.
    """
    audio, sample_rate = sf.read(audio_path)

    # soundfile returns (frames, channels) for multi-channel files, which is
    # not the layout librosa expects; average the channels into one signal.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)
    if audio.size == 0:
        raise ValueError(f"No audio samples found in {audio_path!r}")

    # Average each MFCC coefficient over time -> one 40-value vector.
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_processed = np.mean(mfccs.T, axis=0)
    features = np.array([mfccs_processed])  # batch of one

    # Convert features to tensor
    features_tensor = tf.convert_to_tensor(features, dtype=tf.float32)

    # Make predictions using the cached SavedModel wrapper
    predictions = _get_emotion_classifier().predict(features_tensor)

    # Get the predicted emotion label
    predicted_label_index = np.argmax(predictions['output_0'], axis=1)[0]
    emotion_labels = ['Anger', 'Disgust', 'Fear', 'Happiness', 'Neutral', 'Sad', 'Surprise']
    return emotion_labels[predicted_label_index]

# Application entry point: builds the window and wires the buttons to the pipeline
def main():
    """Build the "Emotion Predictor" window and run the Tk event loop.

    The window offers two actions, recording from the microphone or picking
    an existing file. Either way the resulting path goes through the same
    pipeline: the language must be detected as ``'en'`` and the gender as
    ``'female'`` before the emotion is predicted. The outcome (or a rejection
    / error message) is shown in a label below the buttons.
    """
    window = tk.Tk()
    window.title("Emotion Predictor")

    # Run the language -> gender -> emotion checks and display the outcome
    def handle_audio(audio):
        print("Processing audio:", audio)
        try:
            if detect_language(audio) != 'en':
                verdict = "Please upload an English language voice recording."
            elif detect_gender(audio) != 'female':
                verdict = "Please upload a female voice recording."
            else:
                verdict = "Predicted emotion: " + predict_emotion(audio)
            result_label.config(text=verdict)
        except Exception as err:
            # Any failure in the pipeline is logged and surfaced generically.
            print(f"Error: {err}")
            result_label.config(text="Error processing audio file.")

    # "Record Voice" callback: capture from the microphone, then process
    def record_click():
        handle_audio(record_voice())

    # "Upload Voice File" callback: process only if a file was actually chosen
    def upload_click():
        chosen_path = upload_voice()
        if chosen_path:
            handle_audio(chosen_path)

    # Buttons are packed top-to-bottom in the order listed here
    button_specs = (
        ("Record Voice", record_click),
        ("Upload Voice File", upload_click),
    )
    for caption, callback in button_specs:
        tk.Button(window, text=caption, command=callback).pack(pady=10)

    # Label that handle_audio updates; created before the event loop starts
    result_label = tk.Label(window, text="")
    result_label.pack(pady=10)

    window.mainloop()


if __name__ == "__main__":
    main()

Processing audio: C:/Users/Sidhi/Dropbox/My PC (LAPTOP-EGCN1V5E)/Desktop/Internship Projects/Speech Emotion Detection/audio.wav
Processing audio: C:/Users/Sidhi/Dropbox/My PC (LAPTOP-EGCN1V5E)/Desktop/Internship Projects/Speech Emotion Detection/OAF_bath_sad.wav
Processing audio: C:/Users/Sidhi/Dropbox/My PC (LAPTOP-EGCN1V5E)/Desktop/Internship Projects/Speech Emotion Detection/YAF_half_happy.wav
