In [13]:
import tkinter as tk
from tkinter import filedialog, messagebox
import librosa
import numpy as np
import os
import joblib
from keras.models import load_model
import speech_recognition as sr
import pickle

# Define the relative paths to the trained models
base_dir = os.getcwd()  # Get the current working directory

emotion_model_path = os.path.join(base_dir, '..', 'saved models', 'Emotion_speech', 'emotion_speech_task4.model')
gender_model_path = os.path.join(base_dir, '..', 'saved models', 'Voice_gender_model', 'rfc_model_voice_gender_task4.joblib')
scaler_path = os.path.join(base_dir, '..', 'saved models', 'Voice_gender_model', 'scaler.joblib')
language_model_path = os.path.join(base_dir, '..', 'saved models', 'Language_detection_model', 'language_detection_rf_model.pkl')
tfidf_vectorizer_path = os.path.join(base_dir, '..', 'saved models', 'Language_detection_model', 'tfidf_vectorizer.pkl')


# Load the models and vectorizer
try:
    tfidf_vectorizer = joblib.load(tfidf_vectorizer_path)
    print("TF-IDF vectorizer loaded successfully.")
except Exception as e:
    print(f"Error loading TF-IDF vectorizer: {e}")

try:
    emotion_model = load_model(emotion_model_path)
    print("Emotion model loaded successfully.")
except Exception as e:
    print(f"Error loading emotion model: {e}")

try:
    gender_model = joblib.load(gender_model_path)
    scaler = joblib.load(scaler_path)
    print("Gender model and scaler loaded successfully.")
except Exception as e:
    print(f"Error loading gender model or scaler: {e}")

try:
    with open(language_model_path, 'rb') as f:
        language_model = pickle.load(f)
    print("Language model loaded successfully.")
    # Check if the loaded model has a predict method
    if not hasattr(language_model, 'predict'):
        raise ValueError("Loaded language model does not have a predict method")
except Exception as e:
    print(f"Error loading language model: {e}")

# Emotion labels
emotion_labels = {
    0: 'neutral',
    1: 'calm',
    2: 'happy',
    3: 'sad',
    4: 'angry',
    5: 'fearful',
    6: 'disgust',
    7: 'surprised'
}

def preprocess_audio(audio_path):
    y, sr = librosa.load(audio_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    mfccs = np.mean(mfccs.T, axis=0)
    mfccs = mfccs.reshape(1, -1)
    return mfccs, y, sr

def predict_gender(audio_features):
    if audio_features.shape[1] != 20:
        raise ValueError(f"Expected 20 MFCC features, got {audio_features.shape[1]}")
    scaled_features = scaler.transform(audio_features)
    prediction = gender_model.predict(scaled_features)
    return 'female' if prediction[0] == 0 else 'male'

def convert_audio_to_text(audio_data, sample_rate):
    recognizer = sr.Recognizer()
    audio = sr.AudioData(audio_data.tobytes(), sample_rate, 2)
    try:
        print("Attempting speech recognition...")
        text = recognizer.recognize_google(audio)
        print(f"Recognized text: {text}")
        return text
    except sr.UnknownValueError:
        print("Speech recognition could not understand audio")
        return "Speech recognition could not understand audio"
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        return f"Could not request results from Google Speech Recognition service; {e}"
    except Exception as e:
        print(f"An error occurred during speech recognition: {e}")
        return f"An error occurred during speech recognition: {e}"

def predict_language(audio_path):
    y, sr = librosa.load(audio_path, sr=None)
    print(f"Audio data shape: {y.shape}, Sample rate: {sr}")
    text_data = convert_audio_to_text(y, sr)
    print(f"Converted audio to text: {text_data}")
    if not text_data or "Speech recognition could not understand audio" in text_data:
        return "Unable to determine language from audio"
    prediction = predict_language_from_text(text_data)
    return prediction

def predict_language_from_text(text_data):
    text_features = tfidf_vectorizer.transform([text_data])
    prediction = language_model.predict(text_features)
    return prediction[0]

def predict_emotion(audio_features):
    prediction = emotion_model.predict(audio_features)
    return emotion_labels[np.argmax(prediction)]

def process_audio(file_path):
    audio_features, y, sr = preprocess_audio(file_path)

    gender = predict_gender(audio_features)
    if gender != 'female':
        return "Please upload a female voice."

    language = predict_language(file_path)
    if language != 'English':
        return "Please upload an English language voice."

    emotion = predict_emotion(audio_features)
    return f"The detected emotion is: {emotion}"

def upload_file():
    file_path = filedialog.askopenfilename(filetypes=[("Audio Files", "*.wav *.mp3")])
    if file_path:
        result = process_audio(file_path)
        messagebox.showinfo("Result", result)

# GUI Setup
root = tk.Tk()
root.title("Emotion Detection from Audio")

upload_button = tk.Button(root, text="Upload Audio File", command=upload_file)
upload_button.pack(pady=20)

root.mainloop()

TF-IDF vectorizer loaded successfully.
Emotion model loaded successfully.
Gender model and scaler loaded successfully.
Language model loaded successfully.
Error loading language model: Loaded language model does not have a predict method


