In [1]:
import io

import ipywidgets as widgets
import joblib
import librosa
import numpy as np
import scipy.signal as signal
from IPython.display import display
from keras.models import load_model
from sklearn.preprocessing import LabelEncoder

In [2]:
# AudioPreprocessing class
class AudioPreprocessing:
    """Load an audio file and clean it up ahead of feature extraction.

    :meth:`preprocess` chains the individual steps in this order:
    load -> resample -> high-pass filter -> silence removal -> normalize.
    """

    def __init__(self, sample_rate=16000, frame_size=0.025, frame_stride=0.01):
        """Store the processing configuration.

        Args:
            sample_rate: Rate (Hz) audio is loaded at and filtered for.
            frame_size: Stored on the instance; not read by any method here.
            frame_stride: Stored on the instance; not read by any method here.
        """
        self.sample_rate = sample_rate
        self.frame_size = frame_size
        self.frame_stride = frame_stride

    def load_audio(self, file_path):
        """Load ``file_path`` with librosa at ``self.sample_rate``.

        Returns:
            Tuple ``(audio, sr)`` exactly as returned by ``librosa.load``.
        """
        audio, sr = librosa.load(file_path, sr=self.sample_rate)
        return audio, sr

    def noise_removal(self, audio, cutoff_hz=100, order=1):
        """Apply a Butterworth high-pass filter to ``audio``.

        Args:
            audio: 1-D signal sampled at ``self.sample_rate``.
            cutoff_hz: Filter cutoff in Hz. Defaults to 100.
            order: Butterworth filter order. Defaults to 1.

        Returns:
            The filtered signal as produced by ``scipy.signal.lfilter``.

        Raises:
            ValueError: Propagated from ``scipy.signal.butter`` when the
                normalized cutoff is not valid for the sample rate.
        """
        # butter() expects the cutoff as a fraction of the Nyquist frequency.
        nyquist = 0.5 * self.sample_rate
        b, a = signal.butter(order, cutoff_hz / nyquist, btype='high')
        return signal.lfilter(b, a, audio)

    def silence_removal(self, audio, top_db=20):
        """Keep only the intervals ``librosa.effects.split`` reports as non-silent.

        Args:
            audio: 1-D signal.
            top_db: Threshold forwarded to ``librosa.effects.split``.

        Returns:
            The non-silent intervals of ``audio`` concatenated in order.

        Raises:
            ValueError: If no non-silent interval is reported. Without this
                guard ``np.concatenate`` fails on the empty list with an
                unhelpful "need at least one array" message.
        """
        non_silent_intervals = librosa.effects.split(audio, top_db=top_db)
        if len(non_silent_intervals) == 0:
            raise ValueError(
                f"No non-silent audio found (top_db={top_db}); "
                "nothing left after silence removal."
            )
        return np.concatenate([audio[start:end] for start, end in non_silent_intervals])

    def normalize(self, audio):
        """Normalize ``audio`` using ``librosa.util.normalize`` with its defaults."""
        return librosa.util.normalize(audio)

    def resample(self, audio, orig_sr, target_sr):
        """Resample ``audio`` to ``target_sr``; return it untouched if the rates match."""
        if orig_sr != target_sr:
            return librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)
        return audio

    def preprocess(self, file_path):
        """Run the full cleaning pipeline on the file at ``file_path``.

        Returns:
            The loaded, filtered, silence-stripped and normalized signal.
        """
        audio, sr = self.load_audio(file_path)
        # load_audio already asks librosa for self.sample_rate, so this is
        # normally a pass-through; it is kept as a safeguard.
        audio = self.resample(audio, sr, self.sample_rate)
        audio = self.noise_removal(audio)
        audio = self.silence_removal(audio)
        audio = self.normalize(audio)
        return audio

In [3]:
# AudioFeatureExtractor class
class AudioFeatureExtractor:
    """Produce one MFCC-based feature vector per audio file."""

    def __init__(self):
        # Every extraction runs the file through this preprocessor first.
        self.preprocessor = AudioPreprocessing()

    def extract_audio_features(self, file_path):
        """Preprocess ``file_path`` and average its 40 MFCCs across frames.

        Returns:
            The per-coefficient mean of the MFCC matrix, or ``None`` when any
            step raises (the error is printed rather than propagated).
        """
        feature_vector = None
        try:
            cleaned_signal = self.preprocessor.preprocess(file_path)
            coefficient_matrix = librosa.feature.mfcc(
                y=cleaned_signal,
                sr=self.preprocessor.sample_rate,
                n_mfcc=40,
            )
            # Transpose so the mean is taken over frames, one value per coefficient.
            feature_vector = np.mean(coefficient_matrix.T, axis=0)
        except Exception as e:
            print(f"Error processing audio file: {e}")
        return feature_vector


In [4]:
# AudioEmotionTester class
class AudioEmotionTester:
    """Bundle the trained model, feature scaler and label encoder for inference."""

    def __init__(self, model_path='emotion_recognition_model.h5', scaler_path='scaler.pkl'):
        """Load the model and scaler from disk.

        Args:
            model_path: File passed to ``keras.models.load_model``.
            scaler_path: File passed to ``joblib.load``.
        """
        self.model = load_model(model_path)
        self.feature_extractor = AudioFeatureExtractor()
        # SECURITY: joblib.load unpickles the file; only load trusted artifacts.
        self.scaler = joblib.load(scaler_path)
        # The encoder starts empty; call load_label_encoder() before predicting.
        self.label_encoder = LabelEncoder()

    def load_label_encoder(self, encoder_path):
        """Restore the encoder's ``classes_`` from a saved NumPy array file."""
        # SECURITY: allow_pickle=True can execute code embedded in the file;
        # only load class files from a trusted source.
        self.label_encoder.classes_ = np.load(encoder_path, allow_pickle=True)

    def preprocess_features(self, features):
        """Scale ``features`` and reshape them to ``(n_samples, 1, n_features)``.

        Args:
            features: A single 1-D feature vector or a 2-D batch (array-like).

        Returns:
            The scaled features with a length-1 axis inserted in the middle.
        """
        features = np.asarray(features)
        if features.ndim == 1:
            # A single vector becomes a one-row batch for the scaler.
            features = features.reshape(1, -1)
        features_scaled = self.scaler.transform(features)
        n_samples, n_features = features_scaled.shape[0], features_scaled.shape[1]
        return features_scaled.reshape(n_samples, 1, n_features)

    def predict_emotion(self, features):
        """Return the decoded label for the first sample in ``features``."""
        processed_features = self.preprocess_features(features)
        prediction = self.model.predict(processed_features)
        # Highest-scoring class index per sample.
        predicted_class = np.argmax(prediction, axis=1)
        predicted_label = self.label_encoder.inverse_transform(predicted_class)
        return predicted_label[0]

In [5]:
# Function to handle audio file upload and prediction
def predict_emotion(audio_file):
    """Save an uploaded audio clip, extract its features and print the predicted emotion.

    Args:
        audio_file: Either a binary file-like object exposing ``read()`` or a
            bytes-like object (``bytes``, ``bytearray``, ``memoryview``)
            holding the raw file content.

    Side effects:
        Writes the content to ``uploaded_audio.wav`` in the current working
        directory (whatever the real format is) and prints the outcome.
    """
    # Upload widgets hand over raw bytes / a memoryview rather than a stream,
    # so only call read() when the object actually provides it.
    if hasattr(audio_file, "read"):
        payload = audio_file.read()
    else:
        payload = bytes(audio_file)

    # Save the uploaded file temporarily
    with open("uploaded_audio.wav", "wb") as f:
        f.write(payload)

    # Extract features and predict emotion
    feature_extractor = AudioFeatureExtractor()
    extracted_features = feature_extractor.extract_audio_features("uploaded_audio.wav")

    if extracted_features is None:
        print("Failed to extract audio features.")
        return

    # The tester (and its model) is only built once features are available.
    tester = AudioEmotionTester()
    tester.load_label_encoder('label_encoder_classes.npy')
    predicted_emotion = tester.predict_emotion(extracted_features)
    print(f"Predicted Emotion: {predicted_emotion}")

In [7]:
# Create a file upload widget
upload_widget = widgets.FileUpload(accept='.wav,.mp3,.ogg', multiple=False)

# Button to trigger prediction
button = widgets.Button(description="Predict Emotion")
output = widgets.Output()

def on_button_clicked(b):
    """Run emotion prediction on the first uploaded file and show the result in ``output``."""
    with output:
        output.clear_output()
        uploads = upload_widget.value
        if not uploads:
            print("Please upload an audio file.")
            return

        # ipywidgets 7 exposes the uploads as a dict keyed by filename, while
        # ipywidgets 8 exposes a tuple of per-file dicts; support both.
        entries = uploads.values() if isinstance(uploads, dict) else uploads
        audio_file = next(iter(entries))

        # 'content' is raw bytes / a memoryview; wrap it so the callee can read() it.
        predict_emotion(io.BytesIO(audio_file['content']))

button.on_click(on_button_clicked)

# Display the upload widget and button
display(upload_widget, button, output)

FileUpload(value=(), accept='.wav,.mp3,.ogg', description='Upload')

Button(description='Predict Emotion', style=ButtonStyle())

Output()