In [1]:
from google.colab import drive
drive.mount('/content/drive') # Mount to root first

# Now you can access your folder:
import os
speechrecognition_path = os.path.join('/content/drive/MyDrive', 'FinalYrProject')

Mounted at /content/drive


In [2]:
# Install required libraries
!pip install librosa gradio --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.2/54.2 MB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.1/323.1 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m97.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import pickle
import librosa
import gradio as gr
import numpy as np
from librosa.feature import zero_crossing_rate, chroma_stft, mfcc, rms, melspectrogram

In [4]:
# Debugging: Check if librosa has 'feature'
print("Librosa available functions: ", dir(librosa))

Librosa available functions:  ['A4_to_tuning', 'A_weighting', 'B_weighting', 'C_weighting', 'D_weighting', 'LibrosaError', 'ParameterError', 'Z_weighting', 'amplitude_to_db', 'autocorrelate', 'beat', 'blocks_to_frames', 'blocks_to_samples', 'blocks_to_time', 'cache', 'chirp', 'cite', 'clicks', 'core', 'cqt', 'cqt_frequencies', 'db_to_amplitude', 'db_to_power', 'decompose', 'display', 'effects', 'estimate_tuning', 'ex', 'example', 'f0_harmonics', 'feature', 'fft_frequencies', 'fifths_to_note', 'filters', 'fmt', 'fourier_tempo_frequencies', 'frames_to_samples', 'frames_to_time', 'frequency_weighting', 'get_duration', 'get_fftlib', 'get_samplerate', 'griffinlim', 'griffinlim_cqt', 'hybrid_cqt', 'hz_to_fjs', 'hz_to_mel', 'hz_to_midi', 'hz_to_note', 'hz_to_octs', 'hz_to_svara_c', 'hz_to_svara_h', 'icqt', 'iirt', 'interp_harmonics', 'interval_frequencies', 'interval_to_fjs', 'istft', 'key_to_degrees', 'key_to_notes', 'list_mela', 'list_thaat', 'load', 'lpc', 'magphase', 'mel_frequencies', 'm

In [5]:
model_path = '/content/drive/MyDrive/FinalYrProject/model.pkl'
encoder_path = '/content/drive/MyDrive/FinalYrProject/encoder.pkl'
scaler_path = '/content/drive/MyDrive/FinalYrProject/scaler.pkl'

with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

with open(encoder_path, 'rb') as encoder_file:
    encoder = pickle.load(encoder_file)

with open(scaler_path, 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

In [6]:
def noise(data):
    noise_amp = 0.035 * np.random.uniform() * np.amax(data)
    return data + noise_amp * np.random.normal(size=data.shape[0])


def stretch(data, rate=0.8):
    return librosa.effects.time_stretch(y=data, rate=rate)


def pitch(data, sampling_rate, pitch_factor=0.7):
    return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=pitch_factor)

In [7]:
def extract_features(data, sample_rate):
    """Extract multiple features from an audio file."""
    zcr = np.mean(zero_crossing_rate(y=data).T, axis=0)
    chroma = np.mean(chroma_stft(S=np.abs(librosa.stft(data)), sr=sample_rate).T, axis=0)
    mfcc_feat = np.mean(mfcc(y=data, sr=sample_rate).T, axis=0)
    rms_feat = np.mean(rms(y=data).T, axis=0)
    mel_feat = np.mean(melspectrogram(y=data, sr=sample_rate).T, axis=0)

    return np.hstack([zcr, chroma, mfcc_feat, rms_feat, mel_feat])


def get_features(audio):
    """Process the audio file and extract features with augmentations."""
    data, sample_rate = librosa.load(audio, sr=None)
    result = np.array([extract_features(data, sample_rate)])

    # Augmentations
    result = np.vstack([result, extract_features(noise(data), sample_rate)])
    result = np.vstack([result, extract_features(pitch(stretch(data), sample_rate), sample_rate)])

    return result

In [8]:
def predict_emotion(audio):
    """Predict the emotion from an audio file."""
    features = get_features(audio)
    prediction = model.predict(features)
    return encoder.inverse_transform(prediction)[0]

# Create Gradio Interface
gr.Interface(
    fn=predict_emotion,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(label="Predicted Emotion"),
    title="Speech Emotion Recognition",
    description="Upload or record your voice to predict the emotion."
).launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e4f4eab76cc37536df.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


