In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _, files_here in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in files_here)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Collect audio file paths and their emotion labels from the input directory.
# The label is the text after the last '_' in the file name, up to the first
# '.', lower-cased (e.g. "XXX_word_angry.wav" -> "angry").
AUDIO_EXTENSIONS = ('.wav', '.flac', '.mp3', '.ogg')
MAX_FILES = 2800  # stop walking once this many clips have been collected

paths = []
labels = []
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Skip anything that is not audio: such a file would get a meaningless
        # label here and make librosa.load() fail during feature extraction.
        if not filename.lower().endswith(AUDIO_EXTENSIONS):
            continue
        paths.append(os.path.join(dirname, filename))
        label = filename.split('_')[-1].split('.')[0]
        labels.append(label.lower())
    # '>=' rather than '==': the check runs once per directory, so an exact
    # comparison is skipped forever if one directory pushes the count past it.
    if len(paths) >= MAX_FILES:
        break
print('Dataset is Loaded')

In [None]:
# Number of file paths collected by the loading cell.
len(paths)

In [None]:
# Peek at the first five collected file paths.
paths[:5]

In [None]:
# Peek at the labels parsed for those first five files.
labels[:5]

In [None]:
## Create a dataframe
# One row per clip: the file path goes in 'speech', the parsed emotion in 'label'.
df = pd.DataFrame({'speech': paths, 'label': labels})
df.head()

In [None]:
# Number of clips per emotion label.
df['label'].value_counts()

In [None]:
# Bar chart of how many clips carry each emotion label.
sns.countplot(data=df, x='label')

In [None]:
def waveplot(data, sr, emotion):
    """Draw and show the time-domain waveform of one audio clip.

    data: audio samples, handed unchanged to librosa.display.waveshow.
    sr: sampling rate of `data`.
    emotion: text used as the figure title.
    """
    _fig, wave_ax = plt.subplots(figsize=(10, 4))
    wave_ax.set_title(emotion, size=20)
    librosa.display.waveshow(data, sr=sr, ax=wave_ax)
    plt.show()
    
def spectogram(data, sr, emotion):
    """Draw a dB-scaled spectrogram of one audio clip with a colorbar.

    data: audio samples; their short-time Fourier transform is plotted.
    sr: sampling rate of `data`, used to scale the plot axes.
    emotion: text used as the figure title.

    Unlike waveplot, this does not call plt.show() itself.
    """
    magnitude = np.abs(librosa.stft(data))
    decibels = librosa.amplitude_to_db(magnitude)
    spec_fig, spec_ax = plt.subplots(figsize=(11, 4))
    spec_ax.set_title(emotion, size=20)
    mesh = librosa.display.specshow(decibels, sr=sr, x_axis='time', y_axis='hz', ax=spec_ax)
    spec_fig.colorbar(mesh, ax=spec_ax)

In [None]:
# First clip labelled 'fear': waveform, spectrogram, then an inline audio player.
emotion = 'fear'
is_emotion = df['label'] == emotion
path = df.loc[is_emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)
for draw in (waveplot, spectogram):
    draw(data, sampling_rate, emotion)
Audio(path)

In [None]:
# Second clip (position 1) labelled 'angry': waveform, spectrogram, then an inline audio player.
emotion = 'angry'
is_emotion = df['label'] == emotion
path = df.loc[is_emotion, 'speech'].iloc[1]
data, sampling_rate = librosa.load(path)
for draw in (waveplot, spectogram):
    draw(data, sampling_rate, emotion)
Audio(path)

In [None]:
# First clip labelled 'disgust': waveform, spectrogram, then an inline audio player.
emotion = 'disgust'
is_emotion = df['label'] == emotion
path = df.loc[is_emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)
for draw in (waveplot, spectogram):
    draw(data, sampling_rate, emotion)
Audio(path)

In [None]:
# First clip labelled 'neutral': waveform, spectrogram, then an inline audio player.
emotion = 'neutral'
is_emotion = df['label'] == emotion
path = df.loc[is_emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)
for draw in (waveplot, spectogram):
    draw(data, sampling_rate, emotion)
Audio(path)

In [None]:
# First clip labelled 'sad': waveform, spectrogram, then an inline audio player.
emotion = 'sad'
is_emotion = df['label'] == emotion
path = df.loc[is_emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)
for draw in (waveplot, spectogram):
    draw(data, sampling_rate, emotion)
Audio(path)

In [None]:
# First clip labelled 'ps': waveform, spectrogram, then an inline audio player.
emotion = 'ps'
is_emotion = df['label'] == emotion
path = df.loc[is_emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)
for draw in (waveplot, spectogram):
    draw(data, sampling_rate, emotion)
Audio(path)

In [None]:
# First clip labelled 'happy': waveform, spectrogram, then an inline audio player.
emotion = 'happy'
is_emotion = df['label'] == emotion
path = df.loc[is_emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)
for draw in (waveplot, spectogram):
    draw(data, sampling_rate, emotion)
Audio(path)

# Feature extraction

In [None]:
def extract_mfcc(filename):
    """Return a single 40-value MFCC feature vector for an audio file.

    Loads at most 3 seconds of audio starting 0.5 seconds into the file
    (librosa's default sampling rate applies, since none is passed),
    computes 40 MFCCs and averages them along axis 0 of the transposed
    coefficient matrix, giving one value per coefficient.
    """
    signal, rate = librosa.load(filename, offset=0.5, duration=3)
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
    return np.mean(coefficients.T, axis=0)

In [None]:
# Sanity check: MFCC feature vector for the row whose index label is 0.
extract_mfcc(df['speech'][0])

In [None]:
# Run the extractor on every file path; the result is a Series holding one
# feature array per row.
X_mfcc = df['speech'].apply(extract_mfcc)
X_mfcc

In [None]:
# Turn the Series of per-file vectors into one 2-D array (one row per file).
X = np.array(X_mfcc.tolist())
X.shape

In [None]:
## input split
# Append a trailing axis of length 1 to the feature array.
# Gotcha: this cell rebinds X from itself, so running it twice adds two axes.
X = X[..., np.newaxis]
X.shape

In [None]:
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the label column; fit_transform returns a sparse matrix, so
# convert it to a dense array before handing it to Keras.
enc = OneHotEncoder()
y = enc.fit_transform(df[['label']]).toarray()


## Create the LSTM Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

# LSTM classifier: one recurrent layer over the (40, 1) feature sequence,
# two dropout-regularised dense layers, and a 7-way softmax output.
model = Sequential()
model.add(LSTM(256, return_sequences=False, input_shape=(40,1)))
model.add(Dropout(0.2))
for units in (128, 64):
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(0.2))
model.add(Dense(7, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Train the model
# Keras takes the validation split from the *last* 20% of the rows, before
# any shuffling. X and y are still in directory-walk order at this point, so
# without an explicit shuffle the validation set would be whatever folders
# happened to be walked last. Shuffle once with a fixed seed, into new names
# so this cell stays re-runnable.
shuffle_idx = np.random.default_rng(42).permutation(len(X))
X_shuffled, y_shuffled = X[shuffle_idx], y[shuffle_idx]
history = model.fit(X_shuffled, y_shuffled, validation_split=0.2, epochs=50, batch_size=64, shuffle=True)

## Plot the results

In [None]:
# Training vs. validation accuracy per epoch.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# Derive the x-axis from the recorded history instead of repeating the epoch
# count, so the plot still works if training length changes.
epochs = list(range(len(acc)))

plt.plot(epochs, acc, label='train accuracy')
plt.plot(epochs, val_acc, label='val accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch; the x-axis `epochs` is the list
# built in the accuracy-plot cell.
loss = history.history['loss']
val_loss = history.history['val_loss']

for curve, curve_label in ((loss, 'train loss'), (val_loss, 'val loss')):
    plt.plot(epochs, curve, label=curve_label)
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
# single_file_test_keras.py
import numpy as np
import librosa
import tensorflow as tf
from pathlib import Path

# --- CONFIG (match training) ---
MODEL_PATH = "model.h5"
# The training feature extractor calls librosa.load() without `sr`, so clips
# are resampled to librosa's default 22050 Hz; inference must use that rate.
SAMPLE_RATE = 22050
DURATION = 3.0            # seconds used during training
N_MFCC = 40
# Class names in the column order produced by OneHotEncoder, which sorts the
# label strings; entry i names output unit i of the 7-way softmax.
# NOTE(review): assumes the dataset holds exactly these seven labels — confirm
# against enc.categories_ from the training run.
LABELS = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'ps', 'sad']

# --- helpers ---
def load_audio(path, sr=SAMPLE_RATE, duration=DURATION, offset=0.5, pad=False):
    """Load a mono clip the way the training feature extractor loads it.

    path: audio file to read.
    sr: target sampling rate passed to librosa.load.
    duration: maximum number of seconds to read.
    offset: seconds to skip at the start of the file. Defaults to 0.5, the
        value the training extractor passes to librosa.load.
    pad: when True, zero-pad clips shorter than `duration` to exactly
        int(sr * duration) samples. Off by default because training never
        pads, and padded silence would shift the time-averaged MFCCs.

    Returns the samples as a 1-D array of at most int(sr * duration) values.
    """
    y, _ = librosa.load(path, sr=sr, mono=True, duration=duration, offset=offset)
    target_len = int(sr * duration)
    if pad and len(y) < target_len:
        y = np.pad(y, (0, target_len - len(y)))
    # Slicing is a no-op for clips already at or below the target length.
    return y[:target_len]

def extract_mfcc(y, sr=SAMPLE_RATE, n_mfcc=N_MFCC):
    """Return the time-averaged MFCC vector for already-loaded samples.

    y: 1-D array of audio samples.
    sr: sampling rate of `y`.
    n_mfcc: number of MFCC coefficients to compute.

    Returns a 1-D array with one value per coefficient. This mirrors the
    training-time extractor, which averages the coefficients over time and
    applies no normalisation; the model in this notebook takes (40, 1)
    inputs, so a per-frame matrix could not be fed to it.

    Gotcha: this definition shadows the earlier extract_mfcc(filename) once
    the cell has run; it takes samples, not a file path.
    """
    mf = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return np.mean(mf.T, axis=0)

# --- load model ---
model = tf.keras.models.load_model(MODEL_PATH)

# --- predict single file ---
audio_path = "test_audio.wav"  # change
y = load_audio(audio_path)
mf = extract_mfcc(y)
# Wrap the features with a leading batch axis and a trailing channel axis.
inp = np.expand_dims(mf, axis=(0, -1))
probs = model.predict(inp)[0]
pred_idx = np.argmax(probs)
confidence = float(probs[pred_idx])
print("Prediction:", LABELS[pred_idx], "Confidence:", confidence)


In [None]:
# streamlit_test_app.py
import streamlit as st
import librosa, numpy as np
import tensorflow as tf

MODEL_PATH = "model.h5"
# The training feature extractor calls librosa.load() without `sr`, so clips
# are resampled to librosa's default 22050 Hz; the app must use that rate.
SAMPLE_RATE = 22050
DURATION = 3.0
N_MFCC = 40
# Class names in the column order produced by OneHotEncoder, which sorts the
# label strings; entry i names output unit i of the 7-way softmax.
# NOTE(review): assumes the dataset holds exactly these seven labels — confirm
# against enc.categories_ from the training run.
LABELS = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'ps', 'sad']

@st.cache_resource
def load_model():
    """Load the Keras model stored at MODEL_PATH.

    Decorated with st.cache_resource, so Streamlit caches the returned model.
    """
    keras_model = tf.keras.models.load_model(MODEL_PATH)
    return keras_model

def preprocess_file(wav_bytes):
    """Turn an audio file into the feature vector the model was trained on.

    wav_bytes: path or file-like object accepted by librosa.load; despite the
        name, the caller in this script passes a file path. When None, a
        librosa example recording is used instead.

    Returns a 1-D array of N_MFCC time-averaged MFCC values. Loading mirrors
    the training-time extractor: skip the first 0.5 s, read at most DURATION
    seconds, no padding and no normalisation.

    Raises ValueError if no samples remain after the 0.5 s offset.
    """
    # librosa.util.example_audio_file() is gone from current librosa releases;
    # librosa.example() is its replacement for fetching a bundled recording.
    source = librosa.example('trumpet') if wav_bytes is None else wav_bytes
    y, _ = librosa.load(source, sr=SAMPLE_RATE, mono=True,
                        duration=DURATION, offset=0.5)
    if len(y) == 0:
        raise ValueError("audio is too short: nothing left after the 0.5 s offset")
    mf = librosa.feature.mfcc(y=y, sr=SAMPLE_RATE, n_mfcc=N_MFCC)
    return np.mean(mf.T, axis=0)

# Page layout: title, file picker, and the (cached) model.
st.title("Speech Emotion Test")
uploaded = st.file_uploader("Upload a WAV/MP3", type=['wav','mp3','flac'])
model = load_model()

if uploaded:
    # Persist the upload to disk so it can be handed to preprocess_file by path.
    with open("tmp_upload.wav","wb") as tmp_file:
        tmp_file.write(uploaded.getbuffer())
    mf = preprocess_file("tmp_upload.wav")
    # Wrap the features with a leading batch axis and a trailing channel axis.
    inp = np.expand_dims(mf, axis=(0, -1))
    probs = model.predict(inp)[0]
    pred = LABELS[np.argmax(probs)]
    st.write("Prediction:", pred)
    # Per-class scores, keyed by label name.
    st.write({label: float(probs[i]) for i, label in enumerate(LABELS)})
