In [1]:
import numpy as np
import soundfile
import librosa
import glob
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, LSTM, Dropout, TimeDistributed

def extract_features(file_name, mfcc, chroma, mel):
    """Build a 1-D feature vector for one audio file.

    Each enabled feature family is computed with librosa, averaged over
    time (``axis=1``) to a 1-D vector, zero-padded on the right to the
    length of the longest enabled family, and the padded vectors are
    concatenated in the order MFCC, chroma, mel.

    Args:
        file_name: Path of the audio file, opened with ``soundfile``.
        mfcc: If truthy, include the mean of 40 MFCC coefficients.
        chroma: If truthy, include the mean chromagram computed from the
            magnitude STFT.
        mel: If truthy, include the mean mel spectrogram.

    Returns:
        A 1-D NumPy array of length ``k * max_len`` where ``k`` is the
        number of enabled families and ``max_len`` the longest of them.

    Raises:
        ValueError: If none of ``mfcc``, ``chroma`` or ``mel`` is enabled.
    """
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns (frames, channels) for multi-channel files, while
    # librosa treats the LAST axis as time. Down-mix to mono so stereo
    # recordings produce the same feature layout as mono ones.
    if X.ndim > 1:
        X = X.mean(axis=1)

    result = []
    if mfcc:
        mfcc_frames = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40)
        result.append(np.mean(mfcc_frames, axis=1))
    if chroma:
        # Local names differ from the flag parameters so the flags are not
        # overwritten by the arrays they control.
        stft = np.abs(librosa.stft(X))
        chroma_frames = librosa.feature.chroma_stft(S=stft, sr=sample_rate)
        result.append(np.mean(chroma_frames, axis=1))
    if mel:
        mel_frames = librosa.feature.melspectrogram(y=X, sr=sample_rate)
        result.append(np.mean(mel_frames, axis=1))

    if not result:
        # Previously surfaced as an opaque "max() arg is an empty sequence".
        raise ValueError(
            "extract_features needs at least one of mfcc, chroma or mel enabled"
        )

    # NOTE(review): padding every family to the longest one inserts runs of
    # zeros between families in the output. Concatenating 1-D vectors does
    # not require equal lengths, so this looks unintentional, but the layout
    # is kept because consumers of this vector depend on its length.
    max_len = max(feature.shape[0] for feature in result)
    padded = [
        np.pad(feature, (0, max_len - feature.shape[0])) for feature in result
    ]
    return np.concatenate(padded, axis=0)

def load_data(
    test_size=0.2,
    pattern="D:\\D\\Sudharsan\\Mini project\\ravdess\\Actor_*\\*.wav",
):
    """Load features and integer labels, then split into train and test.

    Every file matching ``pattern`` is mapped to an emotion through the
    third dash-separated field of its base name (looked up in the
    module-level ``emotions`` dict). Files whose emotion is not listed in
    ``observed_emotions`` are skipped.

    Args:
        test_size: Passed through to ``train_test_split``.
        pattern: Glob pattern selecting the ``.wav`` files to load.
            Defaults to the original hard-coded dataset location.

    Returns:
        ``x_train, x_test, y_train, y_test`` as produced by
        ``train_test_split(..., random_state=9)``. Features are the vectors
        from ``extract_features``; labels are ``LabelEncoder`` integers.

    Raises:
        ValueError: If no file matching ``pattern`` yields a sample.
    """
    x, y = [], []
    # glob returns paths in arbitrary order; sorting makes the fixed
    # random_state split reproducible across machines and file systems.
    for file in sorted(glob.glob(pattern)):
        file_name = os.path.basename(file)
        emotion = emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        x.append(extract_features(file, mfcc=True, chroma=True, mel=True))
        y.append(emotion)

    if not x:
        # Fail here with the actual cause instead of deep inside sklearn.
        raise ValueError(f"No usable audio files matched pattern {pattern!r}")

    # Convert string labels to integer labels.
    # NOTE(review): the fitted encoder is local, so the integer -> emotion
    # name mapping is not available to callers after this returns.
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)

    return train_test_split(x, y, test_size=test_size, random_state=9)

# Two-character emotion code (third dash-separated field of a file name)
# mapped to its emotion label.
emotions = dict(
    (
        ("01", "neutral"),
        ("02", "calm"),
        ("03", "happy"),
        ("04", "sad"),
        ("05", "angry"),
        ("06", "fearful"),
        ("07", "disgust"),
        ("08", "surprised"),
    )
)

# Emotions kept when loading data; remove entries here to train on a subset.
observed_emotions = [
    "neutral",
    "calm",
    "happy",
    "sad",
    "angry",
    "fearful",
    "disgust",
    "surprised",
]

# Split the dataset
x_train, x_test, y_train, y_test = load_data(test_size=0.25)

# Count classes over BOTH splits. Counting only y_train under-sizes the
# one-hot encoding whenever a class is missing from the training split,
# which makes to_categorical fail on any label >= num_classes.
num_classes = len(np.unique(np.concatenate([y_train, y_test])))

# Convert labels to one-hot encoded format
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Force every feature vector to a fixed length.
# NOTE(review): np.resize keeps only the first max_sequence_length values of
# a longer vector and fills a shorter one with repeated copies of its data
# (it does not zero-pad). Confirm that dropping the tail of each feature
# vector is intended before changing this length, since the saved model's
# input shape depends on it.
max_sequence_length = 100
x_train = np.array([np.resize(sequence, (max_sequence_length,)) for sequence in x_train])
x_test = np.array([np.resize(sequence, (max_sequence_length,)) for sequence in x_test])

# Add a trailing channel dimension: (samples, length) -> (samples, length, 1),
# matching the Conv1D input_shape below.
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

# Define the CNN-LSTM model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(max_sequence_length, 1)))
model.add(MaxPooling1D(pool_size=2))
# NOTE(review): each timestep is presumably already a flat vector of filter
# activations here, so this layer looks like a no-op; kept so the saved
# model's layer layout is unchanged.
model.add(TimeDistributed(Flatten()))
model.add(LSTM(units=128))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model.
# NOTE(review): the test split is also used as validation data, so the
# validation metrics printed during training are not an independent estimate.
model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_test, y_test))

# Save the trained model
model.save('voice_emotion_cnn_lstm.h5')


  return pitch_tuning(


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78