In [17]:
import numpy as np
import librosa
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [18]:
# Root folder that holds one sub-folder per emotion. The EMOTION_DATASET_PATH
# environment variable overrides it, so the notebook can run on another machine
# without editing this cell; the default is the original local path.
dataset_path = os.environ.get('EMOTION_DATASET_PATH', 'D:\\YapayZekaProje\\Voice')

# Each entry is used both as the sub-folder name under dataset_path and as the
# class label, so the spelling (including 'suprised_08') has to match the folder
# names on disk exactly -- presumably that is why it is spelled this way.
emotion_labels = [
    'angry_05',
    'disgust_07',
    'fearful_06',
    'happy_03',
    'neutral_01',
    'sad_04',
    'suprised_08',
]

In [19]:
def extract_features(file_path, n_mfcc=40):
    """Summarise one audio file as the time-average of its MFCCs.

    Parameters
    ----------
    file_path : str
        Path of the audio file, passed straight to ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 40, the value that
        was previously hard-coded).

    Returns
    -------
    numpy.ndarray
        Mean of every coefficient over the frame axis. For the mono signal
        that ``librosa.load`` returns by default this is a vector with
        ``n_mfcc`` entries.
    """
    # 'kaiser_fast' selects librosa's faster resampling mode.
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    # Transpose to (frames, coefficients) and average the frames away so clips
    # of any duration map to a fixed-length vector.
    return np.mean(mfccs.T, axis=0)

In [20]:
def load_data(dataset_path):
    """Collect a feature vector and a label for every WAV file in the dataset.

    For each entry of the module-level ``emotion_labels`` list, the folder
    ``dataset_path/<label>`` is scanned and every ``.wav`` file in it is passed
    to ``extract_features``.

    Parameters
    ----------
    dataset_path : str
        Folder containing one sub-folder per emotion label.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, labels)``: the stacked feature vectors and, in the same
        order, the folder name each file came from.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when a label folder cannot be read
        (for example ``FileNotFoundError`` if it does not exist).
    """
    features = []
    labels = []

    for label in emotion_labels:
        emotion_path = os.path.join(dataset_path, label)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split made later) reproducible.
        for file_name in sorted(os.listdir(emotion_path)):
            # Case-insensitive match so files named '*.WAV' are not dropped.
            if not file_name.lower().endswith(".wav"):
                continue
            file_path = os.path.join(emotion_path, file_name)
            features.append(extract_features(file_path))
            labels.append(label)

    return np.array(features), np.array(labels)

In [21]:
# Scan the dataset folder once: X receives the stacked feature vectors and
# y the matching label strings, both as returned by load_data.
X, y = load_data(dataset_path=dataset_path)

In [22]:
# Turn the label strings into integer class ids, then into one-hot rows
# (the target format that the 'categorical_crossentropy' loss expects).
# NOTE: `y` is rebound here, so this cell is not idempotent -- re-run the
# load_data cell first if this one has to be executed again.
label_encoder = LabelEncoder().fit(y)
y = to_categorical(label_encoder.transform(y))

In [23]:
# Hold out 20% of the samples for testing; random_state pins the shuffle.
# The split is stratified on the class index (argmax of the one-hot rows of y)
# so every emotion keeps the same share in the train and the test partition.
# Note: stratification needs at least two samples of every class.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y.argmax(axis=1),
)

In [24]:
# Give every sample a trailing axis of length 1 so the arrays are 3-D
# (samples, features, 1); train_model reads shape[1] and shape[2] from them.
# An explicit reshape is used instead of `[..., np.newaxis]` so that re-running
# this cell is harmless: an array that is already (n, f, 1) stays (n, f, 1)
# rather than growing a fourth axis.
X_train = X_train.reshape(len(X_train), X_train.shape[1], 1)
X_test = X_test.reshape(len(X_test), X_test.shape[1], 1)

In [36]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [47]:
def create_model(input_shape, num_classes=7):
    """Build and compile the stacked-LSTM emotion classifier.

    Parameters
    ----------
    input_shape : tuple
        Shape of a single sample (without the batch axis), forwarded to the
        model's ``Input`` layer.
    num_classes : int, optional
        Number of output units of the final softmax layer. Defaults to 7,
        one per emotion class, which was previously hard-coded.

    Returns
    -------
    Sequential
        The model, compiled with the Adam optimizer, categorical
        cross-entropy loss and accuracy as the reported metric.
    """
    model = Sequential([
        # Explicit Input layer instead of passing `input_shape=` to the first
        # LSTM; recent Keras versions warn about the layer-argument form.
        tf.keras.Input(shape=input_shape),
        # return_sequences=True so the second LSTM receives a full sequence.
        LSTM(128, return_sequences=True),
        Dropout(0.5),
        LSTM(64),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),  # one unit per emotion class
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [48]:
def train_model(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2):
    """Create the classifier for the given data and fit it.

    Parameters
    ----------
    X_train : array
        Training features; must be 3-D, because axes 1 and 2 are read to
        derive the model's per-sample input shape.
    y_train : array
        Training targets, passed unchanged to ``model.fit``.
    epochs : int, optional
        Number of training epochs (default 100).
    batch_size : int, optional
        Mini-batch size (default 32).
    validation_split : float, optional
        Fraction of the training data that ``fit`` sets aside for validation
        (default 0.2).

    Returns
    -------
    The fitted model returned by ``create_model``.
    """
    # Per-sample shape = everything after the batch axis of the 3-D input.
    input_shape = (X_train.shape[1], X_train.shape[2])
    model = create_model(input_shape)
    model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
    )
    return model

In [49]:
# Build the network and fit it on the training partition; the fitted model
# is kept in `model` for the save step below.
model = train_model(X_train=X_train, y_train=y_train)

Epoch 1/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 64ms/step - accuracy: 0.1871 - loss: 1.9290 - val_accuracy: 0.1867 - val_loss: 1.8608
Epoch 2/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - accuracy: 0.2008 - loss: 1.8432 - val_accuracy: 0.2229 - val_loss: 1.8020
Epoch 3/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - accuracy: 0.2199 - loss: 1.8215 - val_accuracy: 0.2560 - val_loss: 1.7527
Epoch 4/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - accuracy: 0.2724 - loss: 1.7411 - val_accuracy: 0.2711 - val_loss: 1.7278
Epoch 5/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - accuracy: 0.2696 - loss: 1.7167 - val_accuracy: 0.3343 - val_loss: 1.7044
Epoch 6/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - accuracy: 0.2647 - loss: 1.7205 - val_accuracy: 0.3313 - val_loss: 1.6725
Epoch 7/100
[1m42/42[0m [

In [50]:
# Persist the trained model to disk; the '.h5' suffix selects the HDF5 format.
model.save(filepath='emotion_detection_model.h5')

