# DEMO

In [17]:
# Extract speech.waw and emo tags from datasets
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.utils import np_utils
import torch
import os

# Data extraction

In [18]:
# Function to extract multiple acoustic features from audio
def extract_features(audio_path):
    # Load audio file
    y, sr = librosa.load(audio_path, sr=None)

    # Extract different features (MFCCs, chroma, spectral contrast)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

    # Compute statistical features for each feature
    mfccs_stats = np.hstack((np.mean(mfccs, axis=1), np.std(mfccs, axis=1)))
    chroma_stats = np.hstack((np.mean(chroma, axis=1), np.std(chroma, axis=1)))
    spectral_contrast_stats = np.hstack((np.mean(spectral_contrast, axis=1), np.std(spectral_contrast, axis=1)))

    # Concatenate multiple features
    combined_features = np.hstack((mfccs_stats, chroma_stats, spectral_contrast_stats))

    return combined_features


In [19]:
import torch
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.tensor(data)  # Assuming 'data' is a NumPy array or a list
        self.labels = torch.tensor(labels)  # Assuming 'labels' is a NumPy array or a list

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index], self.labels[index]


### Generate datasets  

In [31]:
'''
data\
    ToyData\
        angry\
            xxxx.waw
            xxxx.waw
            xxxx.waw
        happy\
            xxxx.waw
            xxxx.waw
            xxxx.waw
        neutral\
            xxxx.waw
            xxxx.waw
            xxxx.waw
'''


'\ndata    ToyData        angry            xxxx.waw\n            xxxx.waw\n            xxxx.waw\n        happy            xxxx.waw\n            xxxx.waw\n            xxxx.waw\n        neutral            xxxx.waw\n            xxxx.waw\n            xxxx.waw\n'

In [20]:
Audio_folder = './data/ToyData/'
feature_tensors = []
label_tensors = []

emo_dict = {'happy': 0, 'angry': 1, 'neutral': 2}

for emotion_folder in os.listdir(Audio_folder):
    subfolder = os.path.join(Audio_folder, emotion_folder)
    if os.path.isdir(subfolder):  # Check if it's a directory
        print(f"Processing emotion: {emotion_folder}")
        # Loop through audio files within each emotion folder
        for audio_file in os.listdir(subfolder):
            if audio_file.endswith(".wav"):  # Ensure the file is a .wav file
                file_path = os.path.join(subfolder, audio_file)
                features = extract_features(file_path)
                feature_tensors.append(features)
                label_tensors.append(emo_dict[emotion_folder])

Processing emotion: angry
Processing emotion: happy
Processing emotion: neutral


In [21]:
dataset = CustomDataset(feature_tensors, label_tensors)
train_size = int(0.8 * len(dataset))  
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model

In [22]:
import torch
from torch.utils.data import DataLoader, TensorDataset


epochs = 100
learning_rate = 0.001

# create DataLoader
# data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# simple MLP
class SimpleMLP(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleMLP, self).__init__()
        self.fc1 = torch.nn.Linear(input_size, hidden_size)
        self.fc2 = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Training process

In [23]:
model = SimpleMLP(input_size=64, hidden_size=32, output_size=3)  

# Loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Loop
for epoch in range(epochs):
    for batch_X, batch_y in train_loader:

        batch_X, batch_y = batch_X.float(), batch_y.long()  
        optimizer.zero_grad()  
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}')


Epoch [1/100], Loss: 0.9098253846168518
Epoch [2/100], Loss: 0.7968207001686096
Epoch [3/100], Loss: 0.8064934611320496
Epoch [4/100], Loss: 1.1449300050735474
Epoch [5/100], Loss: 0.7718663811683655
Epoch [6/100], Loss: 0.670131504535675
Epoch [7/100], Loss: 0.7896829843521118
Epoch [8/100], Loss: 0.6588603854179382
Epoch [9/100], Loss: 0.6428120732307434
Epoch [10/100], Loss: 0.7966267466545105
Epoch [11/100], Loss: 0.5648611783981323
Epoch [12/100], Loss: 0.5816508531570435
Epoch [13/100], Loss: 0.5406836867332458
Epoch [14/100], Loss: 1.626785159111023
Epoch [15/100], Loss: 0.7451679706573486
Epoch [16/100], Loss: 0.532037079334259
Epoch [17/100], Loss: 0.5537184476852417
Epoch [18/100], Loss: 0.43712693452835083
Epoch [19/100], Loss: 1.2303595542907715
Epoch [20/100], Loss: 0.6777822375297546
Epoch [21/100], Loss: 1.0936566591262817
Epoch [22/100], Loss: 0.4806949496269226
Epoch [23/100], Loss: 0.4495686888694763
Epoch [24/100], Loss: 0.4269716739654541
Epoch [25/100], Loss: 0.874

# Evaluation

### test data

In [24]:
criterion = torch.nn.CrossEntropyLoss()
correct = 0
total = 0

with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        inputs, labels = inputs.float(), labels.long()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
print(f'Accuracy on test set: {accuracy:.2f}%')


Accuracy on test set: 99.17%


### Real data

In [25]:
TestWaw = './data/archive/Crema/1009_IWW_ANG_XX.wav'
testFeature = torch.tensor(extract_features(TestWaw))
out = model(testFeature.float())
max_value = torch.argmax(out)
print("Index of maximum value:", max_value.item(), 'expect: ', 1)

Index of maximum value: 0 expect:  1


In [26]:
TestWaw = './data/archive/Crema/1001_DFA_HAP_XX.wav'
testFeature = torch.tensor(extract_features(TestWaw))
out = model(testFeature.float())
max_value = torch.argmax(out)
print("Index of maximum value:", max_value.item(), 'expect: ', 0)

Index of maximum value: 0 expect:  0


In [27]:
TestWaw = './data/archive/Crema/1001_IEO_NEU_XX.wav'
testFeature = torch.tensor(extract_features(TestWaw))
out = model(testFeature.float())
max_value = torch.argmax(out)
print("Index of maximum value:", max_value.item(), 'expect: ', 2)

Index of maximum value: 2 expect:  2


In [28]:
TestWaw = './data/archive/Crema/1001_ITH_ANG_XX.wav'
testFeature = torch.tensor(extract_features(TestWaw))
out = model(testFeature.float())
max_value = torch.argmax(out)
print("Index of maximum value:", max_value.item(), 'expect: ', 1)

Index of maximum value: 0 expect:  1


In [29]:
TestWaw = './data/archive/Crema/1003_IEO_ANG_MD.wav'
testFeature = torch.tensor(extract_features(TestWaw))
out = model(testFeature.float())
max_value = torch.argmax(out)
print("Index of maximum value:", max_value.item(), 'expect: ', 1)

Index of maximum value: 0 expect:  1


In [30]:
TestWaw = './data/archive/Crema/1004_IWW_NEU_XX.wav'
testFeature = torch.tensor(extract_features(TestWaw))
out = model(testFeature.float())
max_value = torch.argmax(out)
print("Index of maximum value:", max_value.item(), 'expect: ', 2)

Index of maximum value: 2 expect:  2
