## Machine learning

In [1]:
import os
import numpy as np
import opensmile
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

# Emotion labels
emotion_labels = {
    'W': 'Anger',
    'L': 'Boredom',
    'E': 'Disgust',
    'A': 'Anxiety/Fear',
    'F': 'Happiness',
    'T': 'Sadness',
    'N': 'Neutral'
}

# Feature extraction
def extract_audio_features(file_path):
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.GeMAPSv01b,
        feature_level=opensmile.FeatureLevel.Functionals,
    )
    features = smile.process_file(file_path)
    return features.values

# Load data
data_folder = "Emo-DB-database"
file_list = os.listdir(data_folder)


print(f"Total files in the folder: {len(file_list)}")
print(f"File list: {file_list}\n")



data = []
labels = []



for file_name in file_list:
    emotion_code = file_name[5]
    print(f"Processing {file_name}, emotion_code: {emotion_code}")  # Add this line
    if emotion_code in emotion_labels:
        file_path = os.path.join(data_folder, file_name)
        features = extract_audio_features(file_path)
        print(f"Features shape for {file_name}: {features.shape}")
        data.append(features)
        labels.append(emotion_code)
    else:
        print(f"Skipping {file_name}, emotion_code: {emotion_code}")  # Add this line

data = np.vstack(data)
labels = np.array(labels)

# Preprocessing
scaler = StandardScaler()
data = scaler.fit_transform(data)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Train a classifier
classifier = LogisticRegression(max_iter=1000)
classifier.fit(X_train, y_train)

# Evaluate the model
y_pred = classifier.predict(X_test)
print(classification_report(y_test, y_pred, target_names=emotion_labels.values()))

Total files in the folder: 535
File list: ['03a01Fa.wav', '03a01Nc.wav', '03a01Wa.wav', '03a02Fc.wav', '03a02Nc.wav', '03a02Ta.wav', '03a02Wb.wav', '03a02Wc.wav', '03a04Ad.wav', '03a04Fd.wav', '03a04Lc.wav', '03a04Nc.wav', '03a04Ta.wav', '03a04Wc.wav', '03a05Aa.wav', '03a05Fc.wav', '03a05Nd.wav', '03a05Tc.wav', '03a05Wa.wav', '03a05Wb.wav', '03a07Fa.wav', '03a07Fb.wav', '03a07La.wav', '03a07Nc.wav', '03a07Wc.wav', '03b01Fa.wav', '03b01Lb.wav', '03b01Nb.wav', '03b01Td.wav', '03b01Wa.wav', '03b01Wc.wav', '03b02Aa.wav', '03b02La.wav', '03b02Na.wav', '03b02Tb.wav', '03b02Wb.wav', '03b03Nb.wav', '03b03Tc.wav', '03b03Wc.wav', '03b09La.wav', '03b09Nc.wav', '03b09Tc.wav', '03b09Wa.wav', '03b10Ab.wav', '03b10Ec.wav', '03b10Na.wav', '03b10Nc.wav', '03b10Wb.wav', '03b10Wc.wav', '08a01Ab.wav', '08a01Fd.wav', '08a01Lc.wav', '08a01Na.wav', '08a01Wa.wav', '08a01Wc.wav', '08a02Ab.wav', '08a02Ac.wav', '08a02Fe.wav', '08a02La.wav', '08a02Na.wav', '08a02Tb.wav', '08a02Wc.wav', '08a04Ff.wav', '08a04La.wav

Features shape for 03a01Wa.wav: (1, 62)
Processing 03a02Fc.wav, emotion_code: F
Features shape for 03a02Fc.wav: (1, 62)
Processing 03a02Nc.wav, emotion_code: N
Features shape for 03a02Nc.wav: (1, 62)
Processing 03a02Ta.wav, emotion_code: T
Features shape for 03a02Ta.wav: (1, 62)
Processing 03a02Wb.wav, emotion_code: W
Features shape for 03a02Wb.wav: (1, 62)
Processing 03a02Wc.wav, emotion_code: W
Features shape for 03a02Wc.wav: (1, 62)
Processing 03a04Ad.wav, emotion_code: A
Features shape for 03a04Ad.wav: (1, 62)
Processing 03a04Fd.wav, emotion_code: F
Features shape for 03a04Fd.wav: (1, 62)
Processing 03a04Lc.wav, emotion_code: L
Features shape for 03a04Lc.wav: (1, 62)
Processing 03a04Nc.wav, emotion_code: N
Features shape for 03a04Nc.wav: (1, 62)
Processing 03a04Ta.wav, emotion_code: T
Features shape for 03a04Ta.wav: (1, 62)
Processing 03a04Wc.wav, emotion_code: W
Features shape for 03a04Wc.wav: (1, 62)
Processing 03a05Aa.wav, emotion_code: A
Features shape for 03a05Aa.wav: (1, 62)


Features shape for 08b10Tc.wav: (1, 62)
Processing 08b10Wa.wav, emotion_code: W
Features shape for 08b10Wa.wav: (1, 62)
Processing 09a01Ea.wav, emotion_code: E
Features shape for 09a01Ea.wav: (1, 62)
Processing 09a01Fa.wav, emotion_code: F
Features shape for 09a01Fa.wav: (1, 62)
Processing 09a01Nb.wav, emotion_code: N
Features shape for 09a01Nb.wav: (1, 62)
Processing 09a01Wb.wav, emotion_code: W
Features shape for 09a01Wb.wav: (1, 62)
Processing 09a02Ea.wav, emotion_code: E
Features shape for 09a02Ea.wav: (1, 62)
Processing 09a02Eb.wav, emotion_code: E
Features shape for 09a02Eb.wav: (1, 62)
Processing 09a02La.wav, emotion_code: L
Features shape for 09a02La.wav: (1, 62)
Processing 09a02Wb.wav, emotion_code: W
Features shape for 09a02Wb.wav: (1, 62)
Processing 09a04Fd.wav, emotion_code: F
Features shape for 09a04Fd.wav: (1, 62)
Processing 09a04La.wav, emotion_code: L
Features shape for 09a04La.wav: (1, 62)
Processing 09a04Nb.wav, emotion_code: N
Features shape for 09a04Nb.wav: (1, 62)


Features shape for 11a05Td.wav: (1, 62)
Processing 11a05Wd.wav, emotion_code: W
Features shape for 11a05Wd.wav: (1, 62)
Processing 11a07Ac.wav, emotion_code: A
Features shape for 11a07Ac.wav: (1, 62)
Processing 11a07Ld.wav, emotion_code: L
Features shape for 11a07Ld.wav: (1, 62)
Processing 11a07Ta.wav, emotion_code: T
Features shape for 11a07Ta.wav: (1, 62)
Processing 11a07Wc.wav, emotion_code: W
Features shape for 11a07Wc.wav: (1, 62)
Processing 11b01Ab.wav, emotion_code: A
Features shape for 11b01Ab.wav: (1, 62)
Processing 11b01Eb.wav, emotion_code: E
Features shape for 11b01Eb.wav: (1, 62)
Processing 11b01Fc.wav, emotion_code: F
Features shape for 11b01Fc.wav: (1, 62)
Processing 11b01Lb.wav, emotion_code: L
Features shape for 11b01Lb.wav: (1, 62)
Processing 11b01Nc.wav, emotion_code: N
Features shape for 11b01Nc.wav: (1, 62)
Processing 11b01Wd.wav, emotion_code: W
Features shape for 11b01Wd.wav: (1, 62)
Processing 11b02Ab.wav, emotion_code: A
Features shape for 11b02Ab.wav: (1, 62)


Features shape for 13b01Fc.wav: (1, 62)
Processing 13b01Ld.wav, emotion_code: L
Features shape for 13b01Ld.wav: (1, 62)
Processing 13b01Nc.wav, emotion_code: N
Features shape for 13b01Nc.wav: (1, 62)
Processing 13b01Wa.wav, emotion_code: W
Features shape for 13b01Wa.wav: (1, 62)
Processing 13b02Fb.wav, emotion_code: F
Features shape for 13b02Fb.wav: (1, 62)
Processing 13b02Lc.wav, emotion_code: L
Features shape for 13b02Lc.wav: (1, 62)
Processing 13b02Nb.wav, emotion_code: N
Features shape for 13b02Nb.wav: (1, 62)
Processing 13b02Wa.wav, emotion_code: W
Features shape for 13b02Wa.wav: (1, 62)
Processing 13b03Ac.wav, emotion_code: A
Features shape for 13b03Ac.wav: (1, 62)
Processing 13b03Ed.wav, emotion_code: E
Features shape for 13b03Ed.wav: (1, 62)
Processing 13b03Fd.wav, emotion_code: F
Features shape for 13b03Fd.wav: (1, 62)
Processing 13b03Lb.wav, emotion_code: L
Features shape for 13b03Lb.wav: (1, 62)
Processing 13b03Na.wav, emotion_code: N
Features shape for 13b03Na.wav: (1, 62)


Features shape for 15a02Ea.wav: (1, 62)
Processing 15a02La.wav, emotion_code: L
Features shape for 15a02La.wav: (1, 62)
Processing 15a02Na.wav, emotion_code: N
Features shape for 15a02Na.wav: (1, 62)
Processing 15a02Ta.wav, emotion_code: T
Features shape for 15a02Ta.wav: (1, 62)
Processing 15a02Wb.wav, emotion_code: W
Features shape for 15a02Wb.wav: (1, 62)
Processing 15a02Wd.wav, emotion_code: W
Features shape for 15a02Wd.wav: (1, 62)
Processing 15a04Ab.wav, emotion_code: A
Features shape for 15a04Ab.wav: (1, 62)
Processing 15a04Ac.wav, emotion_code: A
Features shape for 15a04Ac.wav: (1, 62)
Processing 15a04Fd.wav, emotion_code: F
Features shape for 15a04Fd.wav: (1, 62)
Processing 15a04Nc.wav, emotion_code: N
Features shape for 15a04Nc.wav: (1, 62)
Processing 15a04Wa.wav, emotion_code: W
Features shape for 15a04Wa.wav: (1, 62)
Processing 15a04Wb.wav, emotion_code: W
Features shape for 15a04Wb.wav: (1, 62)
Processing 15a05Eb.wav, emotion_code: E
Features shape for 15a05Eb.wav: (1, 62)


Features shape for 16b03La.wav: (1, 62)
Processing 16b03Nb.wav, emotion_code: N
Features shape for 16b03Nb.wav: (1, 62)
Processing 16b03Ta.wav, emotion_code: T
Features shape for 16b03Ta.wav: (1, 62)
Processing 16b03Wb.wav, emotion_code: W
Features shape for 16b03Wb.wav: (1, 62)
Processing 16b09Ab.wav, emotion_code: A
Features shape for 16b09Ab.wav: (1, 62)
Processing 16b09Eb.wav, emotion_code: E
Features shape for 16b09Eb.wav: (1, 62)
Processing 16b09Fb.wav, emotion_code: F
Features shape for 16b09Fb.wav: (1, 62)
Processing 16b09La.wav, emotion_code: L
Features shape for 16b09La.wav: (1, 62)
Processing 16b09Lb.wav, emotion_code: L
Features shape for 16b09Lb.wav: (1, 62)
Processing 16b09Wb.wav, emotion_code: W
Features shape for 16b09Wb.wav: (1, 62)
Processing 16b10Aa.wav, emotion_code: A
Features shape for 16b10Aa.wav: (1, 62)
Processing 16b10Eb.wav, emotion_code: E
Features shape for 16b10Eb.wav: (1, 62)
Processing 16b10Fb.wav, emotion_code: F
Features shape for 16b10Fb.wav: (1, 62)


## open smile with a very simple NN

### feature extraction 

In [2]:
import os
import numpy as np
import opensmile
import pickle
from sklearn.preprocessing import StandardScaler


# Emotion labels
emotion_labels = {
    'W': 'Anger',
    'L': 'Boredom',
    'E': 'Disgust',
    'A': 'Anxiety/Fear',
    'F': 'Happiness',
    'T': 'Sadness',
    'N': 'Neutral'
}
label_to_idx = {label: idx for idx, label in enumerate(emotion_labels.keys())}

# Feature extraction
def extract_audio_features(file_path):
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.GeMAPSv01b,
        feature_level=opensmile.FeatureLevel.Functionals,
    )
    features = smile.process_file(file_path)
    return features.values

# Load data
data_folder = "Emo-DB-database"
file_list = os.listdir(data_folder)

data = []
labels = []

for file_name in file_list:
    emotion_code = file_name[5]
    if emotion_code in emotion_labels:
        file_path = os.path.join(data_folder, file_name)
        features = extract_audio_features(file_path)
        data.append(features)
        labels.append(emotion_code)

data = np.vstack(data)
labels = np.array(labels)

# Preprocessing
scaler = StandardScaler()
data = scaler.fit_transform(data)

# Save features and labels
with open('features.pkl', 'wb') as f:
    pickle.dump(data, f)
with open('labels.pkl', 'wb') as f:
    pickle.dump(labels, f)

### training

In [3]:
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# Emotion labels
emotion_labels = {
    'W': 'Anger',
    'L': 'Boredom',
    'E': 'Disgust',
    'A': 'Anxiety/Fear',
    'F': 'Happiness',
    'T': 'Sadness',
    'N': 'Neutral'
}
label_to_idx = {label: idx for idx, label in enumerate(emotion_labels.keys())}

# Load features and labels
with open('features.pkl', 'rb') as f:
    data = pickle.load(f)
with open('labels.pkl', 'rb') as f:
    labels = pickle.load(f)

# Convert labels to categorical
labels = np.array([label_to_idx[label] for label in labels])
labels = to_categorical(labels)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

def train_model(epochs, batch_size, learning_rate):
    # Create a feed-forward neural network
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dropout(0.5),
        Dense(32, activation='relu'),
        Dropout(0.5),
        Dense(len(emotion_labels), activation='softmax')
    ])

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    # Train the model
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test loss: {loss}, Test accuracy: {accuracy}")

    # Get predictions and convert back to original labels
    y_pred = model.predict(X_test)
    y_pred_labels = np.argmax(y_pred, axis=1)
    y_test_labels = np.argmax(y_test, axis=1)
    idx_to_label = {idx: label for label, idx in label_to_idx.items()}
    y_pred_labels = np.array([idx_to_label[idx] for idx in y_pred_labels])
    y_test_labels = np.array([idx_to_label[idx] for idx in y_test_labels])

    print(classification_report(y_test_labels, y_pred_labels, target_names=emotion_labels.values()))



# Trry for different hyperparamters
train_model(epochs=20, batch_size=32, learning_rate=0.001)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test loss: 1.2048426866531372, Test accuracy: 0.5887850522994995
              precision    recall  f1-score   support

       Anger       0.33      0.43      0.38        14
     Boredom       1.00      0.17      0.29        12
     Disgust       0.62      0.28      0.38        18
Anxiety/Fear       0.65      0.85      0.74        20
   Happiness       0.53      0.56      0.55        16
     Sadness       0.82      1.00      0.90         9
     Neutral       0.60      0.83      0.70        18

    accuracy                           0.59       107
   macro avg       0.65      0.59      0.56       107
weighted avg       0.63      0.59      0.56       107



## open smile with a very simple NN and f1 added 

## training 

In [1]:

import os
import numpy as np
import opensmile
import pickle
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report



def f1_metric(y_true, y_pred):
    y_pred = tf.round(y_pred)
    tp = tf.reduce_sum(tf.cast(y_true * y_pred, 'float'), axis=0)
    tn = tf.reduce_sum(tf.cast((1 - y_true) * (1 - y_pred), 'float'), axis=0)
    fp = tf.reduce_sum(tf.cast((1 - y_true) * y_pred, 'float'), axis=0)
    fn = tf.reduce_sum(tf.cast(y_true * (1 - y_pred), 'float'), axis=0)

    p = tp / (tp + fp + tf.keras.backend.epsilon())
    r = tp / (tp + fn + tf.keras.backend.epsilon())

    f1 = 2 * p * r / (p + r + tf.keras.backend.epsilon())
    f1 = tf.where(tf.math.is_nan(f1), tf.zeros_like(f1), f1)
    return tf.reduce_mean(f1)






# Emotion labels
emotion_labels = {
    'W': 'Anger',
    'L': 'Boredom',
    'E': 'Disgust',
    'A': 'Anxiety/Fear',
    'F': 'Happiness',
    'T': 'Sadness',
    'N': 'Neutral'
}
label_to_idx = {label: idx for idx, label in enumerate(emotion_labels.keys())}

# Load features and labels
with open('features.pkl', 'rb') as f:
    data = pickle.load(f)
with open('labels.pkl', 'rb') as f:
    labels = pickle.load(f)

# Convert labels to categorical
labels = np.array([label_to_idx[label] for label in labels])
labels = to_categorical(labels)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

def train_model(epochs, batch_size, learning_rate):
    # Create a feed-forward neural network
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dropout(0.5),
        Dense(32, activation='relu'),
        Dropout(0.5),
        Dense(len(emotion_labels), activation='softmax')
    ])

    # Compile the model
    #model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy', f1_metric])

    # Train the model
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)

    # Evaluate the model
    loss, accuracy, f1 = model.evaluate(X_test, y_test)
    print(f"Test loss: {loss}, Test accuracy: {accuracy}, Test F1 score: {f1}")

    # Get predictions and convert back to original labels
    y_pred = model.predict(X_test)
    y_pred_labels = np.argmax(y_pred, axis=1)
    y_test_labels = np.argmax(y_test, axis=1)
    idx_to_label = {idx: label for label, idx in label_to_idx.items()}
    y_pred_labels = np.array([idx_to_label[idx] for idx in y_pred_labels])
    y_test_labels = np.array([idx_to_label[idx] for idx in y_test_labels])

    print(classification_report(y_test_labels, y_pred_labels, target_names=emotion_labels.values()))



# Trry for different hyperparamters
train_model(epochs=20, batch_size=32, learning_rate=0.001)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test loss: 1.0828922986984253, Test accuracy: 0.6261682510375977, Test F1 score: 0.33803853392601013
              precision    recall  f1-score   support

       Anger       0.60      0.43      0.50        14
     Boredom       1.00      0.17      0.29        12
     Disgust       1.00      0.17      0.29        18
Anxiety/Fear       0.66      0.95      0.78        20
   Happiness       0.68      0.81      0.74        16
     Sadness       0.82      1.00      0.90         9
     Neutral       0.45      0.83      0.59        18

    accuracy                           0.63       107
   macro avg       0.74      0.62      0.58       107
weighted avg       0.73      0.63      0.58       107

