## Machine learning

In [None]:
import os
import numpy as np
import opensmile
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

# Emotion labels
# Maps the single-letter emotion code found in each file name (its sixth
# character, see the loading loop) to a human-readable emotion name.
emotion_labels = dict(
    W='Anger',
    L='Boredom',
    E='Disgust',
    A='Anxiety/Fear',
    F='Happiness',
    T='Sadness',
    N='Neutral',
)

# Feature extraction
def extract_audio_features(file_path):
    """Return the openSMILE GeMAPSv01b functionals for one audio file.

    Args:
        file_path: Path of the audio file passed to ``Smile.process_file``.

    Returns:
        The ``.values`` attribute of the object returned by
        ``process_file`` (presumably a 2-D NumPy array with one row for the
        file -- TODO confirm against the openSMILE documentation).
    """
    extractor_options = {
        "feature_set": opensmile.FeatureSet.GeMAPSv01b,
        "feature_level": opensmile.FeatureLevel.Functionals,
    }
    extractor = opensmile.Smile(**extractor_options)
    return extractor.process_file(file_path).values

# Load data
data_folder = "Emo-DB-database"
# os.listdir returns entries in arbitrary, filesystem-dependent order; sorting
# keeps the seeded train/test split below reproducible across machines.
file_list = sorted(os.listdir(data_folder))

print(f"Total files in the folder: {len(file_list)}")
print(f"File list: {file_list}\n")

data = []
labels = []

for file_name in file_list:
    # The emotion code is the sixth character of the file name. Slicing
    # (rather than indexing) yields '' for names shorter than six characters,
    # so such entries are skipped below instead of raising IndexError.
    emotion_code = file_name[5:6]
    print(f"Processing {file_name}, emotion_code: {emotion_code}")
    if emotion_code not in emotion_labels:
        print(f"Skipping {file_name}, emotion_code: {emotion_code}")
        continue
    file_path = os.path.join(data_folder, file_name)
    features = extract_audio_features(file_path)
    print(f"Features shape for {file_name}: {features.shape}")
    data.append(features)
    labels.append(emotion_code)

if not data:
    # np.vstack([]) would fail with a far less helpful message.
    raise ValueError(f"No files with a known emotion code found in {data_folder!r}")

data = np.vstack(data)
labels = np.array(labels)

# Train-test split (done BEFORE scaling so that no test-set statistics leak
# into the preprocessing step).
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Preprocessing: fit the scaler on the training portion only, then apply the
# same transformation to the held-out portion.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train a classifier
classifier = LogisticRegression(max_iter=1000)
classifier.fit(X_train, y_train)

# Evaluate the model
y_pred = classifier.predict(X_test)
# classification_report orders string classes alphabetically, which differs
# from the insertion order of emotion_labels. Pass the class codes explicitly
# and build target_names in the same order so every row gets the right name;
# this also keeps the report working if a class is missing from the test split.
report_codes = np.unique(labels).tolist()
report_names = [emotion_labels[code] for code in report_codes]
print(classification_report(y_test, y_pred, labels=report_codes, target_names=report_names))

## openSMILE features with a very simple neural network

### feature extraction 

In [None]:
import os
import numpy as np
import opensmile
import pickle
from sklearn.preprocessing import StandardScaler


# Emotion labels
# Emotion code (sixth character of each file name) -> readable emotion name.
emotion_labels = dict([
    ('W', 'Anger'),
    ('L', 'Boredom'),
    ('E', 'Disgust'),
    ('A', 'Anxiety/Fear'),
    ('F', 'Happiness'),
    ('T', 'Sadness'),
    ('N', 'Neutral'),
])
# Emotion code -> integer class index, following the order of emotion_labels.
label_to_idx = {code: position for position, code in enumerate(emotion_labels)}

# Feature extraction
def extract_audio_features(file_path):
    """Compute openSMILE GeMAPSv01b functionals for a single audio file.

    Args:
        file_path: Path of the audio file passed to ``Smile.process_file``.

    Returns:
        The ``.values`` attribute of the ``process_file`` result
        (presumably a 2-D NumPy array holding one row for the file --
        TODO confirm against the openSMILE documentation).
    """
    gemaps = opensmile.FeatureSet.GeMAPSv01b
    functionals = opensmile.FeatureLevel.Functionals
    feature_frame = opensmile.Smile(
        feature_set=gemaps,
        feature_level=functionals,
    ).process_file(file_path)
    return feature_frame.values

# Load data
data_folder = "Emo-DB-database"
# os.listdir returns entries in arbitrary, filesystem-dependent order; sorting
# keeps the saved row order (and any seeded split made from it) reproducible.
file_list = sorted(os.listdir(data_folder))

data = []
labels = []

for file_name in file_list:
    # The emotion code is the sixth character of the file name. Slicing
    # (rather than indexing) yields '' for names shorter than six characters,
    # so such entries are skipped instead of raising IndexError.
    emotion_code = file_name[5:6]
    if emotion_code not in emotion_labels:
        continue
    file_path = os.path.join(data_folder, file_name)
    data.append(extract_audio_features(file_path))
    labels.append(emotion_code)

if not data:
    # np.vstack([]) would fail with a far less helpful message.
    raise ValueError(f"No files with a known emotion code found in {data_folder!r}")

data = np.vstack(data)
labels = np.array(labels)

# Preprocessing
# NOTE(review): the scaler is fitted on the complete data set, i.e. before the
# train/test split that the training step performs on the saved features, so
# test-set statistics leak into the features. Fixing this means saving the
# unscaled features here and scaling after the split in the training step.
scaler = StandardScaler()
data = scaler.fit_transform(data)

# Save features and labels
with open('features.pkl', 'wb') as f:
    pickle.dump(data, f)
with open('labels.pkl', 'wb') as f:
    pickle.dump(labels, f)

### training

In [None]:
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# Emotion labels
# Single-letter emotion codes used in the stored labels, with display names.
emotion_labels = {
    "W": "Anger",
    "L": "Boredom",
    "E": "Disgust",
    "A": "Anxiety/Fear",
    "F": "Happiness",
    "T": "Sadness",
    "N": "Neutral",
}
# Each code's class index is its position in emotion_labels.
label_to_idx = dict(zip(emotion_labels, range(len(emotion_labels))))

# Load features and labels
# NOTE: pickle.load can execute arbitrary code contained in the file. Only
# load features.pkl / labels.pkl that were produced locally by the feature
# extraction step, never files from an untrusted source.
with open('features.pkl', 'rb') as f:
    data = pickle.load(f)
with open('labels.pkl', 'rb') as f:
    labels = pickle.load(f)

# Convert labels to categorical
labels = np.array([label_to_idx[label] for label in labels])
# Pin the number of columns to the number of known emotions. Without
# num_classes, to_categorical sizes the output from the largest index present,
# so a data set lacking the highest-index emotion would produce targets that
# do not match the network's output layer.
labels = to_categorical(labels, num_classes=len(emotion_labels))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

def train_model(epochs, batch_size, learning_rate):
    """Train a small feed-forward network and print its test-set metrics.

    Uses the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``
    arrays (one-hot targets) together with ``emotion_labels`` and
    ``label_to_idx``.

    Args:
        epochs: Number of training epochs passed to ``model.fit``.
        batch_size: Mini-batch size passed to ``model.fit``.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        None. The test loss/accuracy and a classification report are printed.
    """
    num_classes = len(emotion_labels)

    # Create a feed-forward neural network
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dropout(0.5),
        Dense(32, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    # Train the model; 10% of the training data is held out for validation.
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test loss: {loss}, Test accuracy: {accuracy}")

    # Collapse softmax outputs / one-hot targets to integer class indices.
    y_pred_idx = np.argmax(model.predict(X_test), axis=1)
    y_test_idx = np.argmax(y_test, axis=1)

    # Report on integer indices with an explicit labels list whose order
    # matches target_names. Mapping back to letter codes would make
    # classification_report sort the classes alphabetically, which does not
    # match the order of emotion_labels and would mislabel every row; the
    # explicit list also keeps the report working when a class is absent from
    # the test split.
    class_indices = [label_to_idx[code] for code in emotion_labels]
    class_names = list(emotion_labels.values())
    print(classification_report(y_test_idx, y_pred_idx, labels=class_indices, target_names=class_names))



# Try different hyperparameters by changing the arguments of this call.
train_model(epochs=20, batch_size=32, learning_rate=0.001)
