In [None]:
import pandas as pd
import numpy as np
import librosa
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

base_dir = ''
dataset = pd.read_csv(base_dir + 'DEAM_dataset_paths.csv')
dataset['file_path'] = dataset['file_path'].str.replace('\\', '/')
dataset['file_path'] = dataset['file_path']

def extract_features(file_path, index):
    y, sr = librosa.load(file_path, duration=30)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    tonnetz = librosa.feature.tonnetz(y=y, chroma=chroma, sr=sr)
    features = [mfccs, chroma, mel, contrast, tonnetz]
    averaged_features = [np.mean(feat, axis=1) for feat in features]
    return np.concatenate(averaged_features)

features = []
for index, row in enumerate(dataset['file_path']):
    print(f"Processing file at index {index}...")
    features.append(extract_features(row, index))

features = np.array(features)   
np.save('DEAM_extracted_features_base.npy', features)

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

base_dir = ''
dataset = pd.read_csv(base_dir + 'DEAM_dataset_paths.csv')
dataset['file_path'] = dataset['file_path'].str.replace('\\', '/')
dataset['file_path'] = dataset['file_path']

features = np.load('DEAM_extracted_features_base.npy')
valence = dataset[' valence_mean'].values.astype(np.float32)  
arousal = dataset[' arousal_mean'].values.astype(np.float32)

scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

features_tensor = torch.tensor(features_scaled, dtype=torch.float32)
valence_tensor = torch.tensor(valence, dtype=torch.float32)
arousal_tensor = torch.tensor(arousal, dtype=torch.float32)

In [5]:
from sklearn.model_selection import train_test_split

torch.manual_seed(42)

class AudioFeaturesDataset(Dataset):    
    def __init__(self, features, targets):

        self.features = features
        self.targets = targets
        
    def __len__(self):
        return len(self.targets)
    
    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]


X_train, X_val, y_train, y_val = train_test_split(features_tensor, valence_tensor, test_size=0.2, random_state=42)

train_dataset = AudioFeaturesDataset(X_train, y_train)
test_dataset = AudioFeaturesDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [7]:
train_features, train_labels = next(iter(train_loader))
print("Training example features shape:", train_features.shape)
print("Training example label:", train_labels[0])

val_features, val_labels = next(iter(test_loader))
print("Validation example features shape:", val_features.shape)
print("Validation example label:", val_labels[0])

Training example features shape: torch.Size([32, 193])
Training example label: tensor(4.5000)
Validation example features shape: torch.Size([32, 193])
Validation example label: tensor(3.8000)


In [8]:
class ComplexNN(nn.Module):
    def __init__(self, input_size):
        super(ComplexNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 500)
        self.fc2 = nn.Linear(500, 500)
        self.fc3 = nn.Linear(500, 500)
        self.fc4 = nn.Linear(500, 500)
        self.fc5 = nn.Linear(500, 500)
        self.fc6 = nn.Linear(500, 500)
        self.fc7 = nn.Linear(500, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        x = torch.relu(self.fc5(x))
        x = torch.relu(self.fc6(x))
        x = self.fc7(x)
        return x


class SimpleNN(nn.Module):
    def __init__(self, input_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        return x

In [9]:
input_shape = X_train.shape[1]
print(input_shape)
valence_model = ComplexNN(input_shape)

193


In [10]:
from torch.optim import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import numpy as np 
import os
from math import sqrt

def evaluate_model(model, data_loader, device):
    model.eval()
    predictions = []
    targets = []
    with torch.no_grad():
        for inputs, targets_batch in data_loader:
            inputs, targets_batch = inputs.to(device), targets_batch.to(device)  # Move to the specified device
            outputs = model(inputs)
            predictions.extend(outputs.squeeze().tolist())
            targets.extend(targets_batch.tolist())
    mse = mean_squared_error(targets, predictions)
    rmse = sqrt(mse)
    r2 = r2_score(targets, predictions)
    return mse, rmse, r2


def train_and_evaluate(model, train_loader, test_loader, lr, epochs):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    optimizer = Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    
    for epoch in range(epochs):
        model.train()
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), targets)
            loss.backward()
            optimizer.step()

    test_mse, test_rmse, test_r2 = evaluate_model(model, test_loader, device = 'cuda')
    print(f"LR: {lr}, Epochs: {epochs}, Test MSE: {test_mse:.4f}, Test RMSE: {test_rmse:.4f}, Test R2: {test_r2:.4f}")
    
    model_directory = f"base_model/"
    if not os.path.exists(model_directory):
        os.makedirs(model_directory)
    torch.save(model.state_dict(), f"{model_directory}/model.pth")
    print(f"Model saved in '{model_directory}/model.pth'")

    return test_mse, test_rmse, test_r2

In [27]:
device = 'cuda'
lr =  0.005
epochs = 50
mse, rmse, r2 = train_and_evaluate(valence_model, train_loader, test_loader, lr, epochs)

LR: 0.005, Epochs: 120, Test MSE: 0.8677, Test RMSE: 0.9315, Test R2: 0.2353
Model saved in 'base_model/grid_search//model.pth'


In [12]:
from sklearn.model_selection import train_test_split

torch.manual_seed(42)

class AudioFeaturesDataset(Dataset):    
    def __init__(self, features, targets):

        self.features = features
        self.targets = targets
        
    def __len__(self):
        return len(self.targets)
    
    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]


X_train, X_val, y_train, y_val = train_test_split(features_tensor, arousal_tensor, test_size=0.2, random_state=42)

train_dataset = AudioFeaturesDataset(X_train, y_train)
test_dataset = AudioFeaturesDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [13]:
input_shape = X_train.shape[1]
print(input_shape)
arousal_model = ComplexNN(input_shape)

193


In [15]:
device = 'cuda'
lr =  0.005
epochs = 100
mse, rmse, r2 = train_and_evaluate(arousal_model, train_loader, test_loader, lr, epochs)

LR: 0.005, Epochs: 100, Test MSE: 1.4920, Test RMSE: 1.2215, Test R2: 0.1665
Model saved in 'base_model/grid_search//model.pth'


In [29]:
import itertools
from sklearn.metrics import mean_squared_error, r2_score

device = 'cuda'
hyperparams_grid = {
    'lr': [0.001, 0.005],
    'epochs': [30, 50, 100]
}

def grid_search_hyperparams(model_class, input_shape, train_loader, test_loader, hyperparams_grid):
    best_mse = float('inf')
    best_r2 = -float('inf')
    best_params_mse = {}
    best_params_r2 = {}

    combinations = list(itertools.product(*(hyperparams_grid[key] for key in hyperparams_grid)))

    for lr, epochs in combinations:
        model = model_class(input_shape).to(device)
        
        mse, rmse, r2 = train_and_evaluate(model, train_loader, test_loader, lr, epochs)

        if mse < best_mse:
            best_mse = mse
            best_params_mse = {'lr': lr, 'epochs': epochs}

        if r2 > best_r2:
            best_r2 = r2
            best_params_r2 = {'lr': lr, 'epochs': epochs}

    print(f"Best MSE: {best_mse}, Configuration: {best_params_mse}")
    print(f"Best R^2: {best_r2}, Configuration: {best_params_r2}")
    
grid_search_hyperparams(ComplexNN, input_shape, train_loader, test_loader, hyperparams_grid)

LR: 0.001, Epochs: 30, Test MSE: 0.7429, Test RMSE: 0.8619, Test R2: 0.3453
Model saved in 'base_model/grid_search//model.pth'
LR: 0.001, Epochs: 50, Test MSE: 0.7510, Test RMSE: 0.8666, Test R2: 0.3381
Model saved in 'base_model/grid_search//model.pth'
LR: 0.001, Epochs: 100, Test MSE: 0.7064, Test RMSE: 0.8405, Test R2: 0.3774
Model saved in 'base_model/grid_search//model.pth'
LR: 0.005, Epochs: 30, Test MSE: 0.7867, Test RMSE: 0.8870, Test R2: 0.3067
Model saved in 'base_model/grid_search//model.pth'
LR: 0.005, Epochs: 50, Test MSE: 0.7882, Test RMSE: 0.8878, Test R2: 0.3054
Model saved in 'base_model/grid_search//model.pth'
LR: 0.005, Epochs: 100, Test MSE: 0.8266, Test RMSE: 0.9092, Test R2: 0.2716
Model saved in 'base_model/grid_search//model.pth'
Best MSE: 0.7064342346625143, Configuration: {'lr': 0.001, 'epochs': 100}
Best R^2: 0.3774265575739525, Configuration: {'lr': 0.001, 'epochs': 100}
