In [2]:
import pandas as pd
import numpy as np
import librosa
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

# Prefix for the dataset CSV; the empty string means "relative to the
# current working directory".
base_dir = ''
dataset = pd.read_csv(base_dir + 'augmented_dataset.csv')

# Convert Windows-style separators to forward slashes so the stored paths can
# be opened on any platform. regex=False makes the backslash a literal on
# every pandas version (the default of `regex` changed between major
# releases, and a lone backslash is not a valid regular expression).
dataset['file_path'] = dataset['file_path'].str.replace('\\', '/', regex=False)

def extract_features(file_path, index=None, duration=30):
    """Build one fixed-length feature vector for an audio file.

    The clip is loaded with librosa and five feature matrices are computed
    from it: MFCCs (40 coefficients), STFT chroma, mel spectrogram, spectral
    contrast and tonnetz (derived from the STFT chroma computed here). Each
    matrix is averaged along axis 1 and the resulting per-row means are
    concatenated in that order.

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.
        index: Unused. Kept (now optional) so existing positional callers
            that pass the row index keep working.
        duration: Maximum number of seconds of audio to load. Defaults to 30,
            the value that was previously hard-coded.

    Returns:
        A 1-D ``numpy`` array holding the concatenated row means.
    """
    y, sr = librosa.load(file_path, duration=duration)

    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    # The precomputed STFT chroma is handed to tonnetz alongside the signal.
    tonnetz = librosa.feature.tonnetz(y=y, chroma=chroma, sr=sr)

    # Collapse axis 1 of every matrix so clips of any length yield vectors of
    # the same size.
    feature_matrices = (mfccs, chroma, mel, contrast, tonnetz)
    return np.concatenate([np.mean(feat, axis=1) for feat in feature_matrices])

import os

# np.save does not create missing parent directories. Create the output folder
# before the slow extraction loop so a missing directory cannot discard the
# whole run at the very last step.
os.makedirs('base_features', exist_ok=True)

# Extract one feature vector per row of the dataset, in dataset order, so row i
# of the saved matrix corresponds to row i of the CSV.
features = []
for index, row in enumerate(dataset['file_path']):
    print(f"Processing file at index {index}...")
    features.append(extract_features(row, index))

# Stack the per-file vectors into a single array and cache it on disk for the
# modelling cells.
features = np.array(features)
np.save('base_features/base_features.npy', features)


Processing file at index 0...
Processing file at index 1...
Processing file at index 2...
Processing file at index 3...
Processing file at index 4...
Processing file at index 5...
Processing file at index 6...
Processing file at index 7...
Processing file at index 8...
Processing file at index 9...
Processing file at index 10...
Processing file at index 11...
Processing file at index 12...
Processing file at index 13...
Processing file at index 14...
Processing file at index 15...
Processing file at index 16...
Processing file at index 17...
Processing file at index 18...
Processing file at index 19...
Processing file at index 20...
Processing file at index 21...
Processing file at index 22...
Processing file at index 23...
Processing file at index 24...
Processing file at index 25...
Processing file at index 26...
Processing file at index 27...
Processing file at index 28...
Processing file at index 29...
Processing file at index 30...
Processing file at index 31...
Processing file a

In [10]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Locations of the annotation table and of the cached feature matrix.
dataset_path = 'augmented_dataset.csv'
features_path = 'base_features/base_features.npy'

# One seed drives both NumPy's and PyTorch's global random number generators.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Annotation table first, then the precomputed feature matrix.
df = pd.read_csv(dataset_path)
features = np.load(features_path)

def worker_init_fn(worker_id):
    """Seed NumPy's global RNG with the notebook seed offset by ``worker_id``.

    Passed to the ``DataLoader`` constructors as their ``worker_init_fn``.
    """
    worker_seed = seed + worker_id
    np.random.seed(worker_seed)

# Regression targets: column 0 is valence, column 1 is arousal.
valence = df[' valence_mean'].values
arousal = df[' arousal_mean'].values
targets = np.column_stack((valence, arousal))

# Hold out 30% of the rows for testing with a fixed split.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.3,
    random_state=42,
)

# Standardise using statistics from the training rows only, then apply the
# same transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Wrap each partition as float tensors.
train_inputs = torch.tensor(X_train_scaled, dtype=torch.float)
train_targets = torch.tensor(y_train, dtype=torch.float)
test_inputs = torch.tensor(X_test_scaled, dtype=torch.float)
test_targets = torch.tensor(y_test, dtype=torch.float)

train_dataset = TensorDataset(train_inputs, train_targets)
test_dataset = TensorDataset(test_inputs, test_targets)

# Only the training loader shuffles; both use batches of 32.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    worker_init_fn=worker_init_fn,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    worker_init_fn=worker_init_fn,
)

In [11]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from math import sqrt
import joblib

import os

# Random-forest benchmark: one independent 200-tree regressor per target
# (column 0 of the targets is valence, column 1 is arousal).
model_valence = RandomForestRegressor(n_estimators=200, random_state=42)
model_valence.fit(X_train_scaled, y_train[:, 0])

model_arousal = RandomForestRegressor(n_estimators=200, random_state=42)
model_arousal.fit(X_train_scaled, y_train[:, 1])

predictions_valence = model_valence.predict(X_test_scaled)
predictions_arousal = model_arousal.predict(X_test_scaled)

# Held-out metrics. RMSE is derived from MSE so the two always agree.
r2_valence_rf = r2_score(y_test[:, 0], predictions_valence)
r2_arousal_rf = r2_score(y_test[:, 1], predictions_arousal)

mse_valence = mean_squared_error(y_test[:, 0], predictions_valence)
mse_arousal = mean_squared_error(y_test[:, 1], predictions_arousal)

rmse_valence = sqrt(mse_valence)
rmse_arousal = sqrt(mse_arousal)

# Report every metric exactly once (the R^2 and MSE lines used to be printed
# twice).
print(f'R^2 Score for Valence (RandomForest): {r2_valence_rf}')
print(f'R^2 Score for Arousal (RandomForest): {r2_arousal_rf}')
print(f'MSE for Valence: {mse_valence}, MSE for Arousal: {mse_arousal}')
print(f'RMSE for Valence: {rmse_valence}, RMSE for Arousal: {rmse_arousal}')

# exist_ok=True creates the folder when missing and is a no-op otherwise,
# without a separate existence check.
models_directory = "benchmark_models"
os.makedirs(models_directory, exist_ok=True)

joblib.dump(model_valence, f'{models_directory}/RFR_valence.pkl')
joblib.dump(model_arousal, f'{models_directory}/RFR_arousal.pkl')

print(f"Models are saved in '{models_directory}'.")

R^2 Score for Valence (RandomForest): 0.39125138358653533
R^2 Score for Arousal (RandomForest): 0.4900748688679988
MSE for Valence: 0.6723005716432748, MSE for Arousal: 0.9189735762807016
R^2 Score for Valence (RandomForest): 0.39125138358653533
R^2 Score for Arousal (RandomForest): 0.4900748688679988
MSE for Valence: 0.6723005716432748, MSE for Arousal: 0.9189735762807016
RMSE for Valence: 0.8199393707118074, RMSE for Arousal: 0.9586310949894655
Models are saved in 'benchmark_models'.


In [12]:
from sklearn.svm import SVR

import os

# SVR benchmark: one RBF-kernel support vector regressor per target
# (column 0 of the targets is valence, column 1 is arousal).
model_valence_svr = SVR(C=1.0, epsilon=0.1, kernel='rbf')
model_valence_svr.fit(X_train_scaled, y_train[:, 0])

model_arousal_svr = SVR(C=1.0, epsilon=0.1, kernel='rbf')
model_arousal_svr.fit(X_train_scaled, y_train[:, 1])

predictions_valence_svr = model_valence_svr.predict(X_test_scaled)
predictions_arousal_svr = model_arousal_svr.predict(X_test_scaled)

# Held-out metrics. RMSE is derived from MSE so the two always agree.
r2_valence_svr = r2_score(y_test[:, 0], predictions_valence_svr)
r2_arousal_svr = r2_score(y_test[:, 1], predictions_arousal_svr)

mse_valence_svr = mean_squared_error(y_test[:, 0], predictions_valence_svr)
mse_arousal_svr = mean_squared_error(y_test[:, 1], predictions_arousal_svr)

rmse_valence_svr = sqrt(mse_valence_svr)
rmse_arousal_svr = sqrt(mse_arousal_svr)

# Report every metric exactly once (the R^2 and MSE lines used to be printed
# twice).
print(f'R^2 Score for Valence (SVR): {r2_valence_svr}')
print(f'R^2 Score for Arousal (SVR): {r2_arousal_svr}')
print(f'MSE for Valence (SVR): {mse_valence_svr}, MSE for Arousal (SVR): {mse_arousal_svr}')
print(f'RMSE for Valence (SVR): {rmse_valence_svr}, RMSE for Arousal (SVR): {rmse_arousal_svr}')

# exist_ok=True creates the folder when missing and is a no-op otherwise,
# without a separate existence check.
models_directory_svr = "benchmark_models"
os.makedirs(models_directory_svr, exist_ok=True)

joblib.dump(model_valence_svr, f'{models_directory_svr}/model_valence_svr.pkl')
joblib.dump(model_arousal_svr, f'{models_directory_svr}/model_arousal_svr.pkl')

print(f"SVR Models are saved in '{models_directory_svr}'.")

R^2 Score for Valence (SVR): 0.3218789699601775
R^2 Score for Arousal (SVR): 0.45505820661004914
MSE for Valence (SVR): 0.7489153056726605, MSE for Arousal (SVR): 0.9820796782944723
R^2 Score for Valence (SVR): 0.3218789699601775
R^2 Score for Arousal (SVR): 0.45505820661004914
MSE for Valence (SVR): 0.7489153056726605, MSE for Arousal (SVR): 0.9820796782944723
RMSE for Valence (SVR): 0.8653989286292539, RMSE for Arousal (SVR): 0.9909993331453217
SVR Models are saved in 'benchmark_models'.
