In [1]:
import pandas as pd

# Load the arousal and valence CSV files to inspect their contents
arousal_df = pd.read_csv('./data/annotations/arousal.csv')
valence_df = pd.read_csv('./data/annotations/valence.csv')

# Display the first few rows of each file to understand the data structure
arousal_df.head(), valence_df.head()


(   song_id  sample_15000ms  sample_15500ms  sample_16000ms  sample_16500ms  \
 0        2       -0.109386       -0.114942       -0.116413       -0.118613   
 1        3       -0.110846       -0.123973       -0.131103       -0.135956   
 2        4        0.222327        0.179446        0.178388        0.184056   
 3        5       -0.255613       -0.251579       -0.251958       -0.251124   
 4        7        0.464234        0.460789        0.460991        0.461046   
 
    sample_17000ms  sample_17500ms  sample_18000ms  sample_18500ms  \
 0       -0.126457       -0.133199       -0.136855       -0.144713   
 1       -0.140775       -0.144664       -0.163118       -0.165218   
 2        0.176042        0.178720        0.176345        0.175793   
 3       -0.250763       -0.251957       -0.251957       -0.251957   
 4        0.457240        0.465702        0.471809        0.469918   
 
    sample_19000ms  ...  sample_622000ms  sample_622500ms  sample_623000ms  \
 0       -0.138985  ... 

In [3]:
# Calculate the mean, median, and standard deviation of samples for each song in both arousal and valence dataframes
arousal_features = arousal_df.iloc[:, 1:].agg(['mean', 'median', 'std'], axis=1)
valence_features = valence_df.iloc[:, 1:].agg(['mean', 'median', 'std'], axis=1)

# Combine these statistics into a single feature set
features = pd.concat([arousal_features, valence_features], axis=1)
features.columns = ['arousal_mean', 'arousal_median', 'arousal_std', 
                    'valence_mean', 'valence_median', 'valence_std']

# Extract the target labels (arousal and valence mean values) for model training
y_arousal = arousal_features['mean'].values  # Target for arousal
y_valence = valence_features['mean'].values  # Target for valence

# Display the processed feature set and target arrays
features.head(), y_arousal[:5], y_valence[:5]


(   arousal_mean  arousal_median  arousal_std  valence_mean  valence_median  \
 0     -0.197517       -0.164383     0.060047     -0.215511       -0.242763   
 1     -0.193187       -0.173634     0.042909     -0.265855       -0.283232   
 2      0.243072        0.261933     0.056383      0.155210        0.166481   
 3     -0.236207       -0.235395     0.009459      0.140160        0.133046   
 4      0.376292        0.344691     0.054167      0.332455        0.334178   
 
    valence_std  
 0     0.101869  
 1     0.045872  
 2     0.046598  
 3     0.022091  
 4     0.013138  ,
 array([-0.19751742, -0.19318693,  0.24307227, -0.23620671,  0.37629202]),
 array([-0.2155107 , -0.26585485,  0.15520962,  0.14015958,  0.33245465]))

In [9]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import joblib

# Assuming you have `arousal.csv` and `valence.csv` data files
# with one song per row and multiple time-sample columns
arousal_df = pd.read_csv('./data/annotations/arousal.csv')
valence_df = pd.read_csv('./data/annotations/valence.csv')

# Extract mean, median, and std of arousal and valence values as new labels
arousal_mean = arousal_df.iloc[:, 1:].mean(axis=1)
valence_mean = valence_df.iloc[:, 1:].mean(axis=1)

# Prepare features with only 3 values for each song (mean values for tempo, energy, and spectral centroid)
def extract_features_for_training(df):
    features = []
    for _, row in df.iterrows():
        tempo = np.mean(row[1:])
        energy = np.sum(row[1:] ** 2) / len(row[1:])
        spectral_centroid = np.mean(row[1:])
        features.append([tempo, energy, spectral_centroid])
    return np.array(features)

# Extract features and labels
X = extract_features_for_training(arousal_df)  # Feature matrix
y_arousal = arousal_mean.values  # Arousal labels
y_valence = valence_mean.values  # Valence labels

# Split data for training and testing
X_train, X_test, y_train_arousal, y_test_arousal = train_test_split(X, y_arousal, test_size=0.2, random_state=42)
X_train, X_test, y_train_valence, y_test_valence = train_test_split(X, y_valence, test_size=0.2, random_state=42)

# Train Random Forest models for arousal and valence prediction with the 3 features
rf_arousal = RandomForestRegressor(n_estimators=100, random_state=42)
rf_arousal.fit(X_train, y_train_arousal)

rf_valence = RandomForestRegressor(n_estimators=100, random_state=42)
rf_valence.fit(X_train, y_train_valence)

# Evaluate models using Mean Squared Error (MSE)
arousal_pred = rf_arousal.predict(X_test)
valence_pred = rf_valence.predict(X_test)

arousal_mse = mean_squared_error(y_test_arousal, arousal_pred)
valence_mse = mean_squared_error(y_test_valence, valence_pred)

# Save the trained models
joblib.dump(rf_arousal, "./model/rf_arousal_model_3_features.pkl")
joblib.dump(rf_valence, "./model/rf_valence_model_3_features.pkl")

print(f"Arousal MSE: {arousal_mse}, Valence MSE: {valence_mse}")


Arousal MSE: 4.4580093385233504e-06, Valence MSE: 0.048523434095012224
