In [2]:
import sklearn.linear_model
import numpy as np
import pandas as pd
import os
from SoundLights.features_groups import  ARAUS_features, Freesound_features, mix_features, chosen_features

def normalise(X):
    """Standardise every column of an (n, p) NumPy array to mean 0, variance 1.

    Parameters
    ----------
    X : numpy.ndarray
        Array whose first axis indexes samples; statistics are taken along
        axis 0.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``X`` with the column mean subtracted and
        the result divided by the column standard deviation (``np.std``
        default, i.e. ``ddof=0``). Columns with zero standard deviation are
        returned as all zeros instead of NaN.
    """
    centred = X - np.mean(X, axis=0, keepdims=True)
    scale = np.std(X, axis=0, keepdims=True)
    # A constant column has zero spread, so dividing by it would give 0/0 = NaN.
    # Divide by 1 there instead: the centred column is already all zeros.
    scale = np.where(scale == 0, 1.0, scale)
    return centred / scale
def clip(x, x_min=-1, x_max=1):
    """Clip an array to values between ``x_min`` and ``x_max``.

    Parameters
    ----------
    x : array-like
        Values to clip.
    x_min : scalar, default -1
        Values below this bound are replaced by it.
    x_max : scalar, default 1
        Values above this bound are replaced by it.

    Returns
    -------
    numpy.ndarray
        Element-wise clipped copy of ``x``.
    """
    # np.clip does the two-sided clamp in a single pass; the input is converted
    # with np.asarray so a NumPy array is returned whatever array-like is given.
    return np.clip(np.asarray(x), x_min, x_max)

In [3]:
# Load the ARAUS responses and discard the file-name and participant columns.
responses_ARAUS = pd.read_csv(
    os.path.join('..','data','SoundLights_ARAUS.csv'),
    dtype={'info.participant': str},
).drop(["info.file", "info.participant"], axis=1)

# Remove every column whose entries are all zero, keeping track of the column
# names on either side of the operation so the removed ones can be identified.
columns_before = list(responses_ARAUS.columns)
has_nonzero = responses_ARAUS.ne(0).any(axis=0)
responses_ARAUS = responses_ARAUS.loc[:, has_nonzero]
columns_after = list(responses_ARAUS.columns)

# Columns present before but absent afterwards are the all-zero ones.
surviving = set(columns_after)
columns_dropped = [name for name in columns_before if name not in surviving]

# Keep the ARAUS feature list in step with the columns that are still available.
removed = set(columns_dropped)
ARAUS_features = [name for name in ARAUS_features if name not in removed]
print(ARAUS_features)

['ARAUS.sharpness.avg', 'ARAUS.sharpness.max', 'ARAUS.sharpness.p05', 'ARAUS.sharpness.p10', 'ARAUS.sharpness.p20', 'ARAUS.sharpness.p30', 'ARAUS.sharpness.p40', 'ARAUS.sharpness.p50', 'ARAUS.sharpness.p60', 'ARAUS.sharpness.p70', 'ARAUS.sharpness.p80', 'ARAUS.sharpness.p90', 'ARAUS.sharpness.p95', 'ARAUS.loudness.avg', 'ARAUS.loudness.max', 'ARAUS.loudness.p05', 'ARAUS.loudness.p10', 'ARAUS.loudness.p20', 'ARAUS.loudness.p30', 'ARAUS.loudness.p40', 'ARAUS.loudness.p50', 'ARAUS.loudness.p60', 'ARAUS.loudness.p70', 'ARAUS.loudness.p80', 'ARAUS.loudness.p90', 'ARAUS.loudness.p95', 'ARAUS.fluctuation.avg', 'ARAUS.fluctuation.max', 'ARAUS.fluctuation.p05', 'ARAUS.fluctuation.p10', 'ARAUS.fluctuation.p20', 'ARAUS.fluctuation.p30', 'ARAUS.fluctuation.p40', 'ARAUS.fluctuation.p50', 'ARAUS.fluctuation.p60', 'ARAUS.fluctuation.p70', 'ARAUS.fluctuation.p80', 'ARAUS.fluctuation.p90', 'ARAUS.fluctuation.p95', 'ARAUS.LA.avg', 'ARAUS.LA.min', 'ARAUS.LA.max', 'ARAUS.LA.p05', 'ARAUS.LA.p10', 'ARAUS.LA

In [4]:
# Load the Freesound responses, leaving out the file-name and participant columns.
responses_Freesound = (
    pd.read_csv(os.path.join('..','data','SoundLights_Freesound.csv'))
    .drop(["info.file", "info.participant"], axis=1)
)


In [5]:
# Load the mixed-dataset responses, leaving out the file-name and participant columns.
responses_Mix = (
    pd.read_csv(os.path.join('..','data','SoundLights_mix.csv'))
    .drop(["info.file", "info.participant"], axis=1)
)

In [6]:
# Load the complete response table, leaving out the file-name and participant columns.
responses_complete = (
    pd.read_csv(os.path.join('..','data','SoundLights_complete.csv'))
    .drop(["info.file", "info.participant"], axis=1)
)

### Adjust alpha

In [8]:
import warnings
from sklearn.exceptions import ConvergenceWarning
# Suppress ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

from sklearn.linear_model import ElasticNet

# Sweep the ElasticNet regularisation strength (alpha) with l1_ratio held fixed,
# and report train / validation / test errors averaged over the validation folds.
alpha = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
l1_ratio = 0.5
# selection="random" makes coordinate descent update a randomly chosen coefficient
# at each step; seeding it keeps the reported numbers identical between reruns.
RANDOM_STATE = 0

print('     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ ')
print('Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-')
print('     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures ')
print('-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------')

# The test set (fold 0) depends on neither alpha nor the validation fold, so it is
# built once up front instead of inside the loops.
# For the test set, the same 48 stimuli were shown to all participants so we take
# the mean of their ratings as the ground truth.
df_test = responses_complete[responses_complete['info.fold'] == 0].groupby(['info.soundscape','info.masker','info.smr']).mean()
Y_test = df_test['info.P_ground_truth'].values
X_test = df_test[chosen_features].values

for value in alpha:

    model = ElasticNet(alpha=value, l1_ratio=l1_ratio, selection="random", random_state=RANDOM_STATE)

    # Per-fold metrics for the current alpha.
    MSEs_train = []
    MSEs_val = []
    MSEs_test = []
    MEs_train = []
    MEs_val = []
    MEs_test = []

    for val_fold in [1,2,3,4,5]:

        # Training set: every sample that is neither in the test set (fold 0)
        # nor in the current validation fold.
        df_train = responses_complete[(responses_complete['info.fold'] != val_fold) & (responses_complete['info.fold'] > 0)]
        df_val   = responses_complete[responses_complete['info.fold'] == val_fold]

        # Get ground-truth labels
        Y_train = df_train['info.P_ground_truth'].values
        Y_val = df_val['info.P_ground_truth'].values

        # Get features
        X_train = df_train[chosen_features].values
        X_val = df_val[chosen_features].values

        # Fit model (fit returns the estimator itself)
        X_LR = model.fit(X_train, Y_train)

        # Predict each split once; predictions are clipped to [-1, 1] (clip's
        # default bounds) before being compared with the labels.
        err_train = clip(X_LR.predict(X_train)) - Y_train
        err_val = clip(X_LR.predict(X_val)) - Y_val
        err_test = clip(X_LR.predict(X_test)) - Y_test

        # Mean squared errors and mean absolute errors
        MSE_train = np.mean(err_train**2)
        MSE_val = np.mean(err_val**2)
        MSE_test = np.mean(err_test**2)
        ME_train = np.mean(np.abs(err_train))
        ME_val = np.mean(np.abs(err_val))
        ME_test = np.mean(np.abs(err_test))

        # Add metrics
        MSEs_train.append(MSE_train)
        MSEs_val.append(MSE_val)
        MSEs_test.append(MSE_test)
        MEs_train.append(ME_train)
        MEs_val.append(ME_val)
        MEs_test.append(ME_test)

        # Uncomment to print the per-fold row that fills every column of the table header above.
        #print(f'{val_fold:4d} | {MSE_train:.4f} | {MSE_val:.4f} | {MSE_test:.4f} | {ME_train:.4f} | {ME_val:.4f} | {ME_test:.4f} | {X_LR.intercept_:7.4f} | {X_train.shape[0]:5d} | {X_val.shape[0]:5d} | {X_test.shape[0]:^4d} | {X_train.shape[1]:^5d} | {np.sum(np.abs(X_LR.coef_) > 0):^5d} |')
    print("Parameters ",value, l1_ratio )
    print(f'Mean | {np.mean(MSEs_train):.4f} | {np.mean(MSEs_val):.4f} | {np.mean(MSEs_test):.4f} | {np.mean(MEs_train):.4f} | {np.mean(MEs_val):.4f} | {np.mean(MEs_test):.4f} |')

     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ 
Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-
     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures 
-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------
Parameters  0.1 0.5
Mean | 0.1243 | 0.1288 | 0.0794 | 0.2854 | 0.2902 | 0.2377 |
Parameters  0.2 0.5
Mean | 0.1255 | 0.1289 | 0.0768 | 0.2868 | 0.2906 | 0.2269 |
Parameters  0.3 0.5
Mean | 0.1257 | 0.1289 | 0.0774 | 0.2872 | 0.2906 | 0.2270 |
Parameters  0.4 0.5
Mean | 0.1258 | 0.1289 | 0.0779 | 0.2874 | 0.2907 | 0.2277 |
Parameters  0.5 0.5
Mean | 0.1259 | 0.1289 | 0.0784 | 0.2875 | 0.2908 | 0.2283 |
Parameters  0.6 0.5
Mean | 0.1260 | 0.1289 | 0.0786 | 0.2877 | 0.2908 | 0.2286 |
Parameters  0.7 0.5
Mean | 0.1261 | 0.1289 | 0.0788 | 0.2878 | 0.2909 | 0.2287 |
P

### Adjust l1_ratio

In [9]:
import warnings
from sklearn.exceptions import ConvergenceWarning
# Suppress ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

from sklearn.linear_model import ElasticNet

# Sweep the ElasticNet L1/L2 mixing parameter (l1_ratio) with alpha held fixed,
# and report train / validation / test errors averaged over the validation folds.
alpha = 0.2
l1_ratio = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
# selection="random" makes coordinate descent update a randomly chosen coefficient
# at each step; seeding it keeps the reported numbers identical between reruns.
RANDOM_STATE = 0

print('     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ ')
print('Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-')
print('     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures ')
print('-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------')

# The test set (fold 0) depends on neither l1_ratio nor the validation fold, so it
# is built once up front instead of inside the loops.
# For the test set, the same 48 stimuli were shown to all participants so we take
# the mean of their ratings as the ground truth.
df_test = responses_complete[responses_complete['info.fold'] == 0].groupby(['info.soundscape','info.masker','info.smr']).mean()
Y_test = df_test['info.P_ground_truth'].values
X_test = df_test[chosen_features].values

for value in l1_ratio:

    model = ElasticNet(alpha=alpha, l1_ratio=value, selection="random", random_state=RANDOM_STATE)

    # Per-fold metrics for the current l1_ratio.
    MSEs_train = []
    MSEs_val = []
    MSEs_test = []
    MEs_train = []
    MEs_val = []
    MEs_test = []

    for val_fold in [1,2,3,4,5]:

        # Training set: every sample that is neither in the test set (fold 0)
        # nor in the current validation fold.
        df_train = responses_complete[(responses_complete['info.fold'] != val_fold) & (responses_complete['info.fold'] > 0)]
        df_val   = responses_complete[responses_complete['info.fold'] == val_fold]

        # Get ground-truth labels
        Y_train = df_train['info.P_ground_truth'].values
        Y_val = df_val['info.P_ground_truth'].values

        # Get features
        X_train = df_train[chosen_features].values
        X_val = df_val[chosen_features].values

        # Fit model (fit returns the estimator itself)
        X_LR = model.fit(X_train, Y_train)

        # Predict each split once; predictions are clipped to [-1, 1] (clip's
        # default bounds) before being compared with the labels.
        err_train = clip(X_LR.predict(X_train)) - Y_train
        err_val = clip(X_LR.predict(X_val)) - Y_val
        err_test = clip(X_LR.predict(X_test)) - Y_test

        # Mean squared errors and mean absolute errors
        MSE_train = np.mean(err_train**2)
        MSE_val = np.mean(err_val**2)
        MSE_test = np.mean(err_test**2)
        ME_train = np.mean(np.abs(err_train))
        ME_val = np.mean(np.abs(err_val))
        ME_test = np.mean(np.abs(err_test))

        # Add metrics
        MSEs_train.append(MSE_train)
        MSEs_val.append(MSE_val)
        MSEs_test.append(MSE_test)
        MEs_train.append(ME_train)
        MEs_val.append(ME_val)
        MEs_test.append(ME_test)

        # Uncomment to print the per-fold row that fills every column of the table header above.
        #print(f'{val_fold:4d} | {MSE_train:.4f} | {MSE_val:.4f} | {MSE_test:.4f} | {ME_train:.4f} | {ME_val:.4f} | {ME_test:.4f} | {X_LR.intercept_:7.4f} | {X_train.shape[0]:5d} | {X_val.shape[0]:5d} | {X_test.shape[0]:^4d} | {X_train.shape[1]:^5d} | {np.sum(np.abs(X_LR.coef_) > 0):^5d} |')
    print("Parameters ",alpha, value )
    print(f'Mean | {np.mean(MSEs_train):.4f} | {np.mean(MSEs_val):.4f} | {np.mean(MSEs_test):.4f} | {np.mean(MEs_train):.4f} | {np.mean(MEs_val):.4f} | {np.mean(MEs_test):.4f} |')

     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ 
Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-
     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures 
-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------
Parameters  0.2 0.1
Mean | 0.1233 | 0.1286 | 0.0848 | 0.2840 | 0.2897 | 0.2516 |
Parameters  0.2 0.2
Mean | 0.1241 | 0.1287 | 0.0806 | 0.2851 | 0.2900 | 0.2420 |
Parameters  0.2 0.3
Mean | 0.1246 | 0.1288 | 0.0782 | 0.2858 | 0.2903 | 0.2338 |
Parameters  0.2 0.4
Mean | 0.1251 | 0.1288 | 0.0770 | 0.2864 | 0.2904 | 0.2291 |
Parameters  0.2 0.5
Mean | 0.1255 | 0.1289 | 0.0768 | 0.2868 | 0.2906 | 0.2269 |
Parameters  0.2 0.6
Mean | 0.1256 | 0.1289 | 0.0770 | 0.2871 | 0.2906 | 0.2266 |
Parameters  0.2 0.7
Mean | 0.1257 | 0.1289 | 0.0773 | 0.2872 | 0.2906 | 0.2269 |
P