In [2]:
import sklearn.linear_model
import numpy as np
import pandas as pd
import os
from SoundLights.features_groups import  ARAUS_features, Freesound_features, mix_features

def normalise(X):
    """Normalise an (n,p) numpy array to mean 0, variance 1 along axis 0.

    Parameters
    ----------
    X : array-like
        Data whose columns (axis 0 statistics) are standardised.

    Returns
    -------
    numpy.ndarray
        ``(X - mean) / std`` computed per column. Columns with zero standard
        deviation are returned as all zeros instead of NaN/inf.
    """
    X = np.asarray(X)
    centred = X - np.mean(X, axis=0, keepdims=True)
    std = np.std(X, axis=0, keepdims=True)
    # A constant column has std == 0; dividing by 1 there keeps the centred
    # (all-zero) values and avoids a 0/0 division.
    return centred / np.where(std == 0, 1, std)
def clip(x, x_min=-1, x_max=1):
    """Clip an array to values between x_min and x_max.

    Parameters
    ----------
    x : array-like
        Values to clip; plain Python sequences are accepted as well as arrays.
    x_min, x_max : scalar, optional
        Lower and upper bounds (defaults -1 and 1).

    Returns
    -------
    numpy.ndarray
        Copy of ``x`` with values below ``x_min`` set to ``x_min`` and values
        above ``x_max`` set to ``x_max``.
    """
    # np.clip performs both clamps in one pass instead of nesting np.where
    # calls and evaluating the lower clamp twice.
    return np.clip(x, x_min, x_max)

## 0) Prepare data

In [3]:
# Load the ARAUS table. 'info.participant' is read as str, then dropped
# together with 'info.file'.
responses_ARAUS = pd.read_csv(
    os.path.join('..', 'data', 'SoundLights_ARAUS.csv'),
    dtype={'info.participant': str},
).drop(columns=["info.file", "info.participant"])

# Remove every column that contains only zeros, recording the column names
# before and after so the removed ones can be identified.
columns_before = responses_ARAUS.columns.tolist()
has_nonzero_value = (responses_ARAUS != 0).any(axis=0)
responses_ARAUS = responses_ARAUS.loc[:, has_nonzero_value]
columns_after = responses_ARAUS.columns.tolist()
surviving_columns = set(columns_after)
columns_dropped = [col for col in columns_before if col not in surviving_columns]

# Keep the ARAUS feature list in sync with the columns that are still present.
removed_columns = set(columns_dropped)
ARAUS_features = [col for col in ARAUS_features if col not in removed_columns]
print(ARAUS_features)


['ARAUS.sharpness.avg', 'ARAUS.sharpness.max', 'ARAUS.sharpness.p05', 'ARAUS.sharpness.p10', 'ARAUS.sharpness.p20', 'ARAUS.sharpness.p30', 'ARAUS.sharpness.p40', 'ARAUS.sharpness.p50', 'ARAUS.sharpness.p60', 'ARAUS.sharpness.p70', 'ARAUS.sharpness.p80', 'ARAUS.sharpness.p90', 'ARAUS.sharpness.p95', 'ARAUS.loudness.avg', 'ARAUS.loudness.max', 'ARAUS.loudness.p05', 'ARAUS.loudness.p10', 'ARAUS.loudness.p20', 'ARAUS.loudness.p30', 'ARAUS.loudness.p40', 'ARAUS.loudness.p50', 'ARAUS.loudness.p60', 'ARAUS.loudness.p70', 'ARAUS.loudness.p80', 'ARAUS.loudness.p90', 'ARAUS.loudness.p95', 'ARAUS.fluctuation.avg', 'ARAUS.fluctuation.max', 'ARAUS.fluctuation.p05', 'ARAUS.fluctuation.p10', 'ARAUS.fluctuation.p20', 'ARAUS.fluctuation.p30', 'ARAUS.fluctuation.p40', 'ARAUS.fluctuation.p50', 'ARAUS.fluctuation.p60', 'ARAUS.fluctuation.p70', 'ARAUS.fluctuation.p80', 'ARAUS.fluctuation.p90', 'ARAUS.fluctuation.p95', 'ARAUS.LA.avg', 'ARAUS.LA.min', 'ARAUS.LA.max', 'ARAUS.LA.p05', 'ARAUS.LA.p10', 'ARAUS.LA

In [4]:
# Load the Freesound table and drop the 'info.file' and 'info.participant' columns.
responses_Freesound = pd.read_csv(
    os.path.join('..', 'data', 'SoundLights_Freesound.csv')
).drop(columns=["info.file", "info.participant"])


In [5]:
# Load the mixed-feature table and drop the 'info.file' and 'info.participant' columns.
responses_Mix = pd.read_csv(
    os.path.join('..', 'data', 'SoundLights_mix.csv')
).drop(columns=["info.file", "info.participant"])

## 1) PLEASANTNESS

### 1.1) ARAUS - With default parameters

In [4]:
# 5-fold cross-validation of an ElasticNet with scikit-learn's default
# hyperparameters, predicting 'info.P_ground_truth' from the ARAUS features.
model = sklearn.linear_model.ElasticNet()
print(f'Investigating performance of {model} model...')

# Per-fold metrics; averaged after the loop.
MSEs_train = []
MSEs_val = []
MSEs_test = []
MEs_train = []
MEs_val = []
MEs_test = []

print('     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ ')
print('Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-')
print('     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures ')
print('-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------')
for val_fold in [1,2,3,4,5]:

    # Extract dataframes
    # Training set: all samples that are neither in the test set (fold 0) nor in the current validation fold.
    df_train = responses_ARAUS[(responses_ARAUS['info.fold'] != val_fold) & (responses_ARAUS['info.fold'] > 0)]
    df_val   = responses_ARAUS[responses_ARAUS['info.fold'] == val_fold]
    # Test set: the same 48 stimuli were shown to all participants, so the mean of their ratings is the ground truth.
    df_test  = responses_ARAUS[responses_ARAUS['info.fold'] == 0].groupby(['info.soundscape','info.masker','info.smr']).mean()

    # Get ground-truth labels
    Y_train = df_train['info.P_ground_truth'].values
    Y_val = df_val['info.P_ground_truth'].values
    Y_test = df_test['info.P_ground_truth'].values

    # Get features
    X_train = df_train[ARAUS_features].values
    X_val = df_val[ARAUS_features].values
    X_test = df_test[ARAUS_features].values

    # Fit model (fit() returns the estimator itself, so X_LR and model are the same object)
    X_LR = model.fit(X_train, Y_train)

    # Predict each split once; clip() limits predictions to its default range [-1, 1].
    pred_train = clip(X_LR.predict(X_train))
    pred_val = clip(X_LR.predict(X_val))
    pred_test = clip(X_LR.predict(X_test))

    # Get MSEs and mean absolute errors (the "Mean error" columns of the table)
    MSE_train = np.mean((pred_train - Y_train)**2)
    MSE_val = np.mean((pred_val - Y_val)**2)
    MSE_test = np.mean((pred_test - Y_test)**2)
    ME_train = np.mean(np.abs(pred_train - Y_train))
    ME_val = np.mean(np.abs(pred_val - Y_val))
    ME_test = np.mean(np.abs(pred_test - Y_test))

    # Add metrics
    MSEs_train.append(MSE_train)
    MSEs_val.append(MSE_val)
    MSEs_test.append(MSE_test)
    MEs_train.append(ME_train)
    MEs_val.append(ME_val)
    MEs_test.append(ME_test)

    print(f'{val_fold:4d} | {MSE_train:.4f} | {MSE_val:.4f} | {MSE_test:.4f} | {ME_train:.4f} | {ME_val:.4f} | {ME_test:.4f} | {X_LR.intercept_:7.4f} | {X_train.shape[0]:5d} | {X_val.shape[0]:5d} | {X_test.shape[0]:^4d} | {X_train.shape[1]:^5d} | {np.sum(np.abs(X_LR.coef_) > 0):^5d} |')

print(f'Mean | {np.mean(MSEs_train):.4f} | {np.mean(MSEs_val):.4f} | {np.mean(MSEs_test):.4f} | {np.mean(MEs_train):.4f} | {np.mean(MEs_val):.4f} | {np.mean(MEs_test):.4f} |')
print()


# Rank the non-zero coefficients by magnitude. X_LR was last fitted with
# val_fold == 5, so this ranking describes that final fold only.
coefficients = X_LR.coef_
feature_importances = list(zip(ARAUS_features, coefficients))
feature_importances.sort(key=lambda x: abs(x[1]), reverse=True)
for feature, importance in feature_importances:
    if float(importance)!=0:
        print(f"{feature}: {importance}")

Investigating performance of ElasticNet() model...
     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ 
Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-
     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures 
-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------
   1 | 0.1439 | 0.1415 | 0.0901 | 0.3134 | 0.3103 | 0.2468 |  0.2171 | 20160 |  5040 |  48  |  113  |   2   |
   2 | 0.1447 | 0.1377 | 0.0931 | 0.3146 | 0.3045 | 0.2513 |  0.2331 | 20160 |  5040 |  48  |  113  |   2   |
   3 | 0.1421 | 0.1504 | 0.0928 | 0.3110 | 0.3203 | 0.2509 |  0.1883 | 20160 |  5040 |  48  |  113  |   2   |
   4 | 0.1422 | 0.1502 | 0.0933 | 0.3108 | 0.3235 | 0.2516 |  0.2074 | 20160 |  5040 |  48  |  113  |   2   |
   5 | 0.1445 | 0.1403 | 0.0904 | 0.3142 | 0.3086 | 0.2472 |  0.1996 | 20160 

### 1.2) Freesound - With default parameters

In [5]:
# 5-fold cross-validation of an ElasticNet with scikit-learn's default
# hyperparameters, predicting 'info.P_ground_truth' from the Freesound features.
# NOTE(review): the output saved with this cell contains warnings raised from
# cd_fast.enet_coordinate_descent for several folds; presumably these are
# convergence warnings caused by the unscaled features. TODO confirm, and
# consider scaling the features or raising max_iter.
model = sklearn.linear_model.ElasticNet()
print(f'Investigating performance of {model} model...')

# Per-fold metrics; averaged after the loop.
MSEs_train = []
MSEs_val = []
MSEs_test = []
MEs_train = []
MEs_val = []
MEs_test = []

print('     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ ')
print('Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-')
print('     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures ')
print('-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------')
for val_fold in [1,2,3,4,5]:

    # Extract dataframes
    # Training set: all samples that are neither in the test set (fold 0) nor in the current validation fold.
    df_train = responses_Freesound[(responses_Freesound['info.fold'] != val_fold) & (responses_Freesound['info.fold'] > 0)]
    df_val   = responses_Freesound[responses_Freesound['info.fold'] == val_fold]
    # Test set: the same 48 stimuli were shown to all participants, so the mean of their ratings is the ground truth.
    df_test  = responses_Freesound[responses_Freesound['info.fold'] == 0].groupby(['info.soundscape','info.masker','info.smr']).mean()

    # Get ground-truth labels
    Y_train = df_train['info.P_ground_truth'].values
    Y_val = df_val['info.P_ground_truth'].values
    Y_test = df_test['info.P_ground_truth'].values

    # Get features
    X_train = df_train[Freesound_features].values
    X_val = df_val[Freesound_features].values
    X_test = df_test[Freesound_features].values

    # Fit model (fit() returns the estimator itself, so X_LR and model are the same object)
    X_LR = model.fit(X_train, Y_train)

    # Predict each split once; clip() limits predictions to its default range [-1, 1].
    pred_train = clip(X_LR.predict(X_train))
    pred_val = clip(X_LR.predict(X_val))
    pred_test = clip(X_LR.predict(X_test))

    # Get MSEs and mean absolute errors (the "Mean error" columns of the table)
    MSE_train = np.mean((pred_train - Y_train)**2)
    MSE_val = np.mean((pred_val - Y_val)**2)
    MSE_test = np.mean((pred_test - Y_test)**2)
    ME_train = np.mean(np.abs(pred_train - Y_train))
    ME_val = np.mean(np.abs(pred_val - Y_val))
    ME_test = np.mean(np.abs(pred_test - Y_test))

    # Add metrics
    MSEs_train.append(MSE_train)
    MSEs_val.append(MSE_val)
    MSEs_test.append(MSE_test)
    MEs_train.append(ME_train)
    MEs_val.append(ME_val)
    MEs_test.append(ME_test)

    print(f'{val_fold:4d} | {MSE_train:.4f} | {MSE_val:.4f} | {MSE_test:.4f} | {ME_train:.4f} | {ME_val:.4f} | {ME_test:.4f} | {X_LR.intercept_:7.4f} | {X_train.shape[0]:5d} | {X_val.shape[0]:5d} | {X_test.shape[0]:^4d} | {X_train.shape[1]:^5d} | {np.sum(np.abs(X_LR.coef_) > 0):^5d} |')

print(f'Mean | {np.mean(MSEs_train):.4f} | {np.mean(MSEs_val):.4f} | {np.mean(MSEs_test):.4f} | {np.mean(MEs_train):.4f} | {np.mean(MEs_val):.4f} | {np.mean(MEs_test):.4f} |')
print()


# Rank the non-zero coefficients by magnitude. X_LR was last fitted with
# val_fold == 5, so this ranking describes that final fold only.
coefficients = X_LR.coef_
feature_importances = list(zip(Freesound_features, coefficients))
feature_importances.sort(key=lambda x: abs(x[1]), reverse=True)
for feature, importance in feature_importances:
    if float(importance)!=0:
        print(f"{feature}: {importance}")

Investigating performance of ElasticNet() model...
     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ 
Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-
     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures 
-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------


  model = cd_fast.enet_coordinate_descent(


   1 | 0.1261 | 0.1291 | 0.0769 | 0.2877 | 0.2918 | 0.2263 | -1.7373 | 20160 |  5040 |  48  |  139  |  14   |


  model = cd_fast.enet_coordinate_descent(


   2 | 0.1294 | 0.1166 | 0.0795 | 0.2924 | 0.2758 | 0.2304 | -1.6041 | 20160 |  5040 |  48  |  139  |  15   |


  model = cd_fast.enet_coordinate_descent(


   3 | 0.1257 | 0.1326 | 0.0792 | 0.2882 | 0.2918 | 0.2259 | -1.5496 | 20160 |  5040 |  48  |  139  |  14   |


  model = cd_fast.enet_coordinate_descent(


   4 | 0.1248 | 0.1358 | 0.0802 | 0.2859 | 0.3023 | 0.2280 | -1.6148 | 20160 |  5040 |  48  |  139  |  15   |
   5 | 0.1256 | 0.1310 | 0.0777 | 0.2867 | 0.2942 | 0.2304 | -1.7943 | 20160 |  5040 |  48  |  139  |  17   |
Mean | 0.1263 | 0.1290 | 0.0787 | 0.2882 | 0.2912 | 0.2282 |

freesound.lowlevel.mfcc.avg.c0: -0.001948133089964581
freesound.lowlevel.mfcc.var.c4: 0.0002039009812083063
freesound.lowlevel.mfcc.var.c2: -0.00018790639543036085
freesound.lowlevel.mfcc.var.c3: 0.00011957705865956737
freesound.lowlevel.mfcc.var.c6: 0.00011847394006583769
freesound.lowlevel.spectral_rolloff.avg: 6.288231807923362e-05
freesound.lowlevel.mfcc.var.c0: -5.14027003379636e-05
freesound.lowlevel.spectral_kurtosis.var: -3.0189709897704384e-05
freesound.lowlevel.spectral_centroid.p20: -2.0784049753560968e-05
freesound.lowlevel.spectral_rolloff.p80: -1.1197597092819076e-05
freesound.lowlevel.mfcc.var.c5: 9.302821612018578e-06
freesound.lowlevel.spectral_rolloff.var: -2.9019303640113923e-08
freesound.l

  model = cd_fast.enet_coordinate_descent(


### 1.3) ARAUS - Defining parameters

In [11]:
from sklearn.linear_model import ElasticNet
# 5-fold cross-validation of an ElasticNet with hand-set hyperparameters,
# predicting 'info.P_ground_truth' from the ARAUS features.
alpha = 0.1  # Example value, adjust as needed
l1_ratio = 0.7  # Example value, adjust as needed
random_state = 0  # selection="random" picks coefficients in random order; a fixed seed makes re-runs reproducible
model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, selection="random", random_state=random_state)
#print(f'Investigating performance of {model} model...')

# Per-fold metrics; averaged after the loop.
MSEs_train = []
MSEs_val = []
MSEs_test = []
MEs_train = []
MEs_val = []
MEs_test = []

print('     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ ')
print('Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-')
print('     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures ')
print('-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------')
for val_fold in [1,2,3,4,5]:

    # Extract dataframes
    # Training set: all samples that are neither in the test set (fold 0) nor in the current validation fold.
    df_train = responses_ARAUS[(responses_ARAUS['info.fold'] != val_fold) & (responses_ARAUS['info.fold'] > 0)]
    df_val   = responses_ARAUS[responses_ARAUS['info.fold'] == val_fold]
    # Test set: the same 48 stimuli were shown to all participants, so the mean of their ratings is the ground truth.
    df_test  = responses_ARAUS[responses_ARAUS['info.fold'] == 0].groupby(['info.soundscape','info.masker','info.smr']).mean()

    # Get ground-truth labels
    Y_train = df_train['info.P_ground_truth'].values
    Y_val = df_val['info.P_ground_truth'].values
    Y_test = df_test['info.P_ground_truth'].values

    # Get features
    X_train = df_train[ARAUS_features].values
    X_val = df_val[ARAUS_features].values
    X_test = df_test[ARAUS_features].values

    # Fit model (fit() returns the estimator itself, so X_LR and model are the same object)
    X_LR = model.fit(X_train, Y_train)

    # Predict each split once; clip() limits predictions to its default range [-1, 1].
    pred_train = clip(X_LR.predict(X_train))
    pred_val = clip(X_LR.predict(X_val))
    pred_test = clip(X_LR.predict(X_test))

    # Get MSEs and mean absolute errors (the "Mean error" columns of the table)
    MSE_train = np.mean((pred_train - Y_train)**2)
    MSE_val = np.mean((pred_val - Y_val)**2)
    MSE_test = np.mean((pred_test - Y_test)**2)
    ME_train = np.mean(np.abs(pred_train - Y_train))
    ME_val = np.mean(np.abs(pred_val - Y_val))
    ME_test = np.mean(np.abs(pred_test - Y_test))

    # Add metrics
    MSEs_train.append(MSE_train)
    MSEs_val.append(MSE_val)
    MSEs_test.append(MSE_test)
    MEs_train.append(ME_train)
    MEs_val.append(ME_val)
    MEs_test.append(ME_test)

    print(f'{val_fold:4d} | {MSE_train:.4f} | {MSE_val:.4f} | {MSE_test:.4f} | {ME_train:.4f} | {ME_val:.4f} | {ME_test:.4f} | {X_LR.intercept_:7.4f} | {X_train.shape[0]:5d} | {X_val.shape[0]:5d} | {X_test.shape[0]:^4d} | {X_train.shape[1]:^5d} | {np.sum(np.abs(X_LR.coef_) > 0):^5d} |')

print(f'Mean | {np.mean(MSEs_train):.4f} | {np.mean(MSEs_val):.4f} | {np.mean(MSEs_test):.4f} | {np.mean(MEs_train):.4f} | {np.mean(MEs_val):.4f} | {np.mean(MEs_test):.4f} |')
print()


# Rank the non-zero coefficients by magnitude and collect their feature names.
# X_LR was last fitted with val_fold == 5, so both the ranking and
# list_features describe that final fold only.
coefficients = X_LR.coef_
feature_importances = list(zip(ARAUS_features, coefficients))
feature_importances.sort(key=lambda x: abs(x[1]), reverse=True)
list_features=[]
for feature, importance in feature_importances:
    if float(importance)!=0:
        print(f"{feature}: {importance}")
        list_features.append(feature)
print("LIST ",list_features)

     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ 
Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-
     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures 
-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------
   1 | 0.1334 | 0.1330 | 0.0740 | 0.2989 | 0.2974 | 0.2309 |  0.5024 | 20160 |  5040 |  48  |  113  |  12   |
   2 | 0.1343 | 0.1318 | 0.0774 | 0.2999 | 0.2936 | 0.2345 |  0.1541 | 20160 |  5040 |  48  |  113  |  10   |
   3 | 0.1336 | 0.1369 | 0.0747 | 0.2992 | 0.3015 | 0.2307 |  0.7271 | 20160 |  5040 |  48  |  113  |  10   |
   4 | 0.1309 | 0.1424 | 0.0795 | 0.2951 | 0.3130 | 0.2404 |  0.3981 | 20160 |  5040 |  48  |  113  |  10   |
   5 | 0.1340 | 0.1338 | 0.0720 | 0.2993 | 0.3002 | 0.2259 |  0.4347 | 20160 |  5040 |  48  |  113  |  11   |
Mean | 0.1333 | 0.

### 1.4) Freesound - Defining parameters

In [10]:
from sklearn.linear_model import ElasticNet
# 5-fold cross-validation of an ElasticNet with hand-set hyperparameters,
# predicting 'info.P_ground_truth' from the Freesound features.
# NOTE(review): the output saved with this cell contains warnings raised from
# cd_fast.enet_coordinate_descent for several folds; presumably these are
# convergence warnings caused by the unscaled features. TODO confirm, and
# consider scaling the features or raising max_iter.
alpha = 0.2  # Example value, adjust as needed
l1_ratio = 0.5  # Example value, adjust as needed
random_state = 0  # selection="random" picks coefficients in random order; a fixed seed makes re-runs reproducible
model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, selection="random", random_state=random_state)
print(f'Investigating performance of {model} model...')

# Per-fold metrics; averaged after the loop.
MSEs_train = []
MSEs_val = []
MSEs_test = []
MEs_train = []
MEs_val = []
MEs_test = []

print('     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ ')
print('Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-')
print('     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures ')
print('-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------')
for val_fold in [1,2,3,4,5]:

    # Extract dataframes
    # Training set: all samples that are neither in the test set (fold 0) nor in the current validation fold.
    df_train = responses_Freesound[(responses_Freesound['info.fold'] != val_fold) & (responses_Freesound['info.fold'] > 0)]
    df_val   = responses_Freesound[responses_Freesound['info.fold'] == val_fold]
    # Test set: the same 48 stimuli were shown to all participants, so the mean of their ratings is the ground truth.
    df_test  = responses_Freesound[responses_Freesound['info.fold'] == 0].groupby(['info.soundscape','info.masker','info.smr']).mean()

    # Get ground-truth labels
    Y_train = df_train['info.P_ground_truth'].values
    Y_val = df_val['info.P_ground_truth'].values
    Y_test = df_test['info.P_ground_truth'].values

    # Get features
    X_train = df_train[Freesound_features].values
    X_val = df_val[Freesound_features].values
    X_test = df_test[Freesound_features].values

    # Fit model (fit() returns the estimator itself, so X_LR and model are the same object)
    X_LR = model.fit(X_train, Y_train)

    # Predict each split once; clip() limits predictions to its default range [-1, 1].
    pred_train = clip(X_LR.predict(X_train))
    pred_val = clip(X_LR.predict(X_val))
    pred_test = clip(X_LR.predict(X_test))

    # Get MSEs and mean absolute errors (the "Mean error" columns of the table)
    MSE_train = np.mean((pred_train - Y_train)**2)
    MSE_val = np.mean((pred_val - Y_val)**2)
    MSE_test = np.mean((pred_test - Y_test)**2)
    ME_train = np.mean(np.abs(pred_train - Y_train))
    ME_val = np.mean(np.abs(pred_val - Y_val))
    ME_test = np.mean(np.abs(pred_test - Y_test))

    # Add metrics
    MSEs_train.append(MSE_train)
    MSEs_val.append(MSE_val)
    MSEs_test.append(MSE_test)
    MEs_train.append(ME_train)
    MEs_val.append(ME_val)
    MEs_test.append(ME_test)

    print(f'{val_fold:4d} | {MSE_train:.4f} | {MSE_val:.4f} | {MSE_test:.4f} | {ME_train:.4f} | {ME_val:.4f} | {ME_test:.4f} | {X_LR.intercept_:7.4f} | {X_train.shape[0]:5d} | {X_val.shape[0]:5d} | {X_test.shape[0]:^4d} | {X_train.shape[1]:^5d} | {np.sum(np.abs(X_LR.coef_) > 0):^5d} |')

print(f'Mean | {np.mean(MSEs_train):.4f} | {np.mean(MSEs_val):.4f} | {np.mean(MSEs_test):.4f} | {np.mean(MEs_train):.4f} | {np.mean(MEs_val):.4f} | {np.mean(MEs_test):.4f} |')
print()


# Rank the non-zero coefficients by magnitude and collect their feature names.
# X_LR was last fitted with val_fold == 5, so both the ranking and
# list_features describe that final fold only.
coefficients = X_LR.coef_
feature_importances = list(zip(Freesound_features, coefficients))
feature_importances.sort(key=lambda x: abs(x[1]), reverse=True)
list_features=[]
for feature, importance in feature_importances:
    if float(importance)!=0:
        print(f"{feature}: {importance}")
        list_features.append(feature)
print("LIST ",list_features)


Investigating performance of ElasticNet(alpha=0.2, selection='random') model...
     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ 
Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-
     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures 
-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------


  model = cd_fast.enet_coordinate_descent(


   1 | 0.1255 | 0.1284 | 0.0750 | 0.2868 | 0.2906 | 0.2239 | -1.7995 | 20160 |  5040 |  48  |  139  |  25   |


  model = cd_fast.enet_coordinate_descent(


   2 | 0.1286 | 0.1153 | 0.0809 | 0.2911 | 0.2741 | 0.2351 | -1.8058 | 20160 |  5040 |  48  |  139  |  23   |


  model = cd_fast.enet_coordinate_descent(


   3 | 0.1247 | 0.1327 | 0.0752 | 0.2865 | 0.2912 | 0.2183 | -1.5313 | 20160 |  5040 |  48  |  139  |  23   |


  model = cd_fast.enet_coordinate_descent(


   4 | 0.1235 | 0.1359 | 0.0803 | 0.2841 | 0.3019 | 0.2306 | -1.8059 | 20160 |  5040 |  48  |  139  |  25   |
   5 | 0.1243 | 0.1335 | 0.0767 | 0.2849 | 0.2966 | 0.2307 | -1.7516 | 20160 |  5040 |  48  |  139  |  26   |
Mean | 0.1253 | 0.1292 | 0.0776 | 0.2867 | 0.2909 | 0.2277 |

freesound.lowlevel.mfcc.avg.c0: -0.0020697146781121665
freesound.lowlevel.mfcc.p20.c4: -0.0010782946422394494
freesound.lowlevel.mfcc.avg.c1: -0.000509201181315753
freesound.lowlevel.mfcc.p20.c5: -0.0003510832550836617
freesound.lowlevel.mfcc.p80.c1: -0.000299237713641901
freesound.lowlevel.mfcc.var.c6: 0.00028185256423993496
freesound.lowlevel.mfcc.var.c2: -0.00024820452445719075
freesound.lowlevel.mfcc.var.c7: 0.00019870783259350668
freesound.lowlevel.mfcc.var.c4: 0.00014714275683947315
freesound.lowlevel.spectral_centroid.p20: -0.00014707458786113845
freesound.lowlevel.spectral_centroid.avg: 0.0001363724325270457
freesound.lowlevel.mfcc.var.c3: 0.00011446808099579026
freesound.lowlevel.spectral_rolloff.avg

  model = cd_fast.enet_coordinate_descent(


In [None]:
# Hand-written list of feature names: 26 Freesound descriptors followed by
# 11 ARAUS descriptors. No other cell visible in this part of the notebook
# reads it.
chosen_features = [
    # Freesound descriptors
    'freesound.lowlevel.mfcc.avg.c0',
    'freesound.lowlevel.mfcc.p20.c4',
    'freesound.lowlevel.mfcc.avg.c1',
    'freesound.lowlevel.mfcc.p20.c5',
    'freesound.lowlevel.mfcc.p80.c1',
    'freesound.lowlevel.mfcc.var.c6',
    'freesound.lowlevel.mfcc.var.c2',
    'freesound.lowlevel.mfcc.var.c7',
    'freesound.lowlevel.mfcc.var.c4',
    'freesound.lowlevel.spectral_centroid.p20',
    'freesound.lowlevel.spectral_centroid.avg',
    'freesound.lowlevel.mfcc.var.c3',
    'freesound.lowlevel.spectral_rolloff.avg',
    'freesound.lowlevel.spectral_rolloff.p80',
    'freesound.lowlevel.spectral_kurtosis.var',
    'freesound.lowlevel.mfcc.var.c0',
    'freesound.lowlevel.mfcc.var.c5',
    'freesound.lowlevel.mfcc.var.c8',
    'freesound.lowlevel.mfcc.var.c1',
    'freesound.lowlevel.spectral_rolloff.p20',
    'freesound.lowlevel.spectral_rolloff.var',
    'freesound.lowlevel.spectral_spread.p20',
    'freesound.lowlevel.spectral_spread.p80',
    'freesound.lowlevel.spectral_centroid.var',
    'freesound.lowlevel.spectral_spread.avg',
    'freesound.lowlevel.spectral_spread.var',
    # ARAUS descriptors
    'ARAUS.energy_frequency.00006_3',
    'ARAUS.energy_frequency.02000_0',
    'ARAUS.LC.max',
    'ARAUS.energy_frequency.00063_0',
    'ARAUS.energy_frequency.04000_0',
    'ARAUS.energy_frequency.10000_0',
    'ARAUS.energy_frequency.00031_5',
    'ARAUS.energy_frequency.08000_0',
    'ARAUS.loudness.max',
    'ARAUS.energy_frequency.00200_0',
    'ARAUS.energy_frequency.20000_0',
]

In [9]:
from sklearn.linear_model import ElasticNet
# 5-fold cross-validation of an ElasticNet with hand-set hyperparameters,
# predicting 'info.P_ground_truth' from the columns listed in mix_features.
alpha = 0.2  # Example value, adjust as needed
l1_ratio = 0.5  # Example value, adjust as needed
random_state = 0  # selection="random" picks coefficients in random order; a fixed seed makes re-runs reproducible
model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, selection="random", random_state=random_state)
print(f'Investigating performance of {model} model...')

# Per-fold metrics; averaged after the loop.
MSEs_train = []
MSEs_val = []
MSEs_test = []
MEs_train = []
MEs_val = []
MEs_test = []

print('     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ ')
print('Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-')
print('     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures ')
print('-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------')
for val_fold in [1,2,3,4,5]:

    # Extract dataframes
    # Training set: all samples that are neither in the test set (fold 0) nor in the current validation fold.
    df_train = responses_Mix[(responses_Mix['info.fold'] != val_fold) & (responses_Mix['info.fold'] > 0)]
    df_val   = responses_Mix[responses_Mix['info.fold'] == val_fold]
    # Test set: the same 48 stimuli were shown to all participants, so the mean of their ratings is the ground truth.
    df_test  = responses_Mix[responses_Mix['info.fold'] == 0].groupby(['info.soundscape','info.masker','info.smr']).mean()

    # Get ground-truth labels
    Y_train = df_train['info.P_ground_truth'].values
    Y_val = df_val['info.P_ground_truth'].values
    Y_test = df_test['info.P_ground_truth'].values

    # Get features
    X_train = df_train[mix_features].values
    X_val = df_val[mix_features].values
    X_test = df_test[mix_features].values

    # Fit model (fit() returns the estimator itself, so X_LR and model are the same object)
    X_LR = model.fit(X_train, Y_train)

    # Predict each split once; clip() limits predictions to its default range [-1, 1].
    pred_train = clip(X_LR.predict(X_train))
    pred_val = clip(X_LR.predict(X_val))
    pred_test = clip(X_LR.predict(X_test))

    # Get MSEs and mean absolute errors (the "Mean error" columns of the table)
    MSE_train = np.mean((pred_train - Y_train)**2)
    MSE_val = np.mean((pred_val - Y_val)**2)
    MSE_test = np.mean((pred_test - Y_test)**2)
    ME_train = np.mean(np.abs(pred_train - Y_train))
    ME_val = np.mean(np.abs(pred_val - Y_val))
    ME_test = np.mean(np.abs(pred_test - Y_test))

    # Add metrics
    MSEs_train.append(MSE_train)
    MSEs_val.append(MSE_val)
    MSEs_test.append(MSE_test)
    MEs_train.append(ME_train)
    MEs_val.append(ME_val)
    MEs_test.append(ME_test)

    print(f'{val_fold:4d} | {MSE_train:.4f} | {MSE_val:.4f} | {MSE_test:.4f} | {ME_train:.4f} | {ME_val:.4f} | {ME_test:.4f} | {X_LR.intercept_:7.4f} | {X_train.shape[0]:5d} | {X_val.shape[0]:5d} | {X_test.shape[0]:^4d} | {X_train.shape[1]:^5d} | {np.sum(np.abs(X_LR.coef_) > 0):^5d} |')

print(f'Mean | {np.mean(MSEs_train):.4f} | {np.mean(MSEs_val):.4f} | {np.mean(MSEs_test):.4f} | {np.mean(MEs_train):.4f} | {np.mean(MEs_val):.4f} | {np.mean(MEs_test):.4f} |')
print()


# Rank the non-zero coefficients by magnitude. X_LR was last fitted with
# val_fold == 5, so this ranking describes that final fold only.
coefficients = X_LR.coef_
feature_importances = list(zip(mix_features, coefficients))
feature_importances.sort(key=lambda x: abs(x[1]), reverse=True)
for feature, importance in feature_importances:
    if float(importance)!=0:
        print(f"{feature}: {importance}")

Investigating performance of ElasticNet(alpha=0.2, selection='random') model...
     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ 
Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-
     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures 
-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------
   1 | 0.1258 | 0.1285 | 0.0747 | 0.2870 | 0.2907 | 0.2213 | -1.8169 | 20160 |  5040 |  48  |  91   |  17   |
   2 | 0.1290 | 0.1153 | 0.0794 | 0.2914 | 0.2741 | 0.2315 | -1.7857 | 20160 |  5040 |  48  |  91   |  17   |
   3 | 0.1250 | 0.1321 | 0.0760 | 0.2869 | 0.2902 | 0.2180 | -1.6329 | 20160 |  5040 |  48  |  91   |  18   |
   4 | 0.1238 | 0.1344 | 0.0779 | 0.2843 | 0.3005 | 0.2273 | -1.7497 | 20160 |  5040 |  48  |  91   |  20   |
   5 | 0.1246 | 0.1328 | 0.0770 | 0.2851 | 0.295

## 2) EVENTFULNESS

### 2.1) With default parameters

In [8]:
from sklearn.linear_model import ElasticNet

# ElasticNet with scikit-learn's default hyperparameters, cross-validated over
# folds 1-5 on the eventfulness target ('info.E_ground_truth') using the ARAUS
# feature columns.
model = ElasticNet()
print(f'Investigating performance of {model} model...')
MSEs_train, MSEs_val, MSEs_test = [], [], []
MEs_train, MEs_val, MEs_test = [], [], []

print('     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ ')
print('Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-')
print('     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures ')
print('-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------')
for val_fold in [1,2,3,4,5]:

    # Split by fold id: fold 0 is the test set, val_fold is held out for
    # validation, every other positive fold is used for training.
    fold_ids = responses_ARAUS['info.fold']
    df_train = responses_ARAUS[(fold_ids != val_fold) & (fold_ids > 0)]
    df_val   = responses_ARAUS[fold_ids == val_fold]
    # The same 48 test stimuli were shown to all participants, so their mean
    # rating per stimulus is taken as the ground truth.
    df_test  = responses_ARAUS[fold_ids == 0].groupby(['info.soundscape','info.masker','info.smr']).mean()

    # Targets
    Y_train = df_train['info.E_ground_truth'].values
    Y_val = df_val['info.E_ground_truth'].values
    Y_test = df_test['info.E_ground_truth'].values

    # Feature matrices
    X_train = df_train[ARAUS_features].values
    X_val = df_val[ARAUS_features].values
    X_test = df_test[ARAUS_features].values

    # Fit on the training split
    X_LR = model.fit(X_train, Y_train)

    # Residuals of the clipped predictions, shared by both metrics
    err_train = clip(X_LR.predict(X_train)) - Y_train
    err_val = clip(X_LR.predict(X_val)) - Y_val
    err_test = clip(X_LR.predict(X_test)) - Y_test

    # Mean squared error and mean absolute error per split
    MSE_train = np.mean(err_train**2)
    MSE_val = np.mean(err_val**2)
    MSE_test = np.mean(err_test**2)
    ME_train = np.mean(np.abs(err_train))
    ME_val = np.mean(np.abs(err_val))
    ME_test = np.mean(np.abs(err_test))

    # Record this fold's metrics
    MSEs_train.append(MSE_train)
    MSEs_val.append(MSE_val)
    MSEs_test.append(MSE_test)
    MEs_train.append(ME_train)
    MEs_val.append(ME_val)
    MEs_test.append(ME_test)

    print(f'{val_fold:4d} | {MSE_train:.4f} | {MSE_val:.4f} | {MSE_test:.4f} | {ME_train:.4f} | {ME_val:.4f} | {ME_test:.4f} | {X_LR.intercept_:7.4f} | {X_train.shape[0]:5d} | {X_val.shape[0]:5d} | {X_test.shape[0]:^4d} | {X_train.shape[1]:^5d} | {np.sum(np.abs(X_LR.coef_) > 0):^5d} |')

print(f'Mean | {np.mean(MSEs_train):.4f} | {np.mean(MSEs_val):.4f} | {np.mean(MSEs_test):.4f} | {np.mean(MEs_train):.4f} | {np.mean(MEs_val):.4f} | {np.mean(MEs_test):.4f} |')
print()

# Coefficients of the ElasticNet as fitted in the last loop iteration
# (val_fold == 5), paired with their feature names and ordered by decreasing
# absolute value. Every feature is printed, including zero coefficients.
coefficients = X_LR.coef_
feature_importances = sorted(
    zip(ARAUS_features, coefficients),
    key=lambda pair: abs(pair[1]),
    reverse=True,
)
for feature, importance in feature_importances:
    print(f"{feature}: {importance}")

Investigating performance of ElasticNet() model...
     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ 
Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-
     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures 
-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------
   1 | 0.1393 | 0.1288 | 0.0394 | 0.3107 | 0.2963 | 0.1688 | -0.2925 | 20160 |  5040 |  48  |  113  |   1   |
   2 | 0.1378 | 0.1366 | 0.0412 | 0.3088 | 0.3069 | 0.1732 | -0.2851 | 20160 |  5040 |  48  |  113  |   1   |
   3 | 0.1390 | 0.1273 | 0.0394 | 0.3102 | 0.2950 | 0.1688 | -0.3629 | 20160 |  5040 |  48  |  113  |   2   |
   4 | 0.1372 | 0.1402 | 0.0418 | 0.3083 | 0.3111 | 0.1746 | -0.2774 | 20160 |  5040 |  48  |  113  |   1   |
   5 | 0.1336 | 0.1565 | 0.0441 | 0.3032 | 0.3340 | 0.1801 | -0.2803 | 20160 

### 2.2) Defining parameters

In [9]:
from sklearn.linear_model import ElasticNet

# ElasticNet hyperparameters: alpha scales the overall penalty, l1_ratio sets the
# L1/L2 mix. Both are hand-picked example values, not tuned ones.
alpha = 0.1  # Example value, adjust as needed
l1_ratio = 0.7  # Example value, adjust as needed
model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
print(f'Investigating performance of {model} model...')

# Per-fold metrics, one entry appended per validation fold.
MSEs_train = []
MSEs_val = []
MSEs_test = []
MEs_train = []
MEs_val = []
MEs_test = []

print('     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ ')
print('Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-')
print('     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures ')
print('-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------')

# The test set (fold 0) does not depend on the validation fold, so it is built once
# here instead of being recomputed on every iteration.
# For the test set, the same 48 stimuli were shown to all participants so we take the mean of their ratings as the ground truth
df_test = responses_ARAUS[responses_ARAUS['info.fold'] == 0].groupby(['info.soundscape','info.masker','info.smr']).mean()
Y_test = df_test['info.E_ground_truth'].values
X_test = df_test[ARAUS_features].values

for val_fold in [1,2,3,4,5]:

    # Extract dataframes
    df_train = responses_ARAUS[(responses_ARAUS['info.fold'] != val_fold) & (responses_ARAUS['info.fold'] > 0)] # For the training set, use all samples that are not in the test set (fold 0) and current validation fold.
    df_val   = responses_ARAUS[responses_ARAUS['info.fold'] == val_fold]

    # Get ground-truth labels
    Y_train = df_train['info.E_ground_truth'].values
    Y_val = df_val['info.E_ground_truth'].values

    # Get features
    X_train = df_train[ARAUS_features].values
    X_val = df_val[ARAUS_features].values

    # Fit model (fit() returns the estimator itself, so X_LR is the same object as
    # `model`, refitted from scratch on each fold's training data).
    X_LR = model.fit(X_train, Y_train)

    # Predict once per split and reuse the result for both metrics. clip() is the
    # helper defined at the top of the notebook; with its defaults it limits the
    # predictions to [-1, 1].
    pred_train = clip(X_LR.predict(X_train))
    pred_val = clip(X_LR.predict(X_val))
    pred_test = clip(X_LR.predict(X_test))

    # Mean squared errors
    MSE_train = np.mean((pred_train - Y_train)**2)
    MSE_val = np.mean((pred_val - Y_val)**2)
    MSE_test = np.mean((pred_test - Y_test)**2)

    # Mean absolute errors (reported as "Mean error" in the table)
    ME_train = np.mean(np.abs(pred_train - Y_train))
    ME_val = np.mean(np.abs(pred_val - Y_val))
    ME_test = np.mean(np.abs(pred_test - Y_test))

    # Add metrics
    MSEs_train.append(MSE_train)
    MSEs_val.append(MSE_val)
    MSEs_test.append(MSE_test)
    MEs_train.append(ME_train)
    MEs_val.append(ME_val)
    MEs_test.append(ME_test)

    print(f'{val_fold:4d} | {MSE_train:.4f} | {MSE_val:.4f} | {MSE_test:.4f} | {ME_train:.4f} | {ME_val:.4f} | {ME_test:.4f} | {X_LR.intercept_:7.4f} | {X_train.shape[0]:5d} | {X_val.shape[0]:5d} | {X_test.shape[0]:^4d} | {X_train.shape[1]:^5d} | {np.sum(np.abs(X_LR.coef_) > 0):^5d} |')

# Averages over the five folds
print(f'Mean | {np.mean(MSEs_train):.4f} | {np.mean(MSEs_val):.4f} | {np.mean(MSEs_test):.4f} | {np.mean(MEs_train):.4f} | {np.mean(MEs_val):.4f} | {np.mean(MEs_test):.4f} |')
print()

# Rank the features by the magnitude of their fitted ElasticNet weights.
# X_LR was reassigned on every pass of the fold loop above, so the weights
# shown here come from the final fit only (val_fold == 5), not an average
# across folds.
coefficients = X_LR.coef_

# (feature name, coefficient) pairs, largest absolute coefficient first
feature_importances = sorted(
    zip(ARAUS_features, coefficients),
    key=lambda pair: abs(pair[1]),
    reverse=True,
)

# One "name: weight" line per feature, in ranked order
for feature, importance in feature_importances:
    print(f"{feature}: {importance}")

Investigating performance of ElasticNet(alpha=0.1, l1_ratio=0.7) model...
     |    Mean squared error    |        Mean  error       |         |       # samples      | #     | # NZ 
Fold |--------+--------+--------|--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-
     | Train  |   Val  |  Test  | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures 
-----+--------+--------+--------+--------+--------+--------+---------+-------+-------+------+-------+------
   1 | 0.1322 | 0.1233 | 0.0310 | 0.3000 | 0.2875 | 0.1417 | -1.3174 | 20160 |  5040 |  48  |  113  |   9   |
   2 | 0.1293 | 0.1339 | 0.0360 | 0.2965 | 0.2990 | 0.1486 | -1.5575 | 20160 |  5040 |  48  |  113  |   9   |
   3 | 0.1317 | 0.1268 | 0.0352 | 0.2991 | 0.2910 | 0.1469 | -1.5710 | 20160 |  5040 |  48  |  113  |   9   |
   4 | 0.1303 | 0.1314 | 0.0329 | 0.2977 | 0.2993 | 0.1436 | -1.3900 | 20160 |  5040 |  48  |  113  |   9   |
   5 | 0.1260 | 0.1479 | 0.0343 | 0.2918 | 0.3233 | 0.