In [1]:
import sklearn.linear_model
import numpy as np
import pandas as pd
import os
from SoundLights.features_groups import  ARAUS_features

In [2]:
# Load the SoundLights/ARAUS responses table and discard the two identifier
# columns ('info.file', 'info.participant') that the analysis below does not use.
responses = (
    pd.read_csv(
        os.path.join('..', 'data', 'SoundLights_ARAUS.csv'),
        dtype={'info.participant': str},  # read participant IDs as strings
    )
    .drop(columns=["info.file", "info.participant"])
)
print(responses)


       info.fold                     info.soundscape             info.masker  \
0              0    R1008_segment_binaural_44100.wav         water_10002.wav   
1              0    R1007_segment_binaural_44100.wav          bird_10001.wav   
2              0    R1006_segment_binaural_44100.wav  construction_10001.wav   
3              0    R1006_segment_binaural_44100.wav          bird_10001.wav   
4              0    R1001_segment_binaural_44100.wav         water_10002.wav   
...          ...                                 ...                     ...   
24435          5  R0075_segment_binaural_44100_1.wav       traffic_00019.wav   
24436          5  R0075_segment_binaural_44100_2.wav       traffic_00014.wav   
24437          5  R0035_segment_binaural_44100_2.wav  construction_00034.wav   
24438          5  R0119_segment_binaural_44100_1.wav  construction_00004.wav   
24439          5  R0062_segment_binaural_44100_1.wav         water_00022.wav   

       info.smr  info.stimulus_index  i

In [7]:
# Cross-validate an ElasticNet regressor (default hyper-parameters) that predicts
# the 'info.P_ground_truth' column from the ARAUS_features columns.
# Folds 1-5 rotate as the validation fold; fold 0 is the held-out test set.
model = sklearn.linear_model.ElasticNet
target_column = 'info.P_ground_truth'
print(f'Investigating performance of {model()} model...')
MSEs_train = []
MSEs_val = []
MSEs_test = []

# The test set does not depend on the validation fold, so it is built once here
# instead of on every loop iteration.
# For the test set, the same 48 stimuli were shown to all participants so we take
# the mean of their ratings as the ground truth.
# numeric_only=True keeps the aggregation working on pandas >= 2.0, where mean()
# no longer silently drops non-numeric columns and raises instead.
df_test = (
    responses[responses['info.fold'] == 0]
    .groupby(['info.soundscape', 'info.masker', 'info.smr'])
    .mean(numeric_only=True)
)
X_test = df_test[ARAUS_features].values
Y_test = df_test[target_column].values

print('     |    Mean squared error    |         |       # samples      | #     | # NZ ')
print('Fold |--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-')
print('     | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures ')
print('-----+--------+--------+--------+---------+-------+-------+------+-------+------')
for val_fold in [1,2,3,4,5]:

    # Extract dataframes
    # For the training set, use all samples that are neither in the test set
    # (fold 0) nor in the current validation fold.
    df_train = responses[(responses['info.fold'] != val_fold) & (responses['info.fold'] > 0)]
    df_val   = responses[responses['info.fold'] == val_fold]

    # Get ground-truth labels
    Y_train = df_train[target_column].values
    Y_val = df_val[target_column].values

    # Get features
    X_train = df_train[ARAUS_features].values
    X_val = df_val[ARAUS_features].values

    # Fit a fresh model instance for this fold
    X_LR = model().fit(X_train, Y_train)

    # Get MSEs
    MSE_train = np.mean((X_LR.predict(X_train) - Y_train)**2)
    MSE_val = np.mean((X_LR.predict(X_val) - Y_val)**2)
    MSE_test = np.mean((X_LR.predict(X_test) - Y_test)**2)

    # Add metrics
    MSEs_train.append(MSE_train)
    MSEs_val.append(MSE_val)
    MSEs_test.append(MSE_test)

    print(f'{val_fold:4d} | {MSE_train:.4f} | {MSE_val:.4f} | {MSE_test:.4f} | {X_LR.intercept_:7.4f} | {X_train.shape[0]:5d} | {X_val.shape[0]:5d} | {X_test.shape[0]:^4d} | {X_train.shape[1]:^5d} | {np.count_nonzero(X_LR.coef_):^5d} |')

print(f'Mean | {np.mean(MSEs_train):.4f} | {np.mean(MSEs_val):.4f} | {np.mean(MSEs_test):.4f} |')
print()

# NOTE: X_LR is the model fitted in the LAST loop iteration only (val_fold = 5);
# the coefficients listed below therefore describe that single fold's fit.
coefficients = X_LR.coef_

# Pairing coefficients with feature names
feature_importances = list(zip(ARAUS_features, coefficients))

# Sorting feature importances by absolute value of coefficient (largest first)
feature_importances.sort(key=lambda x: abs(x[1]), reverse=True)

# Displaying feature importances
for feature, importance in feature_importances:
    print(f"{feature}: {importance}")

Investigating performance of ElasticNet() model...
     |    Mean squared error    |         |       # samples      | #     | # NZ 
Fold |--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-
     | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures 
-----+--------+--------+--------+---------+-------+-------+------+-------+------
   1 | 0.1448 | 0.1416 | 0.0890 |  0.2143 | 19160 |  5040 |  48  |  117  |   2   |
   2 | 0.1458 | 0.1375 | 0.0920 |  0.2317 | 19160 |  5040 |  48  |  117  |   2   |
   3 | 0.1421 | 0.1569 | 0.0928 |  0.1883 | 20160 |  4040 |  48  |  117  |   2   |
   4 | 0.1431 | 0.1501 | 0.0923 |  0.2044 | 19160 |  5040 |  48  |  117  |   2   |
   5 | 0.1455 | 0.1404 | 0.0893 |  0.1957 | 19160 |  5040 |  48  |  117  |   2   |
Mean | 0.1443 | 0.1453 | 0.0911 |

ARAUS.loudness.max: -0.005310076226013088
ARAUS.energy_frequency.10000_0: -0.0009497878980805982
ARAUS.sharpness.avg: -0.0
ARAUS.sharpness.max: 0.0
ARAUS.sharpness.p05: 0.0
AR

In [8]:
# Cross-validate an ElasticNet regressor (default hyper-parameters) that predicts
# the 'info.E_ground_truth' column from the ARAUS_features columns.
# Folds 1-5 rotate as the validation fold; fold 0 is the held-out test set.
model = sklearn.linear_model.ElasticNet
target_column = 'info.E_ground_truth'
print(f'Investigating performance of {model()} model...')
MSEs_train = []
MSEs_val = []
MSEs_test = []

# The test set does not depend on the validation fold, so it is built once here
# instead of on every loop iteration.
# For the test set, the same 48 stimuli were shown to all participants so we take
# the mean of their ratings as the ground truth.
# numeric_only=True keeps the aggregation working on pandas >= 2.0, where mean()
# no longer silently drops non-numeric columns and raises instead.
df_test = (
    responses[responses['info.fold'] == 0]
    .groupby(['info.soundscape', 'info.masker', 'info.smr'])
    .mean(numeric_only=True)
)
X_test = df_test[ARAUS_features].values
Y_test = df_test[target_column].values

print('     |    Mean squared error    |         |       # samples      | #     | # NZ ')
print('Fold |--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-')
print('     | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures ')
print('-----+--------+--------+--------+---------+-------+-------+------+-------+------')
for val_fold in [1,2,3,4,5]:

    # Extract dataframes
    # For the training set, use all samples that are neither in the test set
    # (fold 0) nor in the current validation fold.
    df_train = responses[(responses['info.fold'] != val_fold) & (responses['info.fold'] > 0)]
    df_val   = responses[responses['info.fold'] == val_fold]

    # Get ground-truth labels
    Y_train = df_train[target_column].values
    Y_val = df_val[target_column].values

    # Get features
    X_train = df_train[ARAUS_features].values
    X_val = df_val[ARAUS_features].values

    # Fit a fresh model instance for this fold
    X_LR = model().fit(X_train, Y_train)

    # Get MSEs
    MSE_train = np.mean((X_LR.predict(X_train) - Y_train)**2)
    MSE_val = np.mean((X_LR.predict(X_val) - Y_val)**2)
    MSE_test = np.mean((X_LR.predict(X_test) - Y_test)**2)

    # Add metrics
    MSEs_train.append(MSE_train)
    MSEs_val.append(MSE_val)
    MSEs_test.append(MSE_test)

    print(f'{val_fold:4d} | {MSE_train:.4f} | {MSE_val:.4f} | {MSE_test:.4f} | {X_LR.intercept_:7.4f} | {X_train.shape[0]:5d} | {X_val.shape[0]:5d} | {X_test.shape[0]:^4d} | {X_train.shape[1]:^5d} | {np.count_nonzero(X_LR.coef_):^5d} |')

print(f'Mean | {np.mean(MSEs_train):.4f} | {np.mean(MSEs_val):.4f} | {np.mean(MSEs_test):.4f} |')
print()

# NOTE: X_LR is the model fitted in the LAST loop iteration only (val_fold = 5);
# the coefficients listed below therefore describe that single fold's fit.
coefficients = X_LR.coef_

# Pairing coefficients with feature names
feature_importances = list(zip(ARAUS_features, coefficients))

# Sorting feature importances by absolute value of coefficient (largest first)
feature_importances.sort(key=lambda x: abs(x[1]), reverse=True)

# Displaying feature importances
for feature, importance in feature_importances:
    print(f"{feature}: {importance}")

Investigating performance of ElasticNet() model...
     |    Mean squared error    |         |       # samples      | #     | # NZ 
Fold |--------+--------+--------| Inter-  |-------+-------+------| feat- | feat-
     | Train  |   Val  |  Test  |  cept   | Train |  Val  | Test | ures  | ures 
-----+--------+--------+--------+---------+-------+-------+------+-------+------
   1 | 0.1412 | 0.1287 | 0.0388 | -0.2973 | 19160 |  5040 |  48  |  117  |   1   |
   2 | 0.1395 | 0.1365 | 0.0405 | -0.3033 | 19160 |  5040 |  48  |  117  |   2   |
   3 | 0.1390 | 0.1343 | 0.0394 | -0.3629 | 20160 |  4040 |  48  |  117  |   2   |
   4 | 0.1391 | 0.1400 | 0.0414 | -0.2811 | 19160 |  5040 |  48  |  117  |   1   |
   5 | 0.1352 | 0.1563 | 0.0437 | -0.2872 | 19160 |  5040 |  48  |  117  |   2   |
Mean | 0.1388 | 0.1391 | 0.0408 |

ARAUS.loudness.max: 0.009498902654049448
ARAUS.energy_frequency.00630_0: 8.7571961329682e-05
ARAUS.sharpness.avg: -0.0
ARAUS.sharpness.max: -0.0
ARAUS.sharpness.p05: -0.0
ARAU