In [1]:
from statistics import mean

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the pre-computed 4-D array from a .npy file in the current working directory.
# NOTE(review): presumably sliding-window connectivity matrices — confirm against
# whatever produced 'windowed_cms.npy'.
windowed_cms = np.load('windowed_cms.npy')

In [3]:
# Sanity check: display the array dimensions (the last two axes are indexed as a
# square matrix further down).
windowed_cms.shape

(16, 141, 64, 64)

In [4]:
# Flatten every matrix in windowed_cms to the vector of its strictly-upper-triangular
# entries (k=1 skips the diagonal). Rows of all_data are ordered with the first axis
# of windowed_cms outermost and the second axis innermost.
triu_rows, triu_cols = np.triu_indices(windowed_cms.shape[2], k=1)

uppers = [
    matrix[triu_rows, triu_cols]
    for block in windowed_cms
    for matrix in block
]

all_data = np.stack(uppers, axis=0)

In [5]:
# Sanity check: one row per (first-axis, second-axis) pair of windowed_cms,
# one column per strictly-upper-triangular matrix entry.
all_data.shape

(2256, 2016)

In [6]:
# Standardise every feature column of all_data; the fitted scaler is kept in `scaler`.
# NOTE(review): the statistics are computed over ALL rows, including rows that are
# later held out as test folds. Any evaluation should re-fit a scaler on the
# training rows only.
scaler = StandardScaler().fit(all_data)
all_data = scaler.transform(all_data)

In [7]:
# Load the audio-feature array from the working directory; it is tiled below and
# used as the multi-output regression target.
audio_features = np.load('mean_slices_audio_features.npy')

In [8]:
# Sanity check: display the dimensions of the target array before tiling.
audio_features.shape

(141, 5)

In [9]:
# Repeat the target rows once per entry along the first axis of windowed_cms so that
# row i of audio_features_all lines up with row i of all_data (all_data was stacked
# with the first axis outermost and the second axis innermost, which is exactly the
# order np.tile produces here).
n_repeats = windowed_cms.shape[0]  # derived from the data instead of a hard-coded 16
assert audio_features.shape[0] == windowed_cms.shape[1], (
    "audio_features needs one row per entry along the second axis of windowed_cms"
)
audio_features_all = np.tile(audio_features, (n_repeats, 1))

In [10]:
# Sanity check: the row count must match all_data so features and targets align.
audio_features_all.shape

(2256, 5)

In [11]:
# Nested cross-validation.
#   Outer loop: 5-fold KFold, used only to estimate generalisation error.
#   Inner loop: GridSearchCV (cv=5) on the outer-training rows, used only to pick
#               the Ridge hyper-parameters.
# NOTE(review): KFold without shuffling yields contiguous row blocks, and the targets
# are the same rows tiled once per first-axis entry of windowed_cms, so identical
# target rows occur in both train and test. If the goal is to hold out whole
# first-axis entries, GroupKFold would be the appropriate splitter — confirm intent.
kfold = KFold(n_splits=5)

# The search space is the same for every outer fold, so build it once.
# Keys carry the 'ridge__' prefix because Ridge is a named step of a Pipeline.
params = {
    'ridge__alpha': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
    'ridge__solver': ['sparse_cg', 'cholesky', 'sag'],
}

list_mse = []
list_r2 = []
for train_indexes, test_indexes in kfold.split(all_data):

    X_train = all_data[train_indexes]
    X_test = all_data[test_indexes]

    y_train = audio_features_all[train_indexes]
    y_test = audio_features_all[test_indexes]

    # The scaler is part of the estimator so that it is re-fit on the training rows
    # of every inner and outer split; held-out rows never contribute to the
    # mean/variance used to transform them. random_state pins the stochastic
    # 'sag' solver so repeated runs give the same numbers.
    regr_model = Pipeline([
        ('scaler', StandardScaler()),
        ('ridge', Ridge(random_state=0)),
    ])

    # scoring drives model selection on the inner validation folds;
    # n_jobs=-1 runs the candidate fits in parallel on all available cores.
    params_search = GridSearchCV(
        regr_model,
        params,
        scoring='neg_mean_squared_error',
        cv=5,
        n_jobs=-1,
    )

    params_search.fit(X_train, y_train)
    print(params_search.best_estimator_)

    # GridSearchCV refits the best pipeline on all outer-training rows, so predict()
    # applies the train-fitted scaler followed by the selected Ridge model.
    temp_pred = params_search.predict(X_test)
    mse = mean_squared_error(y_test, temp_pred)
    print(mse)
    r2 = r2_score(y_test, temp_pred)
    print(r2)

    list_mse.append(mse)
    list_r2.append(r2)

print('Mean MSE: ', mean(list_mse))
print('Mean r2: ', mean(list_r2))

KeyboardInterrupt: 