In [None]:
import os

# adding tools directory to path, so we can access the utils easily
import sys
root_path = os.path.join('.', 'tools')
# Guard the append so that re-running this cell does not pile up duplicate
# entries for the tools directory on sys.path.
if root_path not in sys.path:
    sys.path.append(root_path)

import file_tools
# Experiment folder layout: datasets and results sit side by side under ./experiments.
_EXP_DIR = os.path.join('.', 'experiments')
_DATASET_DIR, _RESULTS_DIR = (
    os.path.join(_EXP_DIR, leaf) for leaf in ('datasets', 'results')
)

# Hand the results path to the project helper -- presumably it creates the
# folder when it is missing (TODO confirm against file_tools).
file_tools.ensure_dir(_RESULTS_DIR)

In [None]:
import dataset_tools

import numpy as np
from scipy.spatial import distance
from sklearn.preprocessing import StandardScaler

# Features: the `eeg_raw` collection exposed by dataset_tools
# (presumably one row of raw EEG per sample -- TODO confirm).
X = dataset_tools.eeg_raw

# Target: Euclidean distance between each observed face and the target face
# paired with it by position.
face_pairs = zip(dataset_tools.observed_faces, dataset_tools.target_faces)
y = [distance.euclidean(observed, target) for observed, target in face_pairs]

# Standardise the target. StandardScaler expects a 2-D column, so reshape on the
# way in and flatten back to a plain Python list on the way out.
# NOTE(review): the scaler is fitted on every sample, i.e. before any
# cross-validation split is made.
scaler = StandardScaler()
y_column = np.array(y).reshape(-1, 1)
y = scaler.fit_transform(y_column).flatten().tolist()

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, mean_squared_error

# Candidate model: standardise the input features, then fit a support-vector regressor.
pipeline_steps = [
    ('scaler_x', StandardScaler()),
    ('svr', SVR()),
]
estimator = Pipeline(pipeline_steps)

# Hyper-parameter grid; the `svr__` prefix routes each entry to the pipeline's 'svr' step.
# NOTE(review): per the SVR documentation gamma only applies to the rbf/poly/sigmoid
# kernels, so the 'linear' candidates are presumably evaluated twice with identical
# results -- confirm before trimming the grid.
param_grid = dict(
    svr__C=[0.01, 0.1],
    svr__gamma=['scale', 'auto'],
    svr__kernel=['linear', 'rbf'],
)
# best svr
# {'svr__C': 0.01, 'svr__gamma': 'scale', 'svr__kernel': 'linear'}


# estimator = Pipeline([
#     ('scaler_x', StandardScaler()),
#     ('mlp', MLPRegressor(max_iter=1000))
# ])

# param_grid = {
#     'mlp__hidden_layer_sizes': [2*(100,)],
#     'mlp__activation': ['identity', 'relu'],
#     'mlp__alpha': [0.1],
#     'mlp__learning_rate': ['adaptive']
# }

# best mlp
# {'mlp__activation': 'identity',
#  'mlp__alpha': 0.1,
#  'mlp__hidden_layer_sizes': (100, 100),
#  'mlp__learning_rate': 'adaptive'}

def rmse(y_true, y_pred):
    """Return the root-mean-squared error between `y_true` and `y_pred`.

    Computed as the square root of scikit-learn's `mean_squared_error`.
    """
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

# Wrap rmse as a scorer. greater_is_better=False declares it a loss, so
# scikit-learn sign-flips it: reported scores are negative RMSE and the
# "best" candidate is the one closest to zero.
rmse_scorer = make_scorer(rmse, greater_is_better=False)

# Cross-validation scheme: 10 random shuffles, each holding out 10% of the
# samples; the fixed random_state keeps the splits identical between runs.
n_splits, test_size = 10, 0.1
ss = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=42)

# Exhaustive search over param_grid, using every available core (n_jobs=-1)
# and logging progress per fit (verbose=2).
grid_search = GridSearchCV(
    estimator=estimator,
    param_grid=param_grid,
    cv=ss,
    scoring=rmse_scorer,
    n_jobs=-1,
    verbose=2,
)

grid_search.fit(X, y)

In [None]:
import pandas as pd

# Tabulate the per-candidate cross-validation results.
results_df = pd.DataFrame(grid_search.cv_results_)

# Write the artefacts into the results directory prepared in the setup cell
# (_RESULTS_DIR) instead of whatever the current working directory happens to be.
results_csv_path = os.path.join(_RESULTS_DIR, 'grid_search_results.csv')
results_df.to_csv(results_csv_path, index=False)

import pickle

# Persist the entire fitted GridSearchCV object so it can be inspected later
# without re-running the search.
results_pkl_path = os.path.join(_RESULTS_DIR, 'grid_search_results.pkl')
with open(results_pkl_path, 'wb') as f:
    pickle.dump(grid_search, f)

# To load (only unpickle files you created yourself -- pickle.load can execute
# arbitrary code):
# with open(results_pkl_path, 'rb') as f:
#     loaded_grid_search = pickle.load(f)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# One point per hyper-parameter candidate, in cv_results_ order. The scores come
# from the sign-flipped RMSE scorer, so higher (closer to zero) is better.
results = pd.DataFrame(grid_search.cv_results_)
mean_scores = results['mean_test_score']
plt.plot(mean_scores)
plt.show()

In [None]:
# Display the hyper-parameter combination the search selected as best
# (the candidate with the top mean cross-validated score).
grid_search.best_params_

In [None]:
# NOTE: in-sample check -- X is the same data grid_search was fitted on, so this
# shows how well the selected model reproduces its training targets, not how it
# generalises to unseen samples.
y_pred = grid_search.predict(X)

plt.scatter(y, y_pred)

# Identity line (perfect prediction). Span the actual data range rather than a
# fixed [-2, 2], which a standardised target is not guaranteed to stay within.
axis_lo = min(np.min(y), np.min(y_pred))
axis_hi = max(np.max(y), np.max(y_pred))
plt.plot([axis_lo, axis_hi], [axis_lo, axis_hi], 'r')

plt.xlabel('True target (standardised distance)')
plt.ylabel('Predicted target')
plt.title('In-sample predictions vs. true values')
plt.show()

In [None]:
# Display the raw cv_results_ dictionary (the same data tabulated as a DataFrame
# in the earlier cells); output can be long.
grid_search.cv_results_