In [1]:
# -*- coding: utf-8 -*-
# learning
# author: Cheng Zheng

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Machine learning algorithms
from sklearn.kernel_ridge import KernelRidge #Kernel ridge regression

# Cross-validation
'''
GridSearchCV = Perform an optimization of the parameters. 可以保证在指定的参数范围内找到精度最高的参数
                1. search for the best parameters for model; 
                2. automatically fit a new model on the training dataset w/ the parameters that 
                    can yield the best cross-validation performance.
cross_val_score = to implement cross-validation in scikit-learn.
KFold: just to divide the dataset.
    - When an integer is passed to the cv parameter of cross_val_score():
        cv=int (same as cv=StratifiedKFold(n_splits=int)) is used if the estimator is a classifier 
        and y is either binary or multiclass; In all other cases, KFold is used.
'''
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold

path = os.getcwd()#get current path
path_up1Dir = os.path.dirname(path)#go up one directory
dataset = pd.read_excel(path_up1Dir +'/x_TotalArea_y_MVPA/x_TotalArea_y_MVPA.xlsx')#to import the preprocessed dataset into a variable

# print(dataset)

     Unnamed: 0  Subject  Vision Surface         X         Y
0             0        1    Open    Firm -0.922794  0.702471
1             1        1  Closed    Firm -0.929073  0.702471
2             2        1    Open    Foam -0.673078  0.702471
3             3        1  Closed    Foam -0.453918  0.702471
4             4        2    Open    Firm -0.941319 -0.368982
..          ...      ...     ...     ...       ...       ...
639         639      162    Open    Foam  0.332269 -0.287135
640         640      163  Closed    Firm -0.806407 -0.517795
641         641      163    Open    Foam -0.112514 -0.517795
642         642      163    Open    Firm -0.797555 -0.517795
643         643      163  Closed    Foam -0.049677 -0.517795

[644 rows x 6 columns]


In [2]:
X = dataset['X']
y = dataset['Y']
other = pd.DataFrame(dataset.iloc[:,:4])
# print(X, y, other)

0     -0.922794
1     -0.929073
2     -0.673078
3     -0.453918
4     -0.941319
         ...   
639    0.332269
640   -0.806407
641   -0.112514
642   -0.797555
643   -0.049677
Name: X, Length: 644, dtype: float64 0      0.702471
1      0.702471
2      0.702471
3      0.702471
4     -0.368982
         ...   
639   -0.287135
640   -0.517795
641   -0.517795
642   -0.517795
643   -0.517795
Name: Y, Length: 644, dtype: float64      Unnamed: 0  Subject  Vision Surface
0             0        1    Open    Firm
1             1        1  Closed    Firm
2             2        1    Open    Foam
3             3        1  Closed    Foam
4             4        2    Open    Firm
..          ...      ...     ...     ...
639         639      162    Open    Foam
640         640      163  Closed    Firm
641         641      163    Open    Foam
642         642      163    Open    Firm
643         643      163  Closed    Foam

[644 rows x 4 columns]


In [4]:
X_ = X.values.reshape(-1, 1) # some algorithms need to reshape X if X has a single feature

In [9]:
# Kernel ridge regression
'''
The number of weak learners is controlled by the parameter n_estimators. 
The learning_rate parameter controls the contribution of the weak learners in the final combination. 
# kernel = Kernel mapping used internally. This parameter is directly passed to pairwise_kernel. 
    If kernel is a string, it must be one of the metrics in pairwise.PAIRWISE_KERNEL_FUNCTIONS. 
    If kernel is “precomputed”, X is assumed to be a kernel matrix. 
    Alternatively, if kernel is a callable function, it is called on each pair of instances (rows) 
        and the resulting value recorded. 
    The callable should take two rows from X as input and return the corresponding kernel value as a single number. 
# gamma = Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 and sigmoid kernels. 
    Interpretation of the default value is left to the kernel; see the documentation for sklearn.metrics.pairwise. 
    Ignored by other kernels.
# alpha?
'''

'''
# kernelRidge要考虑的参数是kernel和gamma，通过GridSearchCV我们需要确定param_grid也就是这个参数有哪些值才能找到最佳模型。
# np.logspace用于创建等比数列, 开始点和结束点是10的幂, 
    i.e. logspace(-2,1,4)表示起始数字为10^-2，结尾数字为10^1，元素个数为4的等比数列
'''
# Set param_grid, aka the main parameters in Ridge
param_grid_KernelRidge = [{'kernel':['rbf','laplacian','poly','chi2','sigmoid'],
                           'gamma':np.logspace(-3,2,6)},
                          {'kernel':['linear','cosine','additive_chi2']}]

# GridSearchCV
kr = GridSearchCV(estimator=KernelRidge(), # algorithm - Kernel ridge regression
                    param_grid=param_grid_KernelRidge, # specify the parameters to search over using a dict or list of dictionaries
                    cv=10 # 10-Fold
                    )

# Build the model
kr.fit(X_, y)

# Output the best parameter, cross-validation score, estimator, and the index of best estimator.
print("\n------------------ Kernel Ridge Regression Model")
print("Best parameter: {}".format(kr.best_params_))
print("Best cross-validation score: {:.2f}".format(kr.best_score_))
print("Average score in 10-Fold: \n", kr.cv_results_['mean_test_score'])
print("Std score in 10-Fold: \n", kr.cv_results_['std_test_score'])
print("Best estimator: {}".format(kr.best_estimator_))
print("The Index of Best estimator: {}".format(kr.best_index_))

Traceback (most recent call last):
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 172, in fit
    K = self._get_kernel(X)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 133, in _get_kernel
    return pairwise_kernels(X, Y, metric=self.kernel,
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1954, in pairwise_kernels
    return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1359, in _parallel_pairwise
    return f

Traceback (most recent call last):
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 172, in fit
    K = self._get_kernel(X)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 133, in _get_kernel
    return pairwise_kernels(X, Y, metric=self.kernel,
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1954, in pairwise_kernels
    return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1359, in _parallel_pairwise
    return f

Traceback (most recent call last):
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 172, in fit
    K = self._get_kernel(X)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 133, in _get_kernel
    return pairwise_kernels(X, Y, metric=self.kernel,
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1954, in pairwise_kernels
    return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1359, in _parallel_pairwise
    return f

Traceback (most recent call last):
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 172, in fit
    K = self._get_kernel(X)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 133, in _get_kernel
    return pairwise_kernels(X, Y, metric=self.kernel,
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1954, in pairwise_kernels
    return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1359, in _parallel_pairwise
    return f

Traceback (most recent call last):
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 172, in fit
    K = self._get_kernel(X)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 133, in _get_kernel
    return pairwise_kernels(X, Y, metric=self.kernel,
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1954, in pairwise_kernels
    return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1359, in _parallel_pairwise
    return f

Traceback (most recent call last):
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 172, in fit
    K = self._get_kernel(X)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 133, in _get_kernel
    return pairwise_kernels(X, Y, metric=self.kernel,
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1954, in pairwise_kernels
    return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1359, in _parallel_pairwise
    return f




------------------ Kernel Ridge Regression Model
Best parameter: {'kernel': 'cosine'}
Best cross-validation score: -0.15
Best estimator: KernelRidge(kernel='cosine')
The Index of Best estimator: 31


Traceback (most recent call last):
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 172, in fit
    K = self._get_kernel(X)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/kernel_ridge.py", line 133, in _get_kernel
    return pairwise_kernels(X, Y, metric=self.kernel,
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1954, in pairwise_kernels
    return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
  File "/Users/zclalala/opt/anaconda3/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1359, in _parallel_pairwise
    return f