In [1]:
import pandas as pd
#import seaborn as sns
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import numpy as np

#from sklearn.svm import SVR
#from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error

#from sklearn.linear_model import Ridge

from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.multioutput import MultiOutputRegressor
import math
#from yellowbrick.regressor import ResidualsPlot
#from sklearn.metrics import r2_score


In [2]:
df=pd.read_csv("database.csv")

In [3]:
def generate_default_kernel_ridge_model(X,Y):
    array_var=[]
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, 
        test_size=0.25, 
        random_state=42)

    scaling_x=StandardScaler()
    scaling_y=StandardScaler()
    X_train=scaling_x.fit_transform(X_train)
    X_test=scaling_x.transform(X_test)
    Y_train=scaling_y.fit_transform(Y_train)
    #Y_test=scaling_y.transform(Y_test)

    regressor= KernelRidge()
    model=regressor.fit(X_train,Y_train)
    print(model.get_params())

    array_var.extend([model, X_train, Y_train, X_test, Y_test, scaling_y])
    return array_var


In [4]:
def grid_search_cv(parameters, model, X_train, Y_train):
    grid_search=GridSearchCV(estimator=MultiOutputRegressor(model),
                        param_grid=parameters,
                         cv=2,
                         scoring='neg_mean_absolute_percentage_error',
                        n_jobs=-1
                        )
    grid_search= grid_search.fit(X_train, Y_train)
    print('Best score in CV: ', grid_search.best_score_)
    print('Best parameters in CV: ', grid_search.best_params_)
    return grid_search

In [5]:
def calculate_mean_absolute_percentage_error_multi(parameter_alpha, parameter_kernel, parameter_gamma, X_train, Y_train, X_test, Y_test, scaling_y):
    
    regressor=KernelRidge(alpha= parameter_alpha, kernel=parameter_kernel, gamma=parameter_gamma)
    
    wrapper=MultiOutputRegressor(regressor)
    
    #model=regressor.fit(X_train,Y_train)
    wrapper.fit(X_train, Y_train)

    #Y_pred=model.predict(X_test)
    Y_pred=wrapper.predict(X_test)
    
    Y_pred=scaling_y.inverse_transform(Y_pred)

    error= mean_absolute_percentage_error(Y_test, Y_pred, multioutput='raw_values')    
    return error

# Leaving random part of dataset out

In [25]:
Y = df.iloc[:,25:28]
X = df.iloc[:,:8]
#print(X.head())
Y.head()

Unnamed: 0,q_abs,q_sca,g
0,0.11691,0.000389,0.003798
1,0.153,0.001005,0.008979
2,0.2006,0.002514,0.02297
3,0.23162,0.004187,0.037044
4,0.25746,0.005988,0.051049


In [6]:
parameters=[{'estimator__alpha':[0, 0.0001, 0.001, 0.01, 0.1],
            'estimator__kernel':['rbf'],
            'estimator__gamma':[0, 0.5,1]}]

In [27]:
array_var=generate_default_kernel_ridge_model(X,Y)

grid_search=grid_search_cv(parameters, array_var[0], array_var[1], array_var[2])

parameter_alpha=grid_search.best_params_['alpha']
parameter_kernel=grid_search.best_params_['kernel']
parameter_gamma=grid_search.best_params_['gamma']

error=calculate_mean_absolute_percentage_error_multi(parameter_alpha, parameter_kernel, parameter_gamma, array_var[1], array_var[2], array_var[3], array_var[4], array_var[5])
print('Mean absolute percentage error on test set: ', error)


{'alpha': 1, 'coef0': 1, 'degree': 3, 'gamma': None, 'kernel': 'linear', 'kernel_params': None}


ValueError: Invalid parameter alpha for estimator MultiOutputRegressor(estimator=KernelRidge()). Check the list of available parameters with `estimator.get_params().keys()`.

# Calculating remaining optical properties

In [104]:
def calculate_dependent_optical_properties(q_abs, q_sca, g, vol_equi_radius_outer, mass_bc, mass_organics, mass_total):
    q_ext=q_abs+q_sca
    c_geo=(math.pi)*((vol_equi_radius_outer)**2)
    c_ext= q_ext*c_geo*10e-7
    c_abs= q_abs*c_geo*10e-7
    c_sca= q_sca*c_geo*10e-7
    ssa=q_sca/q_ext
    mac_bc=c_abs*10e-13/mass_bc
    mac_organics=c_abs*10e-12/mass_organics
    mac_total=c_abs*10e-13/mass_total

    
    optical_all={
        'q_ext': q_ext,
        'q_abs':q_abs,
        'q_sca':q_sca,
        'g': g,
        'c_geo': c_geo,
        'c_ext': c_ext,
        'c_abs': c_abs,
        'c_sca': c_sca,
        'ssa': ssa,
        'mac_total':mac_total,
        'mac_organics': mac_organics,
        'mac_bc': mac_bc
    }
    return optical_all

In [42]:
Y = df.iloc[:,24:]
X = df.iloc[:,:24]
#print(X.head())
X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, 
        test_size=0.25, 
        random_state=42)


In [46]:

Y_train_features=Y_train.iloc[:,1:4]
X_train_features = X_train.iloc[:,:8]

Y_test_features=Y_test.iloc[:,1:4]
X_test_features = X_test.iloc[:,:8]

scaling_x=StandardScaler()
scaling_y=StandardScaler()
X_train_features=scaling_x.fit_transform(X_train_features)
X_test_features=scaling_x.transform(X_test_features)
Y_train_features=scaling_y.fit_transform(Y_train_features)

regressor=KernelRidge(alpha= 0.0001, kernel='rbf', gamma=1)
    
wrapper=MultiOutputRegressor(regressor)
    
    #model=regressor.fit(X_train,Y_train)
wrapper.fit(X_train_features, Y_train_features)

    #Y_pred=model.predict(X_test)
Y_pred_features=wrapper.predict(X_test_features)
    
Y_pred_features=scaling_y.inverse_transform(Y_pred_features)




array([[1.13765005, 0.31543298, 0.62253828],
       [0.44526496, 0.27563726, 0.70829572],
       [0.57716551, 0.05728544, 0.17186893],
       ...,
       [1.27274114, 0.25183712, 0.56776192],
       [1.06178332, 0.31620977, 0.75234468],
       [1.32138129, 0.59871472, 0.76230745]])

In [122]:
X_test=X_test.reset_index(drop=True)
X_test

Unnamed: 0,wavelength,fractal_dimension,fraction_of_coating,primary_particle_size,number_of_primary_particles,vol_equi_radius_outer,vol_equi_radius_inner,equi_mobility_dia,mie_epsilon,length_scale_factor,...,volume_total,volume_bc,volume_organics,density_bc,density_organics,mass_bc,mass_organics,mass_total,mr_total/bc,mr_nonBC/BC
0,467,1.9,0,15.0,190,86.233456,86.233456,346.163353,2,0.134544,...,2.686062e+06,2.686062e+06,0.000000e+00,1.5,1.1,4.029090e-15,0.000000e+00,4.029090e-15,1.000000,0.000000
1,530,2.2,80,25.6,95,116.810307,68.443540,414.862803,2,0.118551,...,6.676243e+06,1.343031e+06,5.333212e+06,1.5,1.1,2.014550e-15,5.866530e-15,7.881080e-15,3.912086,2.912086
2,467,1.8,5,15.3,18,40.097343,39.311121,106.146554,2,0.134544,...,2.700445e+05,2.544690e+05,1.557554e+04,1.5,1.1,3.817040e-16,1.713310e-17,3.988370e-16,1.044886,0.044886
3,467,1.7,25,16.5,225,100.356333,91.233030,415.071125,2,0.134544,...,4.233728e+06,3.180863e+06,1.052866e+06,1.5,1.1,4.771290e-15,1.158150e-15,5.929450e-15,1.242733,0.242733
4,660,1.8,25,16.5,250,103.943487,94.494079,437.984601,2,0.095200,...,4.704142e+06,3.534292e+06,1.169851e+06,1.5,1.1,5.301440e-15,1.286840e-15,6.588270e-15,1.242733,0.242733
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2452,467,1.6,5,15.3,95,69.812410,68.443540,247.945347,2,0.134544,...,1.425235e+06,1.343031e+06,8.220423e+04,1.5,1.1,2.014550e-15,9.042470e-17,2.104970e-15,1.044886,0.044886
2453,660,2.2,90,30.0,350,211.418962,105.709481,945.412741,2,0.095200,...,3.958407e+07,4.948008e+06,3.463606e+07,1.5,1.1,7.422010e-15,3.809970e-14,4.552170e-14,6.133333,5.133333
2454,530,1.6,5,15.3,400,112.731364,110.520945,516.140003,2,0.118551,...,6.000990e+06,5.654867e+06,3.461231e+05,1.5,1.1,8.482300e-15,3.807350e-16,8.863040e-15,1.044886,0.044886
2455,660,2.0,25,16.5,800,153.172431,139.247665,792.657058,2,0.095200,...,1.505326e+07,1.130973e+07,3.743522e+06,1.5,1.1,1.696460e-14,4.117870e-15,2.108250e-14,1.242733,0.242733


In [48]:
predicted_y_features=pd.DataFrame(Y_pred_features, columns=["q_abs", "q_sca", "g"])
predicted_y_features

Unnamed: 0,q_abs,q_sca,g
0,1.137650,0.315433,0.622538
1,0.445265,0.275637,0.708296
2,0.577166,0.057285,0.171869
3,1.233863,0.285980,0.614051
4,0.730723,0.128643,0.606940
...,...,...,...
2452,0.997771,0.162970,0.478116
2453,0.308241,0.445723,0.807850
2454,1.272741,0.251837,0.567762
2455,1.061783,0.316210,0.752345


In [124]:
predicted_optical_all=calculate_dependent_optical_properties(predicted_y_features['q_abs'], predicted_y_features['q_sca'],  predicted_y_features['g'], X_test['vol_equi_radius_outer'], X_test['mass_bc'], X_test['mass_organics'], X_test['mass_total'])
predicted_optical_properties=pd.DataFrame.from_dict(predicted_optical_all)
predicted_optical_properties

Unnamed: 0,q_ext,q_abs,q_sca,g,c_geo,c_ext,c_abs,c_sca,ssa,mac_total,mac_organics,mac_bc
0,1.453083,1.137650,0.315433,0.622538,23361.539454,0.033946,0.026577,0.007369,0.217078,6.596342,inf,6.596342
1,0.720902,0.445265,0.275637,0.708296,42865.925724,0.030902,0.019087,0.011815,0.382350,2.421837,3.253490,9.474421
2,0.634451,0.577166,0.057285,0.171869,5051.043062,0.003205,0.002915,0.000289,0.090291,7.309472,170.155305,7.637562
3,1.519842,1.233863,0.285980,0.614051,31640.215998,0.048088,0.039040,0.009048,0.188164,6.584032,33.708664,8.182208
4,0.859366,0.730723,0.128643,0.606940,33942.547422,0.029169,0.024803,0.004366,0.149695,3.764660,19.274034,4.678464
...,...,...,...,...,...,...,...,...,...,...,...,...
2452,1.160741,0.997771,0.162970,0.478116,15311.408304,0.017773,0.015277,0.002495,0.140402,7.257715,168.950220,7.583467
2453,0.753964,0.308241,0.445723,0.807850,140422.837723,0.105874,0.043284,0.062590,0.591173,0.950844,1.136073,5.831848
2454,1.524578,1.272741,0.251837,0.567762,39924.491693,0.060868,0.050814,0.010054,0.165185,5.733196,133.461707,5.990538
2455,1.377993,1.061783,0.316210,0.752345,73707.398953,0.101568,0.078261,0.023307,0.229471,3.712145,19.005283,4.613211


# Leave fractal dimesnion: =2.2 as test set

In [16]:
train_set=df[(df['fractal_dimension']<=2.1) | (df['fractal_dimension']>2.2)]
test_set=df[df['fractal_dimension']==2.2]
test_set.shape

(2195, 36)

In [8]:
Y_train = train_set.iloc[:,25:28]
X_train = train_set.iloc[:,:8]
Y_test = test_set.iloc[:,25:28]
X_test = test_set.iloc[:,:8]

In [36]:
parameters=[{'estimator__alpha':[0, 0.0001, 0.001, 0.01, 0.1],
            'estimator__kernel':['rbf'],
            'estimator__gamma':[0, 0.5,1]}]

In [9]:
#array_var=generate_default_kernel_ridge_model(X,Y)
scaling_x=StandardScaler()
scaling_y=StandardScaler()
X_train=scaling_x.fit_transform(X_train)
X_test=scaling_x.transform(X_test)
Y_train=scaling_y.fit_transform(Y_train)
#Y_test=scaling_y.transform(Y_test)

regressor= KernelRidge()
model=regressor.fit(X_train,Y_train)
grid_search=grid_search_cv(parameters, model, X_train, Y_train)

parameter_alpha=grid_search.best_params_['estimator__alpha']
parameter_kernel=grid_search.best_params_['estimator__kernel']
parameter_gamma=grid_search.best_params_['estimator__gamma']

error=calculate_mean_absolute_percentage_error_multi(parameter_alpha, parameter_kernel, parameter_gamma, X_train, Y_train, X_test, Y_test, scaling_y)
print('Mean absolute percentage error on test set: ', error)


Best score in CV:  -1.1131582243789162
Best parameters in CV:  {'estimator__alpha': 0.001, 'estimator__gamma': 1, 'estimator__kernel': 'rbf'}
Mean absolute percentage error on test set:  [0.02084082 0.09594194 0.1088999 ]


# Leave fraction of coating: ==50 as test set

In [14]:
train_set=df[(df['fraction_of_coating']<=40) | (df['fraction_of_coating']>50)]
test_set=df[df['fraction_of_coating']==50]
test_set.shape


(1264, 36)

In [15]:

Y_train = train_set.iloc[:,25:28]
X_train = train_set.iloc[:,:8]
Y_test = test_set.iloc[:,25:28]
X_test = test_set.iloc[:,:8]

#array_var=generate_default_kernel_ridge_model(X,Y)
scaling_x=StandardScaler()
scaling_y=StandardScaler()
X_train=scaling_x.fit_transform(X_train)
X_test=scaling_x.transform(X_test)
Y_train=scaling_y.fit_transform(Y_train)
#Y_test=scaling_y.transform(Y_test)

regressor= KernelRidge()
model=regressor.fit(X_train,Y_train)
grid_search=grid_search_cv(parameters, model, X_train, Y_train)

parameter_alpha=grid_search.best_params_['estimator__alpha']
parameter_kernel=grid_search.best_params_['estimator__kernel']
parameter_gamma=grid_search.best_params_['estimator__gamma']

error=calculate_mean_absolute_percentage_error_multi(parameter_alpha, parameter_kernel, parameter_gamma, X_train, Y_train, X_test, Y_test, scaling_y)
print('Mean absolute percentage error on test set: ', error)

Best score in CV:  -1.356543027619559
Best parameters in CV:  {'estimator__alpha': 0.01, 'estimator__gamma': 1, 'estimator__kernel': 'rbf'}
Mean absolute percentage error on test set:  [0.01712927 0.12712033 0.12401284]


# Leave fractal dimesnion: =2.2 as test set (Not Multiregressor)

In [18]:
train_set=df[(df['fractal_dimension']<=2.1) | (df['fractal_dimension']>2.2)]
test_set=df[df['fractal_dimension']==2.2]
test_set.shape

(2195, 36)

In [19]:
Y_train = train_set.iloc[:,25:28]
X_train = train_set.iloc[:,:8]
Y_test = test_set.iloc[:,25:28]
X_test = test_set.iloc[:,:8]

In [20]:
parameters=[{'alpha':[0, 0.0001, 0.001, 0.01, 0.1],
            'kernel':['rbf'],
            'gamma':[0, 0.5,1]}]

In [21]:
#array_var=generate_default_kernel_ridge_model(X,Y)
scaling_x=StandardScaler()
scaling_y=StandardScaler()
X_train=scaling_x.fit_transform(X_train)
X_test=scaling_x.transform(X_test)
Y_train=scaling_y.fit_transform(Y_train)
#Y_test=scaling_y.transform(Y_test)

regressor= KernelRidge()
model=regressor.fit(X_train,Y_train)
grid_search=GridSearchCV(estimator=model,
                        param_grid=parameters,
                         cv=2,
                         scoring='neg_mean_absolute_percentage_error',
                        n_jobs=-1
                        )
grid_search= grid_search.fit(X_train, Y_train)
print('Best score in CV: ', grid_search.best_score_)
print('Best parameters in CV: ', grid_search.best_params_)
#grid_search=grid_search_cv(parameters, model, X_train, Y_train)

parameter_alpha=grid_search.best_params_['alpha']
parameter_kernel=grid_search.best_params_['kernel']
parameter_gamma=grid_search.best_params_['gamma']

regressor=KernelRidge(alpha= parameter_alpha, kernel=parameter_kernel, gamma=parameter_gamma)
    
#wrapper=MultiOutputRegressor(regressor)
model=regressor.fit(X_train,Y_train)
    #wrapper.fit(X_train, Y_train)

Y_pred=model.predict(X_test)
#Y_pred=wrapper.predict(X_test)
    
Y_pred=scaling_y.inverse_transform(Y_pred)

error= mean_absolute_percentage_error(Y_test, Y_pred, multioutput='raw_values')   


#error=calculate_mean_absolute_percentage_error_multi(parameter_alpha, parameter_kernel# parameter_gamma, X_train, Y_train, X_test, Y_test, scaling_y)
print('Mean absolute percentage error on test set: ', error)

Best score in CV:  -1.1131582243789473
Best parameters in CV:  {'alpha': 0.001, 'gamma': 1, 'kernel': 'rbf'}
Mean absolute percentage error on test set:  [0.02084082 0.09594194 0.1088999 ]


# Leave fraction of coating: ==50 as test set (Not Multiregressor)

In [23]:
train_set=df[(df['fraction_of_coating']<=40) | (df['fraction_of_coating']>50)]
test_set=df[df['fraction_of_coating']==50]
test_set.shape


(1264, 36)

In [24]:

Y_train = train_set.iloc[:,25:28]
X_train = train_set.iloc[:,:8]
Y_test = test_set.iloc[:,25:28]
X_test = test_set.iloc[:,:8]

#array_var=generate_default_kernel_ridge_model(X,Y)
scaling_x=StandardScaler()
scaling_y=StandardScaler()
X_train=scaling_x.fit_transform(X_train)
X_test=scaling_x.transform(X_test)
Y_train=scaling_y.fit_transform(Y_train)
#Y_test=scaling_y.transform(Y_test)

regressor= KernelRidge()
model=regressor.fit(X_train,Y_train)

grid_search=GridSearchCV(estimator=model,
                        param_grid=parameters,
                         cv=2,
                         scoring='neg_mean_absolute_percentage_error',
                        n_jobs=-1
                        )
grid_search= grid_search.fit(X_train, Y_train)
print('Best score in CV: ', grid_search.best_score_)
print('Best parameters in CV: ', grid_search.best_params_)
#grid_search=grid_search_cv(parameters, model, X_train, Y_train)

parameter_alpha=grid_search.best_params_['alpha']
parameter_kernel=grid_search.best_params_['kernel']
parameter_gamma=grid_search.best_params_['gamma']

regressor=KernelRidge(alpha= parameter_alpha, kernel=parameter_kernel, gamma=parameter_gamma)
    
#wrapper=MultiOutputRegressor(regressor)
model=regressor.fit(X_train,Y_train)
    #wrapper.fit(X_train, Y_train)

Y_pred=model.predict(X_test)
#Y_pred=wrapper.predict(X_test)
    
Y_pred=scaling_y.inverse_transform(Y_pred)

error= mean_absolute_percentage_error(Y_test, Y_pred, multioutput='raw_values')   


#error=calculate_mean_absolute_percentage_error_multi(parameter_alpha, parameter_kernel# parameter_gamma, X_train, Y_train, X_test, Y_test, scaling_y)
print('Mean absolute percentage error on test set: ', error)



Best score in CV:  -1.3565430276194639
Best parameters in CV:  {'alpha': 0.01, 'gamma': 1, 'kernel': 'rbf'}
Mean absolute percentage error on test set:  [0.01712927 0.12712033 0.12401284]


# Leave fractal dimesnion: ==  2.8 as test set

In [8]:
train_set=df[(df['fractal_dimension']<=2.7) | (df['fractal_dimension']>=11)]
test_set=df[df['fractal_dimension']==2.8]
test_set.shape

(92, 36)

In [7]:
Y_train = train_set.iloc[:,25:28]
X_train = train_set.iloc[:,:8]
Y_test = test_set.iloc[:,25:28]
X_test = test_set.iloc[:,:8]
Y_test.head()

Unnamed: 0,q_abs,q_sca,g
6764,0.15654,0.00093,0.00569
6765,0.20533,0.002392,0.013693
6766,0.26387,0.005929,0.024223
6767,0.30759,0.010165,0.031153
6768,0.34084,0.01483,0.037551


In [5]:
parameters=[{'alpha':[0, 0.0001, 0.001, 0.1, 1],
            'kernel':['rbf'],
            'gamma':[0,1, 10, 100]}]

In [9]:
#array_var=generate_default_kernel_ridge_model(X,Y)
scaling_x=StandardScaler()
scaling_y=StandardScaler()
X_train=scaling_x.fit_transform(X_train)
X_test=scaling_x.transform(X_test)
Y_train=scaling_y.fit_transform(Y_train)
#Y_test=scaling_y.transform(Y_test)

regressor= KernelRidge()
#model=regressor.fit(X_train,Y_train)
grid_search=grid_search_cv(parameters, regressor, X_train, Y_train)

parameter_alpha=grid_search.best_params_['alpha']
parameter_kernel=grid_search.best_params_['kernel']
parameter_gamma=grid_search.best_params_['gamma']

error=calculate_mean_absolute_percentage_error_multi(parameter_alpha, parameter_kernel, parameter_gamma, X_train, Y_train, X_test, Y_test, scaling_y)
print('Mean absolute percentage error on test set: ', error)


Best score in CV:  -0.9744756530512584
Best parameters in CV:  {'alpha': 0.1, 'gamma': 100, 'kernel': 'rbf'}
Mean absolute percentage error on test set:  [0.19898543 2.72745065 1.59866281]


# Leave fraction of coating: ==80, 90 as test set

In [57]:
train_set=df[df['fraction_of_coating']<=70]
test_set=df[df['fraction_of_coating']>=80]
train_set.shape


(9196, 36)

In [58]:

Y_train = train_set.iloc[:,25:28]
X_train = train_set.iloc[:,:8]
Y_test = test_set.iloc[:,25:28]
X_test = test_set.iloc[:,:8]

#array_var=generate_default_kernel_ridge_model(X,Y)
scaling_x=StandardScaler()
scaling_y=StandardScaler()
X_train=scaling_x.fit_transform(X_train)
X_test=scaling_x.transform(X_test)
Y_train=scaling_y.fit_transform(Y_train)
#Y_test=scaling_y.transform(Y_test)

regressor= KernelRidge()
#model=regressor.fit(X_train,Y_train)
grid_search=grid_search_cv(parameters, regressor, X_train, Y_train)

parameter_alpha=grid_search.best_params_['alpha']
parameter_kernel=grid_search.best_params_['kernel']
parameter_gamma=grid_search.best_params_['gamma']

error=calculate_mean_absolute_percentage_error_multi(parameter_alpha, parameter_kernel, parameter_gamma, X_train, Y_train, X_test, Y_test, scaling_y)
print('Mean absolute percentage error on test set: ', error)

Best score in CV:  -0.96436185024248
Best parameters in CV:  {'alpha': 0.0001, 'gamma': 100, 'kernel': 'rbf'}
Mean absolute percentage error on test set:  [1.74355348 3.37444507 1.43679927]


# Leave wavelength = 467 out

In [6]:
train_set=df[df['wavelength']>467]
test_set=df[df['wavelength']==467]
test_set.shape

(3014, 36)

In [7]:

Y_train = train_set.iloc[:,25:28]
X_train = train_set.iloc[:,:8]
Y_test = test_set.iloc[:,25:28]
X_test = test_set.iloc[:,:8]

#array_var=generate_default_kernel_ridge_model(X,Y)
scaling_x=StandardScaler()
scaling_y=StandardScaler()
X_train=scaling_x.fit_transform(X_train)
X_test=scaling_x.transform(X_test)
Y_train=scaling_y.fit_transform(Y_train)
#Y_test=scaling_y.transform(Y_test)

regressor= KernelRidge()
#model=regressor.fit(X_train,Y_train)
grid_search=grid_search_cv(parameters, regressor, X_train, Y_train)

parameter_alpha=grid_search.best_params_['alpha']
parameter_kernel=grid_search.best_params_['kernel']
parameter_gamma=grid_search.best_params_['gamma']

regressor=KernelRidge(alpha= parameter_alpha, kernel=parameter_kernel, gamma=parameter_gamma)
    
wrapper=MultiOutputRegressor(regressor)
    
    #model=regressor.fit(X_train,Y_train)
wrapper.fit(X_train, Y_train)

    #Y_pred=model.predict(X_test)
Y_pred=wrapper.predict(X_test)
    
Y_pred=scaling_y.inverse_transform(Y_pred)


Best score in CV:  -0.9993877181459276
Best parameters in CV:  {'alpha': 0.0001, 'gamma': 10, 'kernel': 'rbf'}


In [8]:

mape_error= mean_absolute_percentage_error(Y_test, Y_pred, multioutput='raw_values')
rmse= mean_squared_error(Y_test, Y_pred, multioutput='raw_values', squared=False)
    

print('Mean absolute percentage error on test set: ', mape_error)
rmse

Mean absolute percentage error on test set:  [0.46084883 3.88224472 2.2488797 ]


array([0.42033088, 0.22201353, 0.24427546])

# Result: when fractal dimension or fraction of coating or wavelength is left out, Mean absolute percentage error on test set is much higher approximately >50%. Same for Support vector regression.