<a href="https://colab.research.google.com/github/delicate99/Python_ML/blob/main/KfoldCV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV

In [15]:
df= pd.read_csv('https://raw.githubusercontent.com/justmarkham/scikit-learn-videos/master/data/Advertising.csv')
df.head(5)

Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper,Sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9


In [16]:
X= df.drop('Sales', axis=1)
y= df.Sales

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [18]:
scaler = StandardScaler()

In [19]:
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [20]:
X_train = scaler.transform(X_train)

In [21]:
X_test =scaler.transform(X_test)

In [22]:
model= Ridge(alpha =100)

In [23]:
score =cross_val_score(model, X_train, y_train, scoring ='neg_mean_squared_error', cv=5)

In [24]:
score

array([ -9.46077895,  -4.99113998, -11.41979353,  -7.02510055,
        -8.4068186 ])

In [25]:
abs(score.mean())

8.260726318542556

In [26]:
model= Ridge(alpha =1)

In [27]:
score =cross_val_score(model, X_train, y_train, scoring ='neg_mean_squared_error', cv=5)

In [28]:
abs(score.mean())

3.35824045994748

In [29]:
model.fit(X_train, y_train)

Ridge(alpha=1, copy_X=True, fit_intercept=True, max_iter=None, normalize=False,
      random_state=None, solver='auto', tol=0.001)

In [30]:
y_pred_final= model.predict(X_test)

In [31]:
mean_squared_error(y_test, y_pred_final)

2.4135807641416784

In [32]:
score =cross_validate(model, X_train, y_train, scoring =['neg_mean_squared_error','neg_mean_absolute_error'], cv=10)

In [33]:
scores =pd.DataFrame(score)

In [34]:
scores

Unnamed: 0,fit_time,score_time,test_neg_mean_squared_error,test_neg_mean_absolute_error
0,0.001467,0.004648,-2.876054,-1.440573
1,0.001611,0.000578,-3.395931,-1.613269
2,0.000926,0.000617,-2.114775,-1.217058
3,0.00071,0.000623,-0.819543,-0.796995
4,0.000738,0.000668,-3.572297,-1.420258
5,0.000717,0.000613,-8.10781,-1.485559
6,0.0007,0.000656,-1.781995,-1.033155
7,0.000711,0.000603,-2.704369,-1.241214
8,0.000672,0.000657,-5.20005,-1.622663
9,0.00092,0.000596,-2.847027,-1.168318


In [35]:
scores.mean()

fit_time                        0.000917
score_time                      0.001026
test_neg_mean_squared_error    -3.341985
test_neg_mean_absolute_error   -1.303906
dtype: float64

In [36]:
model.fit(X_train,y_train)

Ridge(alpha=1, copy_X=True, fit_intercept=True, max_iter=None, normalize=False,
      random_state=None, solver='auto', tol=0.001)

In [37]:
y_pred = model.predict(X_test)

In [38]:
mean_squared_error(y_test, y_pred)

2.4135807641416784

## Grid Search

In [39]:
base_elastic_net_model = ElasticNet()

In [51]:
param_grid = {'alpha':[0.1, 1, 5, 10, 50,100], 'l1_ratio':[0.1,0.5,0.7,.95, .99,1]}

In [52]:
grid_model = GridSearchCV(estimator = base_elastic_net_model, param_grid=param_grid, scoring='neg_mean_squared_error',
                          verbose =1, cv =5)

In [53]:
grid_model.fit(X_train, y_train)

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 180 out of 180 | elapsed:    0.3s finished


GridSearchCV(cv=5, error_score=nan,
             estimator=ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True,
                                  l1_ratio=0.5, max_iter=1000, normalize=False,
                                  positive=False, precompute=False,
                                  random_state=None, selection='cyclic',
                                  tol=0.0001, warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'alpha': [0.1, 1, 5, 10, 50, 100],
                         'l1_ratio': [0.1, 0.5, 0.7, 0.95, 0.99, 1]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=1)

In [54]:
grid_model.best_estimator_

ElasticNet(alpha=0.1, copy_X=True, fit_intercept=True, l1_ratio=1,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

In [55]:
grid_model.best_params_

{'alpha': 0.1, 'l1_ratio': 1}

In [56]:
pd.DataFrame(grid_model.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001169,0.000625,0.000471,0.000164,0.1,0.1,"{'alpha': 0.1, 'l1_ratio': 0.1}",-3.546724,-1.38189,-5.740664,-2.149507,-4.74987,-3.513731,1.605878,6
1,0.000669,1.6e-05,0.000373,1e-05,0.1,0.5,"{'alpha': 0.1, 'l1_ratio': 0.5}",-3.394053,-1.401407,-5.569151,-2.154947,-4.539475,-3.411807,1.519703,5
2,0.000659,2.3e-05,0.000361,1e-05,0.1,0.7,"{'alpha': 0.1, 'l1_ratio': 0.7}",-3.326987,-1.418961,-5.487357,-2.16395,-4.436442,-3.366739,1.475358,4
3,0.000686,4.4e-05,0.000371,2.2e-05,0.1,0.95,"{'alpha': 0.1, 'l1_ratio': 0.95}",-3.260909,-1.454707,-5.395304,-2.177452,-4.31084,-3.319842,1.419639,3
4,0.000662,2.1e-05,0.000377,2.3e-05,0.1,0.99,"{'alpha': 0.1, 'l1_ratio': 0.99}",-3.253311,-1.46198,-5.380239,-2.181097,-4.291155,-3.313556,1.410057,2
5,0.000735,7.5e-05,0.000383,3.7e-05,0.1,1.0,"{'alpha': 0.1, 'l1_ratio': 1}",-3.25147,-1.463869,-5.376255,-2.182076,-4.286254,-3.311985,1.407572,1
6,0.000693,4.1e-05,0.000374,1.9e-05,1.0,0.1,"{'alpha': 1, 'l1_ratio': 0.1}",-9.90536,-5.283147,-11.875346,-7.449195,-8.546841,-8.611978,2.225301,12
7,0.000698,4.2e-05,0.00036,2.1e-05,1.0,0.5,"{'alpha': 1, 'l1_ratio': 0.5}",-8.707071,-4.214228,-10.879261,-6.204545,-7.173031,-7.435627,2.255532,11
8,0.000827,0.000273,0.000354,1.3e-05,1.0,0.7,"{'alpha': 1, 'l1_ratio': 0.7}",-7.92087,-3.549562,-10.024877,-5.379553,-6.324836,-6.63994,2.206213,10
9,0.000675,1.4e-05,0.000372,1.2e-05,1.0,0.95,"{'alpha': 1, 'l1_ratio': 0.95}",-6.729435,-2.591285,-8.709842,-4.156317,-5.329916,-5.503359,2.102835,9


In [None]:
y_pred = grid_model.predict(X_test)

In [57]:
mean_squared_error(y_test, y_pred)

2.4135807641416784