# K-Fold Cross Validation

<img src="img/k-fold1.png" alt="fold1" width="500" height="600">
<img src="img/k-fold2.png" alt="fold2" width="800" height="600">

In [1]:
# import library
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use("seaborn-whitegrid")

In [2]:
iris = load_iris()

x = iris.data
y = iris.target

# normalization

x = (x-np.min(x))/(np.max(x)-np.min(x))

In [3]:
# train test split
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.3)

In [4]:
# knn model
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3)

In [5]:
# k-fold cv k = 10
from sklearn.model_selection import cross_val_score

accuracies = cross_val_score(estimator = knn, X = x_train, y = y_train, cv = 10)

In [6]:
print("avarage accuracy : ",np.mean(accuracies))
print("avarage std : ",np.std(accuracies))

avarage accuracy :  0.9618181818181817
avarage std :  0.062031716760843555


In [7]:
knn.fit(x_train,y_train)
print("test accuracy : ",knn.score(x_test,y_test))

test accuracy :  0.9777777777777777


# Grid Search with KNN
<img src="img/gridsearch.png" alt="fold1" width="500" height="600">

In [9]:
from sklearn.model_selection import GridSearchCV

grid = {"n_neighbors":np.arange(1,50)}
knn = KNeighborsClassifier()

knn_cv = GridSearchCV(knn, grid, cv=10)
knn_cv.fit(x,y)

GridSearchCV(cv=10, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])})

In [10]:
# print hyperparameter knn algoritması k değeri
print("tuned hyperparameter K: ",knn_cv.best_params_)
print("tuned parametreye göre en iyi accuracy (best score): ",knn_cv.best_score_)

tuned hyperparameter K:  {'n_neighbors': 13}
tuned parametreye göre en iyi accuracy (best score):  0.9800000000000001


# Grid Seacrh with Logistic Regression #

In [16]:
x = x[:100,:]
y = y[:100]

from sklearn.linear_model import LogisticRegression

grid = {"C":np.logspace(-3,3,7),"penalty":["l1","l2"]}    # l1 = lasso ve l2 = ridge

loreg = LogisticRegression()
loreg_cv = GridSearchCV(loreg,grid, cv=10)
loreg_cv.fit(x_train,y_train)



Traceback (most recent call last):
  File "/home/ea/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/ea/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/home/ea/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/home/ea/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/ea/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solve

Traceback (most recent call last):
  File "/home/ea/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/ea/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/home/ea/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/home/ea/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/ea/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1306, in fit
    solver = _check_solve

GridSearchCV(cv=10, estimator=LogisticRegression(),
             param_grid={'C': array([1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02, 1.e+03]),
                         'penalty': ['l1', 'l2']})

In [17]:
print("tuned hyperparameters : (best parameters): ",loreg_cv.best_params_)
print("accuracy : ",loreg_cv.best_score_)

tuned hyperparameters : (best parameters):  {'C': 1.0, 'penalty': 'l2'}
accuracy :  1.0


In [20]:
loreg2 = LogisticRegression(C=1,penalty="l2")
loreg2.fit(x_test,y_test)
print("score : ",loreg2.score(x_test,y_test))

score :  0.7
