# cross-validation

In [1]:
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

In [2]:
iris = load_iris()
logreg = LogisticRegression()

In [3]:
scores = cross_val_score(logreg,iris.data,iris.target)
print(scores)

[ 0.96078431  0.92156863  0.95833333]


In [4]:
scores = cross_val_score(logreg,iris.data,iris.target,cv=5)
print(scores)

[ 1.          0.96666667  0.93333333  0.9         1.        ]


In [5]:
print(scores.mean())

0.96


In [6]:
from sklearn.model_selection import KFold

In [7]:
kfold = KFold(n_splits=5)
print(cross_val_score(logreg,iris.data,iris.target,cv=kfold))

[ 1.          0.93333333  0.43333333  0.96666667  0.43333333]


In [8]:
kfold = KFold(n_splits=3)
print(cross_val_score(logreg,iris.data,iris.target,cv=kfold))

[ 0.  0.  0.]


In [9]:
kfold = KFold(n_splits=3, shuffle=True,random_state=0)
print(cross_val_score(logreg,iris.data,iris.target,cv=kfold))

[ 0.9   0.96  0.96]


# grid-search

In [12]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [13]:
x_train, x_test, y_train, y_test = train_test_split(iris.data,iris.target, random_state=0)
print(x_train.shape[0],x_test.shape[0])

112 38


In [15]:
best_score = 0

for gamma in [0.001,0.01,0.1,1,10,100]:
    for c in [0.001,0.01,0.1,1,10,100]:
        
        svm = SVC(gamma=gamma, C=c)
        svm.fit(x_train,y_train)
        
        score = svm.score(x_test,y_test)
        
        if score > best_score:
            best_score = score
            best_param = [c,gamma]

In [17]:
print(best_score)
print(best_param)

0.973684210526
[100, 0.001]
