## Cross Validation

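In cross validation, the dataset is split into several parts (folds), and each fold takes a turn as the validation set while the model is trained on the remaining data. Because every sample ends up being used for validation, the resulting scores give a more reliable estimate of how well the model generalizes than a single train/validation split.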

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score 
# Load the iris dataset bundle, then unpack its feature matrix and class labels.
iris = load_iris()
X, y = iris.data, iris.target

In [2]:
# Split the data into two equal halves; the fixed seed keeps the split reproducible.
X1, X2, y1, y2 = train_test_split(
    X,
    y,
    train_size=0.5,
    random_state=0,
)

In [3]:
# Manual two-fold check, part one: fit on (X1, y1) and score on the held-out (X2, y2).
model1 = GaussianNB().fit(X1, y1)
y_model1 = model1.predict(X2)
accuracy_score(y_true=y2, y_pred=y_model1)

0.9466666666666667

In [4]:
# Swap the roles of the two halves: fit on (X2, y2) and score on (X1, y1).
model2 = GaussianNB().fit(X2, y2)
y_model2 = model2.predict(X1)
accuracy_score(y_true=y1, y_pred=y_model2)

0.9733333333333334

In [None]:
# Alternative: let scikit-learn's cross_val_score handle the splitting and scoring.

In [5]:
# Instead of fitting a separate model by hand for each split, create a single
# unfitted estimator; it is passed to cross_val_score in the following cell.
model = GaussianNB()

In [6]:
# cross_val_score takes the estimator, the inputs, the labels, and the number of
# folds to split the data into; cv=5 requests five folds.
from sklearn.model_selection import cross_val_score
print(cross_val_score(estimator=model, X=X, y=y, cv=5))
# The printed array holds the accuracy for each of the five folds.
# Averaging those values gives the overall validation score.

[0.93333333 0.96666667 0.93333333 0.93333333 1.        ]


## Leave-One-Out Cross Validation

In [7]:
# Leave-one-out is the extreme case of cross validation: each round trains on
# every sample but one and validates on that single held-out sample.
# It is typically used when the dataset is very small.
from sklearn.model_selection import LeaveOneOut
scores = cross_val_score(estimator=model, X=X, y=y, cv=LeaveOneOut())
scores

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [8]:
# Average validation score: the mean of the leave-one-out `scores` array
# computed in the previous cell.
scores.mean()

0.9533333333333334