# POLYNOMIAL REGRESSION
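We will use K-fold cross-validation to detect overfitting: instead of relying on a single train/test split, the model is evaluated on several different splits of the data and the scores are averaged.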

In [20]:
#import libraries and load the Iris dataset

from sklearn.model_selection import cross_val_score, train_test_split
from sklearn import datasets
from sklearn import svm

# Load scikit-learn's bundled iris dataset; the cells below read its
# .data (features) and .target (class labels) attributes.
iris = datasets.load_iris()

In [21]:
# Hold out 40% of the iris samples for testing; the fixed random_state
# makes the split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.4,
    random_state=0,
)

# Create a linear-kernel support vector classifier and fit it on the
# training portion only.
clf = svm.SVC(kernel='linear', C=1)
clf.fit(X_train, y_train)

# Evaluate the fitted model on the held-out test portion.
clf.score(X_test, y_test)

0.9666666666666667

In [22]:
# cross_val_score needs the estimator, the complete feature matrix, the
# true labels for every sample, and the number of folds to use.
scores = cross_val_score(estimator=clf, X=iris.data, y=iris.target, cv=5)

# One accuracy value per fold
print(scores)

# Average accuracy across the 5 folds
print(scores.mean())

[0.96666667 1.         0.96666667 0.96666667 1.        ]
0.9800000000000001


A mean cross-validated accuracy of 0.98 shows that the linear-kernel model performs quite well.
Let's try a polynomial kernel next.

In [23]:
# Fit the same kind of classifier, this time with a polynomial kernel,
# on the training split.
clf2 = svm.SVC(kernel='poly', C=1).fit(X_train, y_train)

# Run 5-fold cross-validation over the entire data set. cross_val_score
# fits its own copy of the estimator on each fold, so these scores do not
# depend on the fit above.
# NOTE: the earlier bare `clf2.score(X_test, y_test)` expression was removed
# from this cell: it was not the last expression, so its value was computed
# and silently discarded.
score = cross_val_score(clf2, iris.data, iris.target, cv=5)

# Accuracy of each fold, followed by the mean over all 5 folds.
print(score)
print(score.mean())

[0.96666667 1.         0.96666667 0.96666667 1.        ]
0.9800000000000001


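The more complex polynomial kernel is expected to produce lower accuracy than the simple linear kernel, which would be a sign that the polynomial kernel is overfitting. (Note: the cross-validation scores recorded above are identical to the linear-kernel scores, so that output looks stale; re-run the cell to confirm the comparison.) A single train/test split gives only one such estimate, so it is less reliable than the cross-validated mean. Here is the polynomial model evaluated on the single split: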

In [24]:
# Create a polynomial-kernel support vector classifier and fit it on the
# training split only.
clf3 = svm.SVC(kernel='poly', C=1)
clf3.fit(X_train, y_train)

# Evaluate it on the single held-out test split.
clf3.score(X_test, y_test)

0.9