In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# NOTE: `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `sklearn.model_selection` is its replacement. Drop the old import once no
# cell references it any more.
from sklearn import cross_validation
from sklearn import datasets
from sklearn import model_selection
from sklearn import svm

In [4]:
# Load the iris dataset and expose its feature matrix and label vector as X / y.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Show the shapes of the features and of the labels.
print(X.shape, y.shape)

(150, 4) (150,)


In [7]:
# Hold out 40% of the samples as a test set; the fixed random_state makes the
# split repeatable across runs.
# `train_test_split` lives in `sklearn.model_selection` (the old
# `sklearn.cross_validation` module was removed in scikit-learn 0.20).
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.4, random_state=0)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(90, 4) (90,)
(60, 4) (60,)


In [8]:
# Fit a linear-kernel SVM on the training split, then display its score on the
# held-out split.
clf = svm.SVC(kernel='linear', C=1)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.96666666666666667

In [9]:
# Score the classifier with 5-fold cross-validation instead of a single
# train/test split. The estimator is passed unfitted; cross_val_score fits it
# on each training fold itself.
# `cross_val_score` lives in `sklearn.model_selection` (the old
# `sklearn.cross_validation` module was removed in scikit-learn 0.20).
clf = svm.SVC(kernel='linear', C=1)
scores = model_selection.cross_val_score(clf, X, y, cv=5)

scores

array([ 0.96666667,  1.        ,  0.96666667,  0.96666667,  1.        ])

In [10]:
# Average of the per-fold scores.
np.mean(scores)

0.98000000000000009

In [11]:
# Summarise the folds as the mean score with a +/- two-standard-deviation band.
print("Accuracy: %0.2f (+/- %0.2f)" % (np.mean(scores), np.std(scores) * 2))

Accuracy: 0.98 (+/- 0.03)


In [12]:
# `metrics` is not used in this cell; it is imported here because a later cell
# calls `metrics.accuracy_score`.
from sklearn import metrics

# Same 5-fold evaluation as before, but scored with the 'f1_weighted' scorer
# instead of the estimator's default score.
# `cross_val_score` lives in `sklearn.model_selection` (the old
# `sklearn.cross_validation` module was removed in scikit-learn 0.20).
scores = model_selection.cross_val_score(
    clf, X, y, cv=5, scoring='f1_weighted')
scores

array([ 0.96658312,  1.        ,  0.96658312,  0.96658312,  1.        ])

In [13]:
# Three random shuffle splits, each holding out 30% of the samples, with a
# fixed seed so the splits are repeatable.
#
# `model_selection.ShuffleSplit` replaces the removed
# `cross_validation.ShuffleSplit`: the number of splits is now `n_splits`
# (formerly `n_iter`) and the sample count is no longer a constructor argument;
# indices are generated by calling `.split(X)`.
n_samples = X.shape[0]

# Materialise the (train, test) index pairs as a plain list: an iterable of
# index pairs is accepted as `cv` by both the new and the legacy
# cross-validation helpers, so later cells that reuse `cv` keep working.
cv = list(
    model_selection.ShuffleSplit(
        n_splits=3, test_size=0.3, random_state=0).split(X))

# Sanity check: every split partitions the full dataset.
assert all(len(train) + len(test) == n_samples for train, test in cv)

model_selection.cross_val_score(clf, X, y, cv=cv)

array([ 0.97777778,  0.97777778,  1.        ])

In [15]:
from sklearn import preprocessing

# Re-create the 60/40 hold-out split (same seed as before).
# `train_test_split` lives in `sklearn.model_selection` (the old
# `sklearn.cross_validation` module was removed in scikit-learn 0.20).
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.4, random_state=0)

# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits, so the test data never influences the scaling.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_transformed = scaler.transform(X_train)
clf = svm.SVC(C=1).fit(X_train_transformed, y_train)
X_test_transformed = scaler.transform(X_test)
clf.score(X_test_transformed, y_test)

0.93333333333333335

In [17]:
from sklearn.pipeline import make_pipeline

# Chain the scaler and the SVM into one estimator so that cross-validation
# fits the scaler together with the classifier on each training fold.
clf = make_pipeline(preprocessing.StandardScaler(), svm.SVC(C=1))

# `cv` is the set of shuffle splits defined in an earlier cell.
# `cross_val_score` lives in `sklearn.model_selection` (the old
# `sklearn.cross_validation` module was removed in scikit-learn 0.20).
model_selection.cross_val_score(clf, X, y, cv=cv)

array([ 0.97777778,  0.93333333,  0.95555556])

In [20]:
# Get one cross-validated prediction per sample (10 folds), then compare those
# predictions against the true labels.
# `cross_val_predict` lives in `sklearn.model_selection` (the old
# `sklearn.cross_validation` module was removed in scikit-learn 0.20).
predicted = model_selection.cross_val_predict(clf, X, y, cv=10)
metrics.accuracy_score(y, predicted)

0.96666666666666667