In [20]:
from __future__ import division, print_function
import numpy as np
import sklearn
from sklearn.svm import SVC
from sklearn import datasets, svm 
from sklearn.cross_validation import train_test_split
import matplotlib.pyplot as plt

In [21]:
# Load the tab-separated samples: columns 0 and 1 are parsed as the two
# feature values of a sample, column 2 as its label.
X = []
Y = []
# `with` closes the file handle even if a line fails to parse.
with open('data/svm_data', 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of crashing in float('').
            continue
        comps = line.split('\t')
        x = [float(comps[0]), float(comps[1])]
        y = float(comps[2])
        X.append(x)
        Y.append(y)

# The module is imported as `np` at the top of the notebook; the bare name
# `numpy` is not bound and would raise NameError on a fresh kernel.
X = np.asarray(X)
Y = np.asarray(Y)

# Hold out 25% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=42)


In [22]:
def evaluate_on_test_data(model=None, X=None, y=None):
    """Return the classification accuracy of ``model`` as a percentage.

    Parameters
    ----------
    model : object
        A fitted estimator exposing ``predict(X)``.
    X : sequence, optional
        Samples to classify. Defaults to the notebook-level ``X_test``.
    y : sequence, optional
        Expected labels, aligned with ``X``. Defaults to the notebook-level
        ``y_test``.

    Returns
    -------
    float
        Percentage (0-100) of samples whose prediction equals the label.

    Raises
    ------
    ValueError
        If ``model`` is None, if ``y`` is empty, or if the number of
        predictions does not match the number of labels.
    """
    if model is None:
        raise ValueError("evaluate_on_test_data requires a fitted model")

    # Fall back to the notebook's held-out split so existing one-argument
    # calls keep working unchanged.
    if X is None:
        X = X_test
    if y is None:
        y = y_test

    if len(y) == 0:
        raise ValueError("cannot compute accuracy on an empty test set")

    predictions = model.predict(X)
    if len(predictions) != len(y):
        raise ValueError("model returned {} predictions for {} labels".format(
            len(predictions), len(y)))

    correct_classifications = sum(
        1 for predicted, expected in zip(predictions, y) if predicted == expected
    )
    accuracy = 100 * correct_classifications / len(y)  # Accuracy as a percentage
    return accuracy

In [23]:
# Fit one SVC per kernel type on the training split and record the
# accuracy each one reaches on the held-out test split.
kernels = ('linear', 'poly', 'rbf')
accuracies = []
for index, kernel in enumerate(kernels):
    # fit() hands back the fitted estimator, so construction and training
    # can be chained in a single expression.
    model = svm.SVC(kernel=kernel).fit(X_train, y_train)
    acc = evaluate_on_test_data(model)
    accuracies += [acc]
    print("{} % accuracy obtained with kernel = {}".format(acc, kernel))

100.0 % accuracy obtained with kernel = linear
100.0 % accuracy obtained with kernel = poly
100.0 % accuracy obtained with kernel = rbf


In [26]:
#Train SVMs with different kernels
svc = svm.SVC(kernel='linear').fit(X_train, y_train)
rbf_svc = svm.SVC(kernel='rbf', gamma=0.7).fit(X_train, y_train)
poly_svc = svm.SVC(kernel='poly', degree=3).fit(X_train, y_train)

#Create a mesh to plot in
h = .02  # step size in the mesh
# Pad the data range by 1 on every side so no sample sits on the plot edge.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

#Define title for the plots (same order as the classifiers in the loop below)
titles = ['SVC with linear kernel',
          'SVC with RBF kernel',
          'SVC with polynomial (degree 3) kernel']


for i, clf in enumerate((svc, rbf_svc, poly_svc)):
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    plt.figure(i)

    # Classify every mesh point; np.c_ pairs the flattened x and y grids
    # into an (n_points, 2) sample array.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)

    # Overlay all samples, colored by their label, so the regions can be
    # compared against the data.
    plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.ocean)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title(titles[i])

plt.show()

In [33]:
# Show the support vectors of the RBF-kernel classifier (rbf_svc, fitted
# with gamma=0.7 in the cell above), as exposed by its support_vectors_
# attribute.
print("The support vectors are:\n", rbf_svc.support_vectors_)

The support vectors are:
 [[ -0.97077391  -0.62661754]
 [ -0.8910801   -6.18625792]
 [  2.45388968   2.91046677]
 [  5.34117303  -3.19553617]
 [  0.94453556   2.39075841]
 [  1.14488806   1.51273535]
 [ -0.55918139   3.66067198]
 [  0.85999142  -3.41310613]
 [ -0.84088527  -1.76003545]
 [ -1.2771354   -3.22416291]
 [  4.66628721  -3.86040583]
 [  4.07654249  -3.39156092]
 [ -3.90321337   1.77474123]
 [  0.75865088   0.87614647]
 [ -3.15456936   4.87585942]
 [  1.54907953  -1.08253139]
 [  3.48749107   1.20840668]
 [ -6.93750893   0.7020688 ]
 [  2.39589476  -3.3279586 ]
 [ -2.70257823  -1.00432371]
 [  1.11662869   4.87628322]
 [  6.58416102  -1.23127843]
 [  0.91115508  -0.33940421]
 [  1.38943048  -3.19888517]
 [  3.80744639   0.12394229]
 [ -2.80558959   2.81707963]
 [  0.15461167  -3.97509154]
 [  2.75051075  -1.54666104]
 [ -1.16101165   8.39369078]
 [ -0.44405042  -2.11766743]
 [  2.34752767   4.5295017 ]
 [ -4.19302245   3.52558089]
 [ 21.02418499  19.76702944]
 [ 18.2988937   2