In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from matplotlib import pyplot
from pandas import DataFrame

In [None]:
import numpy as np
import pandas as pd

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Load the training and test sets.
# Each CSV is parsed from disk once; the label frame is a copy of the feature
# frame rather than a second read of the same file.
data_X = pd.read_csv('svm_train.csv')
data_y = data_X.copy()
# Matrix of independent variables (features): the first two columns.
X_train = data_X.iloc[:, 0:2].values
# Dependent variable vector: the last column.
y_train = data_y.iloc[:, -1].values

# Same layout for the test file.
data_Xtst = pd.read_csv('svm_test.csv')
data_ytst = data_Xtst.copy()
X_test = data_Xtst.iloc[:, 0:2].values
y_test = data_ytst.iloc[:, -1].values

In [None]:
# Column names, dtypes and non-null counts of the training frame.
data_X.info()

In [None]:
# First rows of the training frame, as a quick sanity check of the parse.
data_X.head()

In [None]:
# Summary statistics (count, mean, std, quartiles) of the numeric columns.
data_X.describe()

In [None]:
%matplotlib inline
import matplotlib as plt
data_X.hist(bins=50, figsize=(20,15))

In [None]:
from matplotlib import pyplot


In [None]:
# Scatter plot of the two training features, one colour per class label.
colors = {0: 'red', 1: 'blue'}
df = DataFrame({
    'x1': X_train[:, 0],
    'x2': X_train[:, 1],
    'label': y_train,
})
fig, ax = pyplot.subplots()
# Draw each class as its own series on the shared axes so it gets a legend entry.
grouped = df.groupby('label')
for key, group in grouped:
    group.plot.scatter(x='x1', y='x2', ax=ax, label=key, color=colors[key])
pyplot.show()

In [None]:
# Hold out 25% of the labelled training file as a validation set.
# The split is taken from the loaded frames rather than from X_train/y_train
# themselves, so re-running this cell always produces the same partition
# instead of shrinking the training set further on every execution.
X_train, X_val, y_train, y_val = train_test_split(
    data_X.iloc[:, 0:2].values,
    data_y.iloc[:, -1].values,
    test_size=0.25,
    random_state=1953,
)

In [None]:
# Linear soft-margin SVM on standardised features.
# random_state pins the solver's internal data shuffling so repeated runs of
# the notebook give the same fitted model.
svm_class = Pipeline([
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=10, loss="hinge", random_state=1953))
])

In [None]:
# Fit the pipeline (scaler, then classifier) on the training split.
svm_class.fit(X_train, y_train)

In [None]:
from sklearn.metrics import confusion_matrix

# Predict the held-out validation split and tabulate true vs. predicted labels.
y_pred = svm_class.predict(X_val)
cm = confusion_matrix(y_true=y_val, y_pred=y_pred)

In [None]:
# Confusion matrix on the validation split: rows = true class, columns = predicted class.
cm

In [None]:
# Decision regions of the current model, overlaid with the validation points.
# Set min and max values and give it some padding
x_min, x_max = X_val[:, 0].min() - .5, X_val[:, 0].max() + .5
y_min, y_max = X_val[:, 1].min() - .5, X_val[:, 1].max() + .5
h = 0.01  # grid spacing, in raw (unscaled) feature units
# Generate a grid of points with distance h between them
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict the class for every point of the grid
Z = svm_class.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Draw on an explicit figure/axes so the saved image is exactly this plot
fig, ax = pyplot.subplots()
ax.contourf(xx, yy, Z, cmap=pyplot.cm.Spectral, alpha=0.8)
# Black marker edges keep points visible where their colour matches the region behind them
ax.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=pyplot.cm.Spectral, edgecolors='k')
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_title('Linear SVM: decision regions on validation data')
fig.savefig('svm_linear.png')

In [None]:
from sklearn.metrics import confusion_matrix

# Degree-3 polynomial-kernel SVM on standardised features.
svm_class = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("poly_svc", SVC(C=5, kernel="poly", degree=3, coef0=1)),
])
# Train on the training split, then score the held-out validation split.
y_pred = svm_class.fit(X_train, y_train).predict(X_val)
cm = confusion_matrix(y_true=y_val, y_pred=y_pred)

In [None]:
# Confusion matrix on the validation split: rows = true class, columns = predicted class.
cm

In [None]:
# Decision regions of the current model, overlaid with the validation points.
# Set min and max values and give it some padding
x_min, x_max = X_val[:, 0].min() - .5, X_val[:, 0].max() + .5
y_min, y_max = X_val[:, 1].min() - .5, X_val[:, 1].max() + .5
h = 0.01  # grid spacing, in raw (unscaled) feature units
# Generate a grid of points with distance h between them
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict the class for every point of the grid
Z = svm_class.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Draw on an explicit figure/axes so the saved image is exactly this plot
fig, ax = pyplot.subplots()
ax.contourf(xx, yy, Z, cmap=pyplot.cm.Spectral, alpha=0.8)
# Black marker edges keep points visible where their colour matches the region behind them
ax.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=pyplot.cm.Spectral, edgecolors='k')
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_title('Polynomial SVM (degree 3): decision regions on validation data')
fig.savefig('svm_poly3.png')

In [None]:
from sklearn.metrics import confusion_matrix

# Degree-10 polynomial-kernel SVM on standardised features.
svm_class = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("poly_svc", SVC(C=5, kernel="poly", degree=10, coef0=1)),
])
# Train on the training split, then score the held-out validation split.
y_pred = svm_class.fit(X_train, y_train).predict(X_val)
cm = confusion_matrix(y_true=y_val, y_pred=y_pred)

In [None]:
# Confusion matrix on the validation split: rows = true class, columns = predicted class.
cm

In [None]:
# Decision regions of the current model, overlaid with the validation points.
# Set min and max values and give it some padding
x_min, x_max = X_val[:, 0].min() - .5, X_val[:, 0].max() + .5
y_min, y_max = X_val[:, 1].min() - .5, X_val[:, 1].max() + .5
h = 0.01  # grid spacing, in raw (unscaled) feature units
# Generate a grid of points with distance h between them
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict the class for every point of the grid
Z = svm_class.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Draw on an explicit figure/axes so the saved image is exactly this plot
fig, ax = pyplot.subplots()
ax.contourf(xx, yy, Z, cmap=pyplot.cm.Spectral, alpha=0.8)
# Black marker edges keep points visible where their colour matches the region behind them
ax.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=pyplot.cm.Spectral, edgecolors='k')
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_title('Polynomial SVM (degree 10): decision regions on validation data')
fig.savefig('svm_poly10.png')

In [None]:
from sklearn.model_selection import GridSearchCV

# Tune C and gamma of an RBF-kernel SVM with 5-fold cross-validated accuracy.
# The search runs over a scaler + SVC pipeline, so every candidate is scored
# on standardised features and the scaler is re-fitted inside each CV fold.
# Tuning gamma on raw features would select a value for a different feature scale.
# Pipeline parameters are addressed as "<step name>__<parameter>".
hp_grid = [
    {'kern_svc__C': [0.001, 1, 1000], 'kern_svc__gamma': [0.1, 0.7, 5]}
]
svm_classifier = SVC(kernel="rbf")
svm_search_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("kern_svc", svm_classifier),
])
grid_search = GridSearchCV(svm_search_pipeline, hp_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

In [None]:
# Parameter combination with the best mean cross-validated score.
grid_search.best_params_

In [None]:
# Estimator selected by the search (refit on the data passed to grid_search.fit).
grid_search.best_estimator_

In [None]:
from sklearn.base import clone
from sklearn.metrics import confusion_matrix

# Final kernel model: standardise the features, then apply the estimator
# configuration selected by the grid search.
# Pipeline.fit fits its steps in place, so an unfitted clone is used here;
# passing grid_search.best_estimator_ directly would silently refit the
# object held by grid_search.
svm_class = Pipeline([
    ("scaler", StandardScaler()),
    ("kern_svc", clone(grid_search.best_estimator_))
])
svm_class.fit(X_train, y_train)
# Predict the held-out validation split
y_pred = svm_class.predict(X_val)
# Confusion matrix of true vs. predicted validation labels
cm = confusion_matrix(y_val, y_pred)

In [None]:
# Confusion matrix on the validation split: rows = true class, columns = predicted class.
cm

In [None]:
# Decision regions of the current model, overlaid with the validation points.
# Set min and max values and give it some padding
x_min, x_max = X_val[:, 0].min() - .5, X_val[:, 0].max() + .5
y_min, y_max = X_val[:, 1].min() - .5, X_val[:, 1].max() + .5
h = 0.01  # grid spacing, in raw (unscaled) feature units
# Generate a grid of points with distance h between them
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict the class for every point of the grid
Z = svm_class.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Draw on an explicit figure/axes so the saved image is exactly this plot
fig, ax = pyplot.subplots()
ax.contourf(xx, yy, Z, cmap=pyplot.cm.Spectral, alpha=0.8)
# Black marker edges keep points visible where their colour matches the region behind them
ax.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=pyplot.cm.Spectral, edgecolors='k')
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_title('RBF SVM: decision regions on validation data')
fig.savefig('svm_kernel.png')

In [None]:
from sklearn.metrics import confusion_matrix

# Final evaluation: predict the separate test file and tabulate true vs. predicted labels.
y_pred = svm_class.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Confusion matrix on the test set: rows = true class, columns = predicted class.
cm

In [None]:
# Decision regions of the current model, overlaid with the test points.
# Set min and max values and give it some padding
x_min, x_max = X_test[:, 0].min() - .5, X_test[:, 0].max() + .5
y_min, y_max = X_test[:, 1].min() - .5, X_test[:, 1].max() + .5
h = 0.01  # grid spacing, in raw (unscaled) feature units
# Generate a grid of points with distance h between them
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict the class for every point of the grid
Z = svm_class.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Draw on an explicit figure/axes so the saved image is exactly this plot
fig, ax = pyplot.subplots()
ax.contourf(xx, yy, Z, cmap=pyplot.cm.Spectral, alpha=0.8)
# Black marker edges keep points visible where their colour matches the region behind them
ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=pyplot.cm.Spectral, edgecolors='k')
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_title('RBF SVM: decision regions on test data')
fig.savefig('svm_kernel_test.png')