In [None]:
from sklearn.datasets import make_classification
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Build a 2-D, imbalanced binary dataset: a large class-0 cloud around the
# origin and a small class-1 cloud around (0.75, 0.75), then shuffle the rows
# so that a positional train/test split contains both classes.
np.random.seed(0)  # fixed seed so a fresh "Restart & Run All" reproduces the same data

n_major, n_minor = 900, 100

# A covariance matrix must be symmetric positive semi-definite. The matrices
# used previously ([[0, 0.5], [0.5, 0]] and [[0, 0.125], [0.125, 0]]) have a
# negative eigenvalue, which makes NumPy emit a RuntimeWarning. NumPy's
# SVD-based sampler then draws from an isotropic Gaussian whose variance is the
# singular value (0.5 and 0.125), so those matrices are written out explicitly
# here: same distribution, no warning.
cov_major = np.array([[0.5, 0.0], [0.0, 0.5]])
cov_minor = np.array([[0.125, 0.0], [0.0, 0.125]])

_x1 = np.random.multivariate_normal(mean=[0, 0], cov=cov_major, size=(n_major,))
_x2 = np.random.multivariate_normal(mean=[0.75, 0.75], cov=cov_minor, size=(n_minor,))

X = np.r_[_x1, _x2]
y = np.zeros((X.shape[0],))
y[n_major:] = 1  # rows stacked after the majority block belong to class 1

# Apply one shared permutation to features and labels.
rand_ix = np.arange(X.shape[0])
np.random.shuffle(rand_ix)
X = X[rand_ix, :]
y = y[rand_ix]

In [None]:
# Linear support-vector classifier with scikit-learn's default settings.
from sklearn.svm import LinearSVC
clf = LinearSVC()

In [None]:
# Positional hold-out split: the first 900 rows are used for training and
# everything after them for testing.
x_train, x_test = X[:900], X[900:]
y_train, y_test = y[:900], y[900:]

In [None]:
# Fit the classifier on the training split.
clf.fit(x_train, y_train)

In [None]:
# Predict labels for the held-out rows.
prediction = clf.predict(x_test)

In [None]:
# Display the predicted labels for the held-out rows.
prediction

In [None]:
# Display the true held-out labels for comparison with the predictions.
y_test

In [None]:
# Fraction of held-out rows whose predicted label equals the true label.
# NOTE(review): the dataset was built with 900 class-0 and 100 class-1 rows, so
# accuracy alone can look good even when class 1 is rarely predicted.
from sklearn.metrics import accuracy_score
accuracy_score(y_test, prediction)

In [None]:
# Recall on the held-out rows; with scikit-learn's default arguments this is
# reported for the positive class (label 1).
from sklearn.metrics import recall_score
recall_score(y_test, prediction)

### Plotting the decision boundary

In [None]:
# Scatter the whole dataset, one marker per row, coloured by class label.
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(X[:, 0], X[:, 1], c=y, s=50)

In [None]:
# Decision-boundary plot: classify every node of a dense grid that covers the
# data (plus a margin of 1 on each side) and shade the grid by predicted class.
h = 0.02  # grid spacing along both axes
feat_0, feat_1 = X[:, 0], X[:, 1]
xmin, xmax = feat_0.min() - 1, feat_0.max() + 1
ymin, ymax = feat_1.min() - 1, feat_1.max() + 1

x_ticks = np.arange(xmin, xmax, h)
y_ticks = np.arange(ymin, ymax, h)
xx, yy = np.meshgrid(x_ticks, y_ticks)

# One (x, y) row per grid node; predictions are folded back into grid shape.
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = clf.predict(grid_points).reshape(xx.shape)

plt.figure(figsize=(10, 8))
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
plt.scatter(feat_0, feat_1, c=y, cmap=plt.cm.Paired, s=50)

In [None]:
# Replace X / y with a two-concentric-circles dataset, which no straight line
# can separate, then shuffle the rows and plot them coloured by label.
from sklearn.datasets import make_circles

CIRCLES_SEED = 0  # fixed so the data, split and scores are reproducible on re-run

X, y = make_circles(n_samples=1000, factor=.3, noise=.05, random_state=CIRCLES_SEED)

# Shared permutation for features and labels. Its length is taken from the data
# instead of repeating the sample count, and it comes from a dedicated seeded
# generator so the result does not depend on the global RNG state.
rand_ix = np.random.RandomState(CIRCLES_SEED).permutation(X.shape[0])
X = X[rand_ix, :]
y = y[rand_ix]

plt.figure(figsize=(10, 8))
plt.scatter(X[:, 0], X[:, 1], c=y)

### Introduction to kernel methods

In [None]:
# Baseline: a fresh linear SVM trained on the raw 2-D coordinates, with the
# first 900 rows for training and the rest held out.
clf = LinearSVC()
x_train, x_test = X[:900], X[900:]
y_train, y_test = y[:900], y[900:]
clf.fit(x_train, y_train)

In [None]:
# Score the classifier on the held-out rows: accuracy first, then recall.
prediction = clf.predict(x_test)
# print(...) with a single argument behaves the same on Python 2 and Python 3;
# the bare `print expr` statement form is a SyntaxError on Python 3.
print(accuracy_score(y_test, prediction))
print(recall_score(y_test, prediction))

In [None]:
# Decision-boundary plot for the current classifier: predict on a dense grid
# spanning the data (with a margin of 1 on each side) and shade by class.
h = 0.02  # grid spacing along both axes
col_0, col_1 = X[:, 0], X[:, 1]
xmin, xmax = col_0.min() - 1, col_0.max() + 1
ymin, ymax = col_1.min() - 1, col_1.max() + 1

x_axis = np.arange(xmin, xmax, h)
y_axis = np.arange(ymin, ymax, h)
xx, yy = np.meshgrid(x_axis, y_axis)

# Flatten the grid into (n_nodes, 2) for predict, then restore the grid shape.
mesh_samples = np.column_stack((xx.ravel(), yy.ravel()))
Z = clf.predict(mesh_samples).reshape(xx.shape)

plt.figure(figsize=(10, 8))
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
plt.scatter(col_0, col_1, c=y, cmap=plt.cm.Paired, s=50)

In [None]:
from sklearn.decomposition import KernelPCA

In [None]:
# Project the data with an RBF-kernel PCA (gamma=5) and plot the first two
# components, coloured by class label.
kpca = KernelPCA(kernel="rbf", gamma=5)
x_kpca = kpca.fit_transform(X)

fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(x_kpca[:, 0], x_kpca[:, 1], c=y)

In [None]:
# Train a linear SVM on the first two kernel-PCA components instead of the raw
# coordinates, then report accuracy and recall on the held-out rows.
# NOTE(review): x_kpca comes from fit_transform on all of X, so the held-out
# rows also took part in fitting the embedding; the scores below are therefore
# optimistic. Fit KernelPCA on the training rows only for a stricter check.
clf = LinearSVC()
x_train, y_train = x_kpca[:900, :2], y[:900]
x_test, y_test = x_kpca[900:, :2], y[900:]
clf.fit(x_train, y_train)
prediction = clf.predict(x_test)
# print(...) with a single argument behaves the same on Python 2 and Python 3;
# the bare `print expr` statement form is a SyntaxError on Python 3.
print(accuracy_score(y_test, prediction))
print(recall_score(y_test, prediction))

In [None]:
# Decision-boundary plot in kernel-PCA space: predict on a dense grid over the
# first two components (with a margin of 1 on each side) and shade by class.
h = 0.02  # grid spacing along both axes
comp_0, comp_1 = x_kpca[:, 0], x_kpca[:, 1]
xmin, xmax = comp_0.min() - 1, comp_0.max() + 1
ymin, ymax = comp_1.min() - 1, comp_1.max() + 1

x_axis = np.arange(xmin, xmax, h)
y_axis = np.arange(ymin, ymax, h)
xx, yy = np.meshgrid(x_axis, y_axis)

# Flatten the grid into (n_nodes, 2) for predict, then restore the grid shape.
mesh_samples = np.column_stack((xx.ravel(), yy.ravel()))
Z = clf.predict(mesh_samples).reshape(xx.shape)

plt.figure(figsize=(10, 8))
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
plt.scatter(comp_0, comp_1, c=y, cmap=plt.cm.Paired, s=50)

In [None]:
from sklearn.svm import SVC

In [None]:
# Kernel SVM applied directly to the raw 2-D coordinates, with the first 900
# rows for training and the rest held out.
clf = SVC(kernel="rbf")
x_train, x_test = X[:900], X[900:]
y_train, y_test = y[:900], y[900:]
clf.fit(x_train, y_train)

In [None]:
# Score the classifier on the held-out rows: accuracy first, then recall.
prediction = clf.predict(x_test)
# print(...) with a single argument behaves the same on Python 2 and Python 3;
# the bare `print expr` statement form is a SyntaxError on Python 3.
print(accuracy_score(y_test, prediction))
print(recall_score(y_test, prediction))

In [None]:
# Decision-boundary plot for the current classifier on the raw coordinates:
# predict on a dense grid (margin of 1 on each side) and shade by class.
h = 0.02  # grid spacing along both axes
first, second = X[:, 0], X[:, 1]
xmin, xmax = first.min() - 1, first.max() + 1
ymin, ymax = second.min() - 1, second.max() + 1

x_steps = np.arange(xmin, xmax, h)
y_steps = np.arange(ymin, ymax, h)
xx, yy = np.meshgrid(x_steps, y_steps)

# One (x, y) row per grid node; predictions are folded back into grid shape.
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = clf.predict(grid_points).reshape(xx.shape)

plt.figure(figsize=(10, 8))
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
plt.scatter(first, second, c=y, cmap=plt.cm.Paired, s=50)