In [None]:
# enables inline plotting
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
# use seaborn plotting defaults
import seaborn as sns; sns.set()

#### Solution Task 1: Constructing linear separators
One can easily draw three different linear separators that classify the data correctly. However, as soon as new data points are added, the separators found so far might no longer classify everything perfectly.


In [None]:
from sklearn.datasets import make_blobs
# draw 50 samples around 2 centers (cluster standard deviation 0.7, fixed seed)
data, labels = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.70)

# extent of the data along each axis; reused for the line grid and the axis limits
x_lo, x_hi = np.min(data[:, 0]), np.max(data[:, 0])
y_lo, y_hi = np.min(data[:, 1]), np.max(data[:, 1])

# scatter the samples: colour encodes the label, marker size 50, 'summer' colour map
plt.scatter(data[:, 0], data[:, 1], c=labels, s=50, cmap='summer')

# evenly spaced x positions spanning the full x range of the data
xfit = np.linspace(x_lo, x_hi)

#SOLUTION (one of many)
# each pair is (slope, intercept) of one candidate separating line
candidate_lines = [(0.9, 0.75), (0.5, 1.6), (0, 2.7)]
for slope, intercept in candidate_lines:
    yfit = slope * xfit + intercept
    plt.plot(xfit, yfit, '-')

# clip the view to the bounding box of the data
plt.xlim(x_lo, x_hi)
plt.ylim(y_lo, y_hi);

#### Solution Task 2: Maximizing Margins
The line that maximizes the margin between the two classes is the one we will choose as the optimal model. Support vector machines are an example of such a maximum margin estimator.

In [None]:
from sklearn.datasets import make_blobs
# draw 50 samples around 2 centers (cluster standard deviation 0.7, fixed seed)
data, labels = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.70)

# extent of the data along each axis; reused for the line grid and the axis limits
x_lo, x_hi = np.min(data[:, 0]), np.max(data[:, 0])
y_lo, y_hi = np.min(data[:, 1]), np.max(data[:, 1])

# scatter the samples: colour encodes the label, marker size 50, 'summer' colour map
plt.scatter(data[:, 0], data[:, 1], c=labels, s=50, cmap='summer')

# evenly spaced x positions spanning the full x range of the data
xfit = np.linspace(x_lo, x_hi)

#SOLUTION
# each triple is (slope, intercept, margin half-width) of one candidate separator
candidates_with_margin = [(0.9, 0.75, 0.07), (0.5, 1.6, 0.28), (0, 2.7, 0.1)]
for slope, intercept, margin in candidates_with_margin:
    yfit = slope * xfit + intercept
    plt.plot(xfit, yfit, '-')
    # shade a band of +/- margin (measured along y) around the line
    band_lower = yfit - margin
    band_upper = yfit + margin
    plt.fill_between(xfit, band_lower, band_upper, edgecolor='none', color='r', alpha=0.1)

# clip the view to the bounding box of the data
plt.xlim(x_lo, x_hi)
plt.ylim(y_lo, y_hi);

#### Solution Task 3: Transforming Data


In [None]:
from sklearn.datasets import make_circles
# 100 noisy samples from make_circles (inner/outer scale factor 0.1, noise 0.1).
# random_state is fixed, as in the make_blobs cells, so that the figure is
# identical on every Restart & Run All; without it new noise is drawn each run.
data, labels = make_circles(n_samples=100, factor=.1, noise=.1, random_state=0)
#ONE SOLUTION
# squared distance of every sample from the origin (x**2 + y**2); used as a
# third coordinate along which the two classes can be split by a plane
r = (data ** 2).sum(1)

from mpl_toolkits import mplot3d

def plot_3D(data, labels, r, elev=10, azim=30):
    """Scatter 2D samples in 3D, using ``r`` as the height of each sample.

    Parameters
    ----------
    data : array indexed as ``data[:, 0]`` (x) and ``data[:, 1]`` (y).
    labels : per-sample values passed as the marker colours (``c=``).
    r : per-sample values plotted on the z axis.
    elev, azim : viewing angles forwarded to ``ax.view_init``.

    Returns
    -------
    The 3D axes created by ``plt.subplot(projection='3d')``, so further
    artists can be drawn onto the same plot.
    """
    x_coords, y_coords = data[:, 0], data[:, 1]

    ax = plt.subplot(projection='3d')
    ax.scatter3D(x_coords, y_coords, r, c=labels, s=50, cmap='summer')
    ax.view_init(elev=elev, azim=azim)

    # label the three axes in x, y, z order
    label_setters = ((ax.set_xlabel, 'x'), (ax.set_ylabel, 'y'), (ax.set_zlabel, 'r'))
    for set_label, text in label_setters:
        set_label(text)
    return ax

def plot_3D_hyperplane(data,r,ax):
    """Draw a horizontal plane at height ``r`` over the x/y extent of ``data``.

    Parameters
    ----------
    data : array indexed as ``data[:, 0]`` (x) and ``data[:, 1]`` (y); only its
        minimum and maximum along each column are used.
    r : height (z value) of the plane.
    ax : axes object providing ``plot_surface``; the plane is drawn onto it.
    """
    x_column, y_column = data[:, 0], data[:, 1]
    # two points per axis are enough: the surface is a single flat rectangle
    x_edges = np.linspace(np.min(x_column), np.max(x_column), 2)
    y_edges = np.linspace(np.min(y_column), np.max(y_column), 2)
    X, Y = np.meshgrid(x_edges, y_edges)
    # constant-height grid with the same shape as X and Y
    height = np.ones(X.shape) * r
    ax.plot_surface(X, Y, height, color='r', alpha=0.4)

# show the samples in 3D, with r as the height of each point
ax = plot_3D(data, labels, r)
# one possible separating plane: horizontal, at the average value of r
plane_height = np.average(r)
plot_3D_hyperplane(data, plane_height, ax)