# Examples
This notebook contains examples I'll run during the presentation to illustrate some points!

In [None]:
import numpy as np
from matplotlib import pyplot as plt

# Example 1: Models that don't use weights!

In [None]:
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets

n_neighbors = 2

# Load the iris dataset to have some labelled data to classify.
iris = datasets.load_iris()

# Keep only the first two features so the decision regions can be drawn in a
# plane. A natively two-dimensional dataset would avoid this slicing.
X = iris.data[:, :2]
y = iris.target

h = .02  # step size in the mesh

# Colour maps: pale shades for the decision regions, saturated shades for the
# training points.
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

# The evaluation mesh over [x_min, x_max] x [y_min, y_max] depends only on the
# data, not on the weighting scheme, so it is built once outside the loop.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
mesh_points = np.c_[xx.ravel(), yy.ravel()]

for weights in ['uniform', 'distance']:
    # Fit a nearest-neighbours classifier using this vote-weighting scheme.
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
    clf.fit(X, y)

    # Predict a class for every mesh point, then fold the flat predictions
    # back into the mesh shape so they can be drawn as coloured regions.
    Z = clf.predict(mesh_points).reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Overlay the training points on top of the regions.
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold,
                edgecolor='k', s=20)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-Class classification (k = %i, weights = '%s')"
              % (n_neighbors, weights))

plt.show()

# Example 2: Linear Regression

In [None]:
# Synthetic data: a line through the origin with slope 0.6, plus Gaussian
# noise of standard deviation 2.

num_pts = 100
X = np.linspace(-10, 10, num_pts)
noise = np.random.normal(loc=0, scale=2, size=num_pts)
Y = 0.6 * X + noise

# Equal axis scaling keeps the slope visually honest.
plt.scatter(X, Y)
plt.axis('equal')
plt.show()

In [None]:
# Solve the normal equations for the single weight of a line through the
# origin. X is one-dimensional here, so X.T @ X is the scalar sum of squares
# and the "pseudo-inverse" is just X scaled by its reciprocal.
gram = X.T @ X
psuedo_inv = 1 / gram * X.T
w = psuedo_inv @ Y

# Evaluate the fitted line slightly beyond the data range for plotting.
linex = np.linspace(-13, 13, 10)
liney = w * linex

plt.scatter(X, Y)
plt.plot(linex, liney, 'r')
plt.axis('equal')
plt.show()

### Now, let's try some different data.

In [None]:
# A second synthetic data set: slope 1.3, shifted so the underlying line
# crosses zero at X = 3 instead of at the origin, plus Gaussian noise.
num_pts = 100
X = np.linspace(-2, 15, num_pts)
trend = 1.3 * (X - 3)
Y = trend + np.random.normal(loc=0, scale=2, size=num_pts)

plt.scatter(X, Y)
plt.axis('equal')
plt.show()

In [None]:
# The same through-the-origin fit as before, applied to the new data.
# With a single weight and no bias, the fitted line must pass through (0, 0).
gram = X.T @ X
psuedo_inv = 1 / gram * X.T
w = psuedo_inv @ Y

linex = np.linspace(-5, 16, 10)
liney = w * linex

plt.scatter(X, Y)
plt.plot(linex, liney, 'r')
plt.axis('equal')
plt.show()

In [None]:
# Append a constant 1 to every data point so the second weight acts as a
# bias (intercept). X_add has one row per point: [x, 1].
X_add = np.vstack([X, np.ones(len(X))]).T
print(X_add.shape)

# Normal equations: w = (X^T X)^-1 X^T Y.
gram = X_add.T @ X_add
psuedo_inv = np.linalg.inv(gram) @ X_add.T
w = psuedo_inv @ Y

linex = np.linspace(-5, 16, 10)
# Give the plotting points the same [x, 1] layout as the training data.
linex = np.vstack([linex, np.ones(len(linex))]).T
liney = linex @ w

# Column 0 of linex holds the original x coordinates.
plt.scatter(X, Y)
plt.plot(linex[:, 0], liney, 'r')
plt.axis('equal')
plt.show()

# Example 3: Kernels!

In [None]:
# Synthetic data following the parabola 0.3 x^2 - 0.4 x + 0.1, plus Gaussian
# noise of standard deviation 0.5.
X = np.linspace(-2, 4, 100)
parabola = 0.3 * np.square(X) - 0.4 * X + 0.1
Y = parabola + np.random.normal(loc=0, scale=0.5, size=100)

plt.scatter(X, Y)
plt.axis('equal')
plt.show()

In [None]:
# Baseline: fit a plain line with no bias term to the quadratic data.
# X is one-dimensional, so X.T @ X is a scalar and w is a single slope.
gram = X.T @ X
psuedo_inv = 1 / gram * X.T
w = psuedo_inv @ Y

linex = np.linspace(-2, 4, 10)
liney = w * linex

plt.scatter(X, Y)
plt.plot(linex, liney, 'r')
plt.axis('equal')
plt.show()


In [None]:
# Next attempt: a line with a bias term, obtained by appending a constant 1
# to every data point so each row of X_add is [x, 1].
X_add = np.vstack([X, np.ones(len(X))]).T
print(X_add.shape)
print(X_add[:4])

# Normal equations: w = (X^T X)^-1 X^T Y.
gram = X_add.T @ X_add
psuedo_inv = np.linalg.inv(gram) @ X_add.T
w = psuedo_inv @ Y

linex = np.linspace(-2, 4, 10)
# Give the plotting points the same [x, 1] layout as the training data.
linex = np.vstack([linex, np.ones(len(linex))]).T
liney = linex @ w

# Column 0 of linex holds the original x coordinates.
plt.scatter(X, Y)
plt.plot(linex[:, 0], liney, 'r')
plt.axis('equal')
plt.show()

In [None]:
# Expand every data point into quadratic features [x^2, x, 1]; the model
# stays linear in its weights but can now describe a parabola.
X_quad = np.vstack([np.square(X), X, np.ones(len(X))]).T
print(X_quad.shape)
print(X_quad[:4])

# Normal equations: w = (X^T X)^-1 X^T Y.
gram = X_quad.T @ X_quad
psuedo_inv = np.linalg.inv(gram) @ X_quad.T
w = psuedo_inv @ Y

linex = np.linspace(-2, 4, 20)
# Apply the same [x^2, x, 1] expansion to the plotting points.
linex = np.vstack([np.square(linex), linex, np.ones(len(linex))]).T
liney = linex @ w

# Column 1 of linex holds the original x coordinates.
plt.scatter(X, Y)
plt.plot(linex[:, 1], liney, 'r')
plt.axis('equal')
plt.show()

# Example 4: Logistic Regression

In [None]:
# Two Gaussian blobs, 100 points each, to serve as a binary classification
# problem. Each draw is transposed so the two coordinates unpack separately.
c1_x1, c1_x2 = np.random.multivariate_normal(
    mean=[-2.5, 3], cov=[[1, 0.3], [0.3, 1]], size=100).T
c2_x1, c2_x2 = np.random.multivariate_normal(
    mean=[1, 1], cov=[[2, 1], [1, 2]], size=100).T

plt.plot(c1_x1, c1_x2, 'x')
plt.plot(c2_x1, c2_x2, 'o')
plt.axis('equal')
plt.show()

# Stack into one (200, 2) design matrix; class 1 is labelled 0 and class 2
# is labelled 1.
c1_X = np.vstack([c1_x1, c1_x2]).T
c2_X = np.vstack([c2_x1, c2_x2]).T
X = np.vstack([c1_X, c2_X])
y = np.concatenate([np.zeros(100), np.ones(100)])

# Shuffle rows and labels with the same permutation so they stay aligned.
permutation = np.random.permutation(X.shape[0])
X = X[permutation]
y = y[permutation]

In [None]:
# Define functions for the logistic classifier
def sigmoid(x): # note that sigmoid = logistic
    """Evaluate the logistic function 1 / (1 + exp(-x)) element-wise.

    The direct form exp(x) / (1 + exp(x)) overflows to inf / inf = nan once
    exp(x) exceeds the floating-point range. Only exp(-|x|), which always lies
    in (0, 1], is evaluated here, so the result saturates cleanly at 0 and 1.

    Args:
        x: Scalar or array-like of real values.

    Returns:
        Value(s) in [0, 1] with the same shape as x (a NumPy scalar when x is
        a scalar).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # in (0, 1]; cannot overflow
    # x >= 0: 1 / (1 + e^-x).  x < 0: e^x / (1 + e^x).  Same function, but
    # each branch is written in terms of the safe exp(-|x|).
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d array to a scalar and leaves n-d arrays
    # untouched, matching what the direct formula returned for scalar input.
    return result[()]

# Performs one gradient update step on the weights
def update(w, X, y, lr):
    """Take one gradient-descent step on the logistic-regression weights.

    The gradient used is -X^T (y - sigmoid(X w)), summed (not averaged) over
    the rows of X, so stepping against it adds lr * X^T (y - sigmoid(X w)).

    Args:
        w: Current weight vector.
        X: Design matrix with one row per sample.
        y: Target labels, one per row of X.
        lr: Learning rate (step size).

    Returns:
        The updated weight vector; w itself is not modified.
    """
    predicted = sigmoid(np.dot(X, w))
    step = lr * np.dot(X.T, y - predicted)
    return w + step

In [None]:
### Model Parameters ###
lr = 0.01
steps = 100

# Start from small random weights, one per input feature (no bias term).
w = np.random.normal(loc=0, scale=0.1, size=X.shape[1])

# Gradient descent: every step uses all rows of X.
for _ in range(steps):
    w = update(w, X, y, lr)

# Extract the decision boundary, i.e. the points where the model outputs 0.5:
#   0.5 = s(Xw)
#   1 + exp(-Xw) = 2
#   exp(-Xw) = 1
#   Xw = 0
# For a single point (x1, x2) this is the equation of a line:
#   x1 w1 + x2 w2 = 0
#   x2 = - w1 / w2 * x1
# Without a bias term this line always passes through the origin.

linex = np.linspace(-0.5, 0.5, 20)
slope = -w[0] / w[1]
liney = slope * linex

plt.plot(c1_x1, c1_x2, 'x')
plt.plot(c2_x1, c2_x2, 'o')
plt.plot(linex, liney, 'r')
plt.axis('equal')
plt.show()

In [None]:
# This time, append a constant-1 column so the third weight acts as a bias.
X_add = np.concatenate((X, np.ones((X.shape[0], 1))), axis=1)
print(X_add[:4])

### Model Parameters ###
lr = 0.01
steps = 100

# Start from small random weights, one per column of X_add.
w = np.random.normal(loc=0, scale=0.1, size=X_add.shape[1])

# Gradient descent: every step uses all rows of X_add.
for _ in range(steps):
    w = update(w, X_add, y, lr)

# Extract the decision boundary, i.e. the points where the model outputs 0.5:
#   0.5 = s(Xw)
#   1 + exp(-Xw) = 2
#   exp(-Xw) = 1
#   Xw = 0
# For a single point (x1, x2, 1) this is the equation of a line:
#   x1 w1 + x2 w2 + 1 w3 = 0
#   x2 = - w1 / w2 * x1 - w3 / w2

linex = np.linspace(-4, 1.6, 20)
slope = -w[0] / w[1]
intercept = -w[2] / w[1]
liney = slope * linex + intercept

plt.plot(c1_x1, c1_x2, 'x')
plt.plot(c2_x1, c2_x2, 'o')
plt.plot(linex, liney, 'r')
plt.axis('equal')
plt.show()

# Example 5: Batch vs. Stochastic Gradient Descent

In [None]:
def plot_logistic_reg(X, y, w, low, up):
    """Plot both classes and the decision boundary of a logistic model.

    The points are taken from the X and y arguments rather than from
    notebook-level variables, so the figure always shows the data that was
    actually passed in.

    Args:
        X: Design matrix with one row per sample; columns 0 and 1 are the
            plotted coordinates (any further columns, e.g. a bias column,
            are ignored for plotting).
        y: Labels aligned with the rows of X; rows labelled 0 are drawn as
            'x' markers and rows labelled 1 as 'o' markers.
        w: Weight vector [w1, w2, w3] where w3 multiplies the bias term.
        low: Left end of the x range over which the boundary is drawn.
        up: Right end of the x range over which the boundary is drawn.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Boundary: x1 w1 + x2 w2 + w3 = 0  =>  x2 = -w1 / w2 * x1 - w3 / w2
    linex = np.linspace(low, up, 20)
    liney = -w[0] / w[1] * linex - w[2]/w[1]

    class0 = X[y == 0]
    class1 = X[y == 1]
    plt.plot(class0[:, 0], class0[:, 1], 'x')
    plt.plot(class1[:, 0], class1[:, 1], 'o')
    plt.plot(linex, liney, 'r')
    plt.axis('equal')
    plt.show()

In [None]:
# A tougher binary classification problem: two Gaussian blobs of 50 points
# each whose means are closer together.
c1_x1, c1_x2 = np.random.multivariate_normal(
    mean=[-1.5, 1.2], cov=[[1, 0.5], [0.5, 1]], size=50).T
c2_x1, c2_x2 = np.random.multivariate_normal(
    mean=[0.4, 0.6], cov=[[2, 1], [1, 2]], size=50).T

plt.plot(c1_x1, c1_x2, 'x')
plt.plot(c2_x1, c2_x2, 'o')
plt.axis('equal')
plt.show()

# Stack into one (100, 2) design matrix; class 1 is labelled 0 and class 2
# is labelled 1.
c1_X = np.vstack([c1_x1, c1_x2]).T
c2_X = np.vstack([c2_x1, c2_x2]).T
X = np.vstack([c1_X, c2_X])
y = np.concatenate([np.zeros(50), np.ones(50)])

# Shuffle rows and labels with the same permutation so they stay aligned.
permutation = np.random.permutation(X.shape[0])
X = X[permutation]
y = y[permutation]

# A bias is always used from here on: append a constant-1 column to X.
X = np.concatenate((X, np.ones((X.shape[0], 1))), axis=1)

In [None]:
### Batch Gradient Descent ###
lr = 0.01
steps = 5
low, up = -2, 2  # x range over which the decision boundary is drawn

# Start from small random weights, one per column of X.
w = np.random.normal(loc=0, scale=0.1, size=X.shape[1])

# Show the boundary implied by the initial weights.
plot_logistic_reg(X, y, w, low, up)

# Each step uses the whole data set; re-plot after every update.
for _ in range(steps):
    w = update(w, X, y, lr)
    plot_logistic_reg(X, y, w, low, up)

In [None]:
### Stochastic Gradient Descent ###
lr = 0.01
steps = 200
low, up = -2, 2  # x range over which the decision boundary is drawn

# Start from small random weights, one per column of X.
w = np.random.normal(loc=0, scale=0.1, size=X.shape[1])

# Show the boundary implied by the initial weights.
plot_logistic_reg(X, y, w, low, up)

# Each step updates the weights from a single randomly chosen sample.
for i in range(steps):
    rand_index = int(np.random.rand() * X.shape[0])
    # Length-1 slices keep the leading sample axis that update() expects.
    data_point = X[rand_index:rand_index + 1, :]
    label = y[rand_index:rand_index + 1]
    w = update(w, data_point, label, lr)
    # Plot only every 20th step to keep the number of figures manageable.
    if i % 20 == 0:
        plot_logistic_reg(X, y, w, low, up)