In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import time
import numpy as np
from IPython import display

In [None]:
# Synthetic k-armed spiral dataset.
# Adapted from http://cs231n.github.io/neural-networks-case-study/ (A. Karpathy)
# by Y. Zhang and P. Ginsparg

n = 200  # points per class
k = 4    # number of classes
d = 2    # dimensionality of each point (2d coords)

np.random.seed(0)  # fixed seed so the jittered spiral is reproducible

radius = np.linspace(0.0, 1, n)  # every arm runs from the origin out to radius 1
x_parts, y_parts = [], []
for arm in range(k):
    # Arm `arm` starts at angle arm*(2pi/k) and sweeps a further 3.5*(2pi/k)
    # (0.875 of a full turn = 315 degrees for k = 4), plus Gaussian angular jitter.
    angle = np.linspace(arm*2*np.pi/k, (arm+3.5)*2*np.pi/k, n) + .2*np.random.randn(n)
    x_parts.append(radius*np.cos(angle))
    y_parts.append(radius*np.sin(angle))

# (n*k) x d array of coordinates, the arms stacked one after another
X = np.vstack([np.concatenate(x_parts), np.concatenate(y_parts)]).T
# n*k list of class labels: n zeros, then n ones, and so on
y = [label for label in range(k) for _ in range(n)]

In [None]:
# Preview the raw data: one colour per class, square canvas, no axis decorations.
data_fig, data_ax = plt.subplots(figsize=(6, 6))
data_ax.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
data_ax.axis([-1, 1, -1, 1])
data_ax.axis('off');

In [None]:
# 100 x 100 evaluation grid over the plotted square [-1, 1] x [-1, 1];
# do_plot classifies every grid point to paint the decision regions.
xx, yy = np.meshgrid(np.linspace(-1, 1, 100), np.linspace(-1, 1, 100))

def do_plot(ax, hidlayer=True, tsleep=0):
    """Redraw the current decision regions and the training points on `ax`.

    Reads notebook globals set by the training cells: `W`, `b` (plus `W2`, `b2`
    when `hidlayer` is true) for the model, `i` and `loss` for the title, and
    `X`, `y`, `xx`, `yy` for the data and the evaluation grid.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on; it is cleared before drawing.
    hidlayer : bool
        If true, the grid scores go through a ReLU and the second layer
        (`W2`, `b2`); if false, the first-layer scores are used directly.
    tsleep : float
        Seconds to pause after the figure has been displayed.
    """
    # predicted class for every grid point
    grid = np.c_[xx.ravel(), yy.ravel()]
    Z = grid.dot(W) + b
    if hidlayer:
        Z = np.maximum(0, Z).dot(W2) + b2
    Z = Z.argmax(1).reshape(xx.shape)

    ax.cla()
    # Title and display go through `ax` itself rather than pyplot's current
    # axes / a global figure, so the function draws where the caller asked.
    ax.set_title('iteration {}, loss {:.2f}'.format(i,loss))
    ax.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.8)
    ax.scatter(*X.T, c=y, s=40, cmap=plt.cm.Spectral)
    ax.axis([-1,1,-1,1])
    ax.axis('off')
    display.display(ax.figure)
    display.clear_output(wait=True)  # deferred: cleared only when the next output arrives
    time.sleep(tsleep) #was .2

<img src="linear.png">

In [None]:
# Linear (softmax) classifier trained by full-batch gradient descent.
# Hyperparameters
step_size = 1e-1
reg = 1e-4 # L2 regularization strength

n_samples = len(X)  # n*k = 800 points
b = np.zeros(k)
# random initializations
W = 0.01 * np.random.randn(d,k)

fig=plt.figure(figsize=(6,6))

for i in range(501): #gradient descent loop
  
  scores = X.dot(W) + b  # evaluate class scores, [n_samples x k]

  # softmax class probabilities, [n_samples x k]; subtracting each row's max
  # leaves the result unchanged mathematically but keeps exp() from overflowing
  probs = np.exp(scores - scores.max(1, keepdims=True))
  probs /= probs.sum(1, keepdims=True)
  
  correct_logprobs = -np.log( probs[range(n_samples), y] )
  loss = correct_logprobs.sum() / n_samples  # average cross-entropy loss
  loss += 0.5*reg*(W*W).sum()                # plus regularization loss

  # do_plot reads the globals W, b, i and loss, so call it before the update
  if i % 25 == 0: do_plot(plt.gca(), False, .5)

  # gradient of the loss w.r.t. the scores: (probs - one_hot(y)) / n_samples
  dscores = probs  # alias: probs is not needed again this iteration
  dscores[range(n_samples),y] -= 1
  dscores /= n_samples
  
  # backpropagate gradient to parameters (W,b)
  dW = X.T.dot(dscores)
  db = dscores.sum(0)
  dW += reg*W # regularization gradient

  # parameter update
  W -= step_size * dW
  b -= step_size * db
            
# evaluate training set accuracy with the final parameters
# (the loop's last `scores` predates the final update)
scores = X.dot(W) + b
accuracy = (scores.argmax(1) == y).mean()
plt.title('Linear: {} iterations, training accuracy: {}'.format(i, accuracy));

<img src="non-linear.png">

In [None]:
# Two-layer network (one ReLU hidden layer) trained by full-batch gradient descent.
# Hyperparameters
step_size = 2e-1
reg = 2e-4 # L2 regularization strength

h = 100 # size of hidden layer
b = np.zeros(h)
b2 = np.zeros(k)
# random initializations
W = 0.01 * np.random.randn(d,h)
W2 = 0.01 * np.random.randn(h,k)

fig=plt.figure(figsize=(6,6))
plt.axis('off')

n_samples = len(X)

for i in range(10001): # gradient descent loop
  
  hL = np.maximum(0, X.dot(W) + b) # hidden layer ReLU activation, [n_samples x h]
  scores = hL.dot(W2) + b2         # class scores, [n_samples x k]
  
  # softmax class probabilities; subtracting each row's max leaves the result
  # unchanged mathematically but keeps exp() from overflowing
  probs = np.exp(scores - scores.max(1, keepdims=True))
  probs /= probs.sum(1, keepdims=True)
  
  # compute the loss: average cross-entropy loss and regularization
  correct_logprobs = -np.log(probs[range(n_samples),y])
  loss = correct_logprobs.sum() / n_samples  #data loss
  loss += 0.5*reg*(W*W).sum() + 0.5*reg*(W2*W2).sum() #plus regularization loss

  # do_plot reads the globals W, b, W2, b2, i and loss, so call it before the update
  if i % 100 == 0: do_plot(fig.gca())
    
  # gradient of the loss w.r.t. the scores: (probs - one_hot(y)) / n_samples
  dscores = probs  # alias: probs is not needed again this iteration
  dscores[range(n_samples),y] -= 1
  dscores /= n_samples
  
  db2 = dscores.sum(0)              # backprop to parameters W2 and b2
  dW2 = hL.T.dot(dscores) + reg*W2  # with regularization gradient

  dhL = dscores.dot(W2.T)           # backprop to hidden layer
  dhL[hL <= 0] = 0                  # backprop the ReLU non-linearity
  dW = X.T.dot(dhL) + reg*W
  db = dhL.sum(0)
  
  # parameter updates
  W  -= step_size * dW
  b  -= step_size * db
  W2 -= step_size * dW2
  b2 -= step_size * db2

# training set accuracy with the final parameters
# (the loop's last `scores` predates the final update)
scores = np.maximum(0, X.dot(W) + b).dot(W2) + b2
accuracy = (scores.argmax(1) == y).mean()
plt.title('"Deep": {} iterations, training accuracy: {:.2f}'.format(i, accuracy));