In [None]:
# Run some setup code for this notebook.
import random

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from scipy.special import expit

try:
    # scikit-learn >= 0.18; the old `cross_validation` module was removed in 0.20.
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

import cs565.data_utils as data_utils

# Display plots inline and change default figure size
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (10.0, 8.0)

# This is a bit of magic to make matplotlib figures appear inline in the notebook
# rather than in a new window.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2


In [None]:
# Load the noisy two-moons dataset and split it 70% / 15% / 15% into
# training / testing / validation sets, then visualize each split.

# Seed NumPy's global RNG so a Restart & Run All reproduces the same data.
# NOTE(review): presumably data_utils.noisy_moons draws from NumPy's global RNG;
# confirm, or pass it an explicit seed if it accepts one.
np.random.seed(0)
X, y = data_utils.noisy_moons(n_samples=500, noise=0.40)

# First hold out 30% of the samples, then cut that holdout in half.
# Fixed random_state values make both splits deterministic.
X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, test_size=0.3, random_state=0)
X_test, X_val, y_test, y_val = train_test_split(X_holdout, y_holdout, test_size=0.5, random_state=0)

data_utils.visualize_2d_dataset(X_train, y_train, 'Training dataset')
data_utils.visualize_2d_dataset(X_val, y_val, 'Validation dataset')
data_utils.visualize_2d_dataset(X_test, y_test, 'Testing dataset')

In [None]:
# Report the (features, labels) shapes of every split, in the order
# train / validation / test.
for _split_X, _split_y in ((X_train, y_train), (X_val, y_val), (X_test, y_test)):
    print(_split_X.shape, _split_y.shape)

# Preview the first 5 training samples followed by their labels.
print(X_train[:5], y_train[:5], sep='\n')

In [None]:
# Helper function to plot a decision boundary.
# If you don't fully understand this function don't worry, it just generates the contour plot below.
def plot_decision_boundary(pred_func, X, y, title, h=0.01):
    """Draw the predicted class regions of a model with a dataset on top.

    Args:
        pred_func: callable taking an (n_points, 2) array and returning a pair
            whose first element holds one row of per-class scores per point
            (the second element is ignored).
        X: 2-D array of samples; column 0 is plotted on the x axis and
            column 1 on the y axis.
        y: per-sample values used to colour the scatter points.
        title: title of the figure.
        h: spacing between neighbouring grid points. Smaller values give a
            smoother boundary but require more predictions.

    Raises:
        ValueError: if `h` is not strictly positive.
    """
    if h <= 0:
        raise ValueError("grid step h must be positive, got %r" % (h,))

    # Set min and max values and give it some padding
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5

    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Predict the class scores for the whole grid and keep the winning class
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    grid_scores, _ = pred_func(grid_points)
    Z = np.argmax(grid_scores, axis=1).reshape(xx.shape)

    # Plot the contour and the supplied examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
    plt.title(title)
    plt.show()

In [None]:
# Network architecture
nn_input_dim = 2   # number of input features per sample
nn_output_dim = 2  # number of output classes

# Number of rows in the training set
num_examples = X_train.shape[0]

# Gradient descent hyperparameters (hand-picked)
epsilon = 0.01     # learning rate
reg_lambda = 0.01  # L2 regularization strength

In [None]:
# Helper function that runs the forward pass and returns class probabilities
def predict(parameters, x):
    """Forward-propagate `x` through the two-layer network.

    Args:
        parameters: dict holding the weights and biases under the keys
            'W1', 'b1', 'W2' and 'b2'.
        x: 2-D array with one sample per row.

    Returns:
        A tuple `(probs, forward_cache)` where `probs` contains the softmax
        probabilities (each row sums to 1) and `forward_cache` is
        `(z1, a1, z2)`: the hidden pre-activation, the tanh hidden activation
        and the raw output scores.
    """
    W1, b1, W2, b2 = parameters['W1'], parameters['b1'], parameters['W2'], parameters['b2']
    # Forward propagation
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    # Softmax. Subtracting the row-wise maximum leaves the result unchanged
    # mathematically but keeps np.exp from overflowing to inf (which would
    # turn the probabilities into NaN) when the scores are large.
    shifted_scores = z2 - np.max(z2, axis=1, keepdims=True)
    exp_scores = np.exp(shifted_scores)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # The cache keeps the unshifted scores so callers see the true z2.
    forward_cache = (z1, a1, z2)
    return probs, forward_cache

In [None]:
# Helper function to evaluate the total loss on a dataset
def calculate_loss(parameters, x, y):
    """Return the regularized cross-entropy loss of the model on `(x, y)`.

    The loss is the summed negative log-probability of the correct classes
    plus `reg_lambda / 2` times the squared entries of W1 and W2, divided by
    the number of samples in `y`. `reg_lambda` is read from module scope.

    Args:
        parameters: dict with keys 'W1', 'b1', 'W2' and 'b2'.
        x: 2-D array with one sample per row.
        y: integer class index for every row of `x`.

    Raises:
        ValueError: if `y` is empty (the mean loss would be undefined).
    """
    n_samples = len(y)
    if n_samples == 0:
        raise ValueError("cannot compute the loss of an empty dataset")

    W1, W2 = parameters['W1'], parameters['W2']
    # Forward propagation to calculate our predictions
    probs, _ = predict(parameters, x)
    # Cross-entropy: pick the probability assigned to each sample's true class
    correct_logprobs = -np.log(probs[np.arange(n_samples), y])
    data_loss = np.sum(correct_logprobs)
    # Add regularization term to loss (biases are not regularized)
    data_loss += reg_lambda / 2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    # Normalize by the size of the dataset being evaluated, not by the
    # training-set size, so validation/test losses are on the same scale.
    return 1. / n_samples * data_loss

In [None]:
# This function learns parameters for the neural network and returns the model.
def build_model(nn_hdim, num_passes=20000, print_loss=False):
    """Train the two-layer network with full-batch gradient descent.

    The training data (`X_train`, `y_train`), the layer sizes
    (`nn_input_dim`, `nn_output_dim`) and the hyperparameters (`epsilon`,
    `reg_lambda`) are read from module scope. `X_val` / `y_val` are only
    needed when `print_loss` is True.

    Side effect: reseeds NumPy's global RNG with 0 so the initial weights are
    the same on every call.

    Args:
        nn_hdim: Number of nodes in the hidden layer.
        num_passes: Number of passes through the training data for gradient
            descent.
        print_loss: If True, print the training loss followed by the training
            and validation accuracy every 1000 iterations.

    Returns:
        dict with the learned arrays under the keys 'W1', 'b1', 'W2', 'b2'.
    """
    # Derive the sample count from the labels actually used below instead of a
    # separately maintained global, which could be out of date.
    n_train = len(y_train)
    sample_idx = np.arange(n_train)

    # Initialize the parameters to random values. We need to learn these.
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))

    # This is what we return at the end. The arrays are updated in place
    # below, so this dict always refers to the current parameters.
    parameters = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

    # Gradient descent: every pass uses the whole training set.
    for i in range(num_passes):

        # Forward propagation
        probs, forward_cache = predict(parameters, X_train)
        _, a1, _ = forward_cache

        # Backpropagation. Gradient of the cross-entropy w.r.t. the output
        # scores is (probs - one_hot(y)); probs is not needed afterwards, so
        # its buffer is reused.
        delta3 = probs
        delta3[sample_idx, y_train] -= 1
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        # (1 - a1^2) is the derivative of tanh
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(X_train.T, delta2)
        db1 = np.sum(delta2, axis=0, keepdims=True)

        # Add regularization terms (b1 and b2 don't have regularization terms)
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1

        # Gradient descent parameter update (in place)
        W1 += -epsilon * dW1
        b1 += -epsilon * db1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2

        # Optionally print the loss.
        # This is expensive because it uses the whole dataset, so we don't want to do it too often.
        if print_loss and i % 1000 == 0:
            print("Loss after iteration ", i, calculate_loss(parameters, X_train, y_train))
            # Print training accuracy followed by validation accuracy
            train_predict, _ = predict(parameters, X_train)
            val_predict, _ = predict(parameters, X_val)
            print(np.mean(np.argmax(train_predict, axis=1) == y_train),
                  np.mean(np.argmax(val_predict, axis=1) == y_val))

    return parameters


In [None]:
# Train a network whose hidden layer has 3 units, printing progress as it goes
model_parameters = build_model(3, print_loss=True)

# Draw the learned decision boundary over each split in turn
for _split_X, _split_y, _split_title in (
    (X_train, y_train, "Decision Boundary for training dataset"),
    (X_val, y_val, "Decision Boundary for validation dataset"),
    (X_test, y_test, "Decision Boundary for test dataset"),
):
    plot_decision_boundary(lambda x: predict(model_parameters, x), _split_X, _split_y, _split_title)

