In [4]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

In [5]:
dataset = pd.read_csv('binary.csv')
dataset.head(3)


Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1


## Data Preparation Step



In [6]:
dataset = pd.get_dummies(dataset, columns=['rank'], drop_first=True)
dataset.head(3)

Unnamed: 0,admit,gre,gpa,rank_2,rank_3,rank_4
0,0,380,3.61,0,1,0
1,1,660,3.67,0,1,0
2,1,800,4.0,0,0,0


In [7]:
dataset['gre'] = dataset['gre'] / 800
dataset['gpa'] = dataset['gpa'] / 4.0
dataset.head(3)

Unnamed: 0,admit,gre,gpa,rank_2,rank_3,rank_4
0,0,0.475,0.9025,0,1,0
1,1,0.825,0.9175,0,1,0
2,1,1.0,1.0,0,0,0


In [8]:
X, y = dataset.drop('admit', axis=1), dataset['admit']
print(X.shape, y.shape)

(400, 5) (400,)


In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, stratify=y)

# Gradient Step
First, you'll need to initialize the weights. We want these to be small such that the input to the sigmoid is in the
linear region near 0 and not squashed at the high and low ends. It's also important to initialize them randomly
so that they all have different starting values and diverge, breaking symmetry. So, we'll initialize the weights
from a normal distribution centered at 0. A good value for the scale is <br>
$$\frac{1}{\sqrt n}$$

where n is the number of input units. This keeps the input to the sigmoid low for increasing numbers of input units.

In [10]:
def sigmoid(x):
    """
    Calculate sigmoid
    :param x: Wx
    :return: probability
    """
    return 1 / (1 + np.exp(-x))
# TODO: We haven't provided the sigmoid_prime function like we did in
#       the previous lesson to encourage you to come up with a more
#       efficient solution. If you need a hint, check out the comments
#       in solution.py from the previous lecture.
def sigmoid_prime(x):
    sig = sigmoid(x)
    return sig * (1 - sig)

# Use to same seed to make debugging easier
np.random.seed(42)

n_records, n_features = X.shape
last_loss = None

# Initialize weights
weights = np.random.normal(scale=1 / n_features**.5, size=n_features)

# Neural Network hyperparameters
epochs = 1000
learnrate = 0.5

for e in range(epochs):
    del_w = np.zeros(weights.shape)
    for x, y in zip(X_train.values, y_train):
        # Loop through all records, x is the input, y is the target

        # Note: We haven't included the h variable from the previous
        #       lesson. You can add it if you want, or you can calculate
        #       the h together with the output

        # TODO: Calculate the output
        output = np.dot(x, weights)

        y_hat = sigmoid(output)

        # TODO: Calculate the error
        error = y - y_hat

        # TODO: Calculate the error term
        error_term = error * sigmoid_prime(output)

        # TODO: Calculate the change in weights for this sample
        #       and add it to the total weight change
        del_w += learnrate * error_term * x

    # TODO: Update weights using the learning rate and the average change in weights
    weights += del_w

    # Printing out the mean square error on the training set
    if e % (epochs / 10) == 0:
        out = sigmoid(np.dot(X_train, weights))
        loss = np.mean((out - y_train) ** 2)
        if last_loss and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss


# Calculate accuracy on test data
print('TEST DATA')
tes_out = sigmoid(np.dot(X_test, weights))
predictions = tes_out > 0.5
accuracy = np.mean(predictions == y_test)
print("Prediction accuracy: {:.3f}".format(accuracy))

Train loss:  0.3166643405730375
Train loss:  0.3166642929842012
Train loss:  0.31666424338930815
Train loss:  0.3166641916583543
Train loss:  0.3166641376498345
Train loss:  0.3166640812094398
Train loss:  0.3166640221685751
Train loss:  0.3166639603426642
Train loss:  0.3166638955292084
Train loss:  0.3166638275055542
TEST DATA
Prediction accuracy: 0.680


In [11]:
# Calculate accuracy on test data
print('TRAIN DATA')
tes_out = sigmoid(np.dot(X_train, weights))
predictions = tes_out > 0.5
accuracy = np.mean(predictions == y_train)
print("Prediction accuracy: {:.3f}".format(accuracy))

TRAIN DATA
Prediction accuracy: 0.683


# Data Types and transformations

In [12]:
import numpy as np

def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1/(1+np.exp(-x))

# Network size
N_input = 4
N_hidden = 3
N_output = 2

np.random.seed(42)
# Make some fake data
X = np.random.randn(4)

weights_input_to_hidden = np.random.normal(0, scale=0.1, size=(N_input, N_hidden))
weights_hidden_to_output = np.random.normal(0, scale=0.1, size=(N_hidden, N_output))

X = np.array(X, ndmin=2)

# TODO: Make a forward pass through the network

hidden_layer_in = np.dot(X, weights_input_to_hidden)
hidden_layer_out = sigmoid(hidden_layer_in)

print('Hidden-layer Output:')
print(hidden_layer_out)


output_layer_in = np.dot(hidden_layer_out, weights_hidden_to_output)
output_layer_out = sigmoid(output_layer_in)

print('Output-layer Output:')
print(output_layer_out)

Hidden-layer Output:
[[0.41492192 0.42604313 0.5002434 ]]
Output-layer Output:
[[0.49815196 0.48539772]]


# Backpropagation

In [15]:
# Hyper parameters
features, features_test = X_train, X_test
targets, targets_test = y_train, y_test


n_hidden = 2  # number of hidden units
epochs = 900
learnrate = 0.005

n_records, n_features = features.shape
last_loss = None
# Initialize weights
weights_input_hidden = np.random.normal(scale=1 / n_features ** .5,
                                        size=(n_features, n_hidden))
weights_hidden_output = np.random.normal(scale=1 / n_features ** .5,
                                         size=n_hidden)

for e in range(epochs):
    del_w_input_hidden = np.zeros(weights_input_hidden.shape)
    del_w_hidden_output = np.zeros(weights_hidden_output.shape)

    for x, y in zip(features.values, targets):
        ## Forward pass ##
        # TODO: Calculate the output
        hidden_input = np.dot(x, weights_input_hidden)
        hidden_output = sigmoid(hidden_input)
        output = sigmoid(np.dot(hidden_output, weights_hidden_output))

        ## Backward pass ##
        # TODO: Calculate the network's prediction error
        error = y - output

        # TODO: Calculate error term for the output unit
        output_error_term = error * output * (1 - output)

        ## propagate errors to hidden layer

        # TODO: Calculate the hidden layer's contribution to the error
        hidden_error = np.dot(output_error_term, weights_hidden_output)

        # TODO: Calculate the error term for the hidden layer
        hidden_error_term = hidden_error * hidden_output * (1 - hidden_output)

        print(hidden_output.shape)
        print(output_error_term.shape)

        # TODO: Update the change in weights
        del_w_hidden_output += output_error_term * hidden_output
        del_w_input_hidden += hidden_error_term * x[:, None]


    # TODO: Update weights  (don't forget to division by n_records or number of samples)
    weights_input_hidden += learnrate * del_w_input_hidden / n_records
    weights_hidden_output += learnrate * del_w_hidden_output / n_records

    # Printing out the mean square error on the training set
    if e % (epochs / 10) == 0:
        hidden_output = sigmoid(np.dot(x, weights_input_hidden))
        out = sigmoid(np.dot(hidden_output,
                             weights_hidden_output))
        loss = np.mean((out - targets) ** 2)

        if last_loss and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss

# Calculate accuracy on test data
hidden = sigmoid(np.dot(features_test, weights_input_hidden))
out = sigmoid(np.dot(hidden, weights_hidden_output))
predictions = out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))

(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()
(2,)
()


KeyboardInterrupt: 