In [3]:
import numpy as np

In [18]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [9]:
inputs = np.array([0.7, -0.3])
weights = np.array([0.1, 0.8])
bias = -0.1

In [13]:
output =sigmoid(weights.dot(inputs)+bias)

In [14]:
print('Output:')
print(output)

Output:
0.432907095035


### Gradient Descent: The Code

In [19]:
def sigmoid_prime(x):
    return sigmoid(x)*(1-sigmoid(x))

<img src="https://photos-4.dropbox.com/t/2/AAAf3MLYpqmuT6UVEs75qYMnBsZAlX5PCK2apeU_eM10Jg/12/51532177/png/32x32/1/_/1/2/Screenshot%202017-02-11%2017.50.27.png/EL-y1ScY-9UBIAIoAg/NBocAhRcG0xkvUAGwkqndS_s6CTleGw3WWE0UMyVSb0?size=800x600&size_mode=3">

In [16]:
x = inputs

In [17]:
y = 0.2

In [18]:
weights = weights

In [19]:
learnrate = 0.01

In [20]:
nn_output = sigmoid(weights.dot(x))

In [21]:
error = y - nn_output

In [22]:
error_term = error*sigmoid_prime(nn_output)

In [24]:
# Gradient Decsent step
delW = learnrate*error_term*x

In [25]:
print(delW)

[-0.00044756  0.00019181]


### Graduate School admissions data

##### Data Prep

In [None]:
import pandas as pd
import numpy as np

In [2]:
admissions = pd.read_csv('data/binary.csv')

In [3]:
data = pd.concat([admissions,pd.get_dummies(admissions['rank'],prefix = 'rank')],axis=1)

In [4]:
data = data.drop('rank',axis = 1)

In [5]:
data.head()

Unnamed: 0,admit,gre,gpa,rank_1,rank_2,rank_3,rank_4
0,0,380,3.61,0,0,1,0
1,1,660,3.67,0,0,1,0
2,1,800,4.0,1,0,0,0
3,1,640,3.19,0,0,0,1
4,0,520,2.93,0,0,0,1


In [6]:
for field in ['gre','gpa']:
    mean,std=data[field].mean(),data[field].std()
    data.loc[:,field] = (data[field]-mean)/std

In [10]:
np.random.seed(42)
sample = np.random.choice(data.index,size=int(len(data)*0.9),replace=False)
data,test = data.ix[sample],data.drop(sample)

In [13]:
features,targets = data.drop('admit',axis=1),data['admit']
features_test, targets_test = test.drop('admit', axis=1), test['admit']

##### Neural Net with GD

In [14]:
n_records,n_features = features.shape

In [15]:
weights = np.random.normal(scale=1/n_features**.5,size=n_features)

In [31]:
epochs = 1000
lr = 0.1

In [35]:
for e in range(epochs):
    delW = np.zeros(weights.shape)
    for x,y in zip(features.values,targets):
        nn_out = sigmoid(weights.dot(x))
        error = y - nn_out
        delW += error*nn_out*(1-nn_out)#error*sigmoid_prime(nn_out)
    weights+=lr*delW/n_records

In [36]:
test_out = sigmoid(np.dot(features_test,weights))

In [37]:
predictions = test_out>0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))

Prediction accuracy: 0.500


#### Going Deep

<img src="https://photos-2.dropbox.com/t/2/AAABHgS5zRh6QbNxEeYUD0bfh0uQct93CUD5WTJtqeSV8A/12/51532177/png/32x32/1/_/1/2/Screenshot%202017-02-11%2018.42.47.png/EL-y1ScY_NUBIAIoAg/G8oN4eaRKlRcLRbb57qSiekRyi6xEEN9yuYL4idI6nk?size=800x600&size_mode=3">

The weight matrix
<img src="https://photos-2.dropbox.com/t/2/AADbCYqEaibMU15a_vRRJsO3GNCGdMXAozxsZguasXUu7A/12/51532177/png/32x32/1/_/1/2/Screenshot%202017-02-11%2018.43.40.png/EL-y1ScY_dUBIAIoAg/idf3wixfiuXlr82PHj68odoHM4gKepxoBtqpApi28oU?size=800x600&size_mode=3">

In [38]:
n_records,n_inputs = features.shape

In [39]:
n_hidden = 2

In [53]:
weights_inp_hidden = np.random.normal(scale=1/n_features**.5,size=(n_inputs,n_hidden))

In [41]:
# np.random.normal(0, n_inputs**-0.5, size=(n_inputs, n_hidden))

Now for the hidden layer
<img src="https://photos-4.dropbox.com/t/2/AACoskTnbb0oSrVzKt4kg9EEEOFEBFA2Q8AANHVgU-82Vw/12/51532177/png/32x32/1/_/1/2/Screenshot%202017-02-11%2018.47.50.png/EL-y1ScY_tUBIAIoAg/HGnJwjO5-bI_aWQB7dYuzMzyRjfVbQgiflwhR5-c9jo?size=800x600&size_mode=3">

Thats *matrix multiplication* - matrix W, vector x.
<img src="https://photos-4.dropbox.com/t/2/AAC1lXTYOw0JA35frmpEAVpBUtHcoUdHQ_wzy0gHSBZYNA/12/51532177/png/32x32/1/_/1/2/Screenshot%202017-02-11%2018.49.40.png/EL-y1ScY_9UBIAIoAg/XkvahYv0pmuDcQcPvpztUxT-BoO6nFqO410BgwqMBw0?size=800x600&size_mode=3">

In [58]:
hidden_inputs = features.dot(weights_inp_hidden)

In [59]:
hidden_inputs.shape

(360, 2)

In [60]:
features.shape

(360, 6)

In [61]:
weights_inp_hidden.shape

(6, 2)

In [68]:
test =np.array([1,2,4])

In [69]:
test.T

array([1, 2, 4])

In [71]:
test[:,np.newaxis]

array([[1],
       [2],
       [4]])

In [72]:
test[:,None]

array([[1],
       [2],
       [4]])

In [78]:
np.array(test,ndmin=2).T

array([[1],
       [2],
       [4]])

In [79]:
np.random.randn(4)

array([ 0.69909238,  0.59989663, -0.48124813, -2.093794  ])

The whole thing

In [81]:
import numpy as np

def sigmoid(x):
    return 1/(1+np.exp(-x))

n_input = 4  # no of features
n_hidden = 3
n_output = 2

np.random.seed(42)

X = np.random.randn(4)

weights_inp_to_hid = np.random.normal(0,scale=1/(500)**0.5,size=(n_input,n_hidden))
weights_hid_to_out = np.random.normal(0,scale=1/(500)**0.5,size=(n_hidden,n_output))

In [85]:
hidden_layer_in = X.dot(weights_inp_to_hid)
hidden_layer_out = sigmoid(hidden_layer_in)

In [86]:
print('Hidden-layer Output:')
print(hidden_layer_out)

Hidden-layer Output:
[ 0.46165366  0.46673029  0.50010885]


In [87]:
output_layer_in = hidden_layer_out.dot(weights_hid_to_out)
output_layer_out = sigmoid(output_layer_in)

In [88]:
print('Output-layer Output:')
print(output_layer_out)

Output-layer Output:
[ 0.49822909  0.49299034]


#### Backpropagation

<img src="https://photos-6.dropbox.com/t/2/AACyjzO5VOItHMoWWDefnPjhmOgUvpm9fJaXS5WvSe9rNg/12/51532177/png/32x32/1/_/1/2/Screenshot%202017-02-11%2021.08.20.png/EL-y1ScYgNYBIAIoAg/e9Sx6fIRGpdSx5f6rww_wbKmyv0w9nXH3OrtCLc6HYU?size=800x600&size_mode=3">

In [89]:
np.random.seed(21)

In [90]:
n_hidden = 2
epochs = 900
lr = 0.005

In [91]:
n_records,n_features = features.shape

In [92]:
last_loss = None

In [93]:
weights_input_hidden = np.random.normal(scale=1 / n_features ** .5,
                                        size=(n_features, n_hidden))
weights_hidden_output = np.random.normal(scale=1 / n_features ** .5,
                                         size=n_hidden)

In [94]:
weights_input_hidden.shape

(6, 2)

In [95]:
weights_hidden_output.shape

(2,)

In [114]:
for e in range(epochs):
    del_w_input_hidden = np.zeros(weights_input_hidden.shape)
    del_w_hidden_output = np.zeros(weights_hidden_output.shape)
    for x,y in zip(features.values,targets):
        hidden_input = np.dot(x, weights_input_hidden)
        hidden_output = sigmoid(hidden_input)

        output = sigmoid(np.dot(hidden_output,
                                weights_hidden_output))

        ## Backward pass ##
        # TODO: Calculate the error
        error = y - output

        # TODO: Calculate error gradient in output unit
        output_error = error * output * (1 - output) #e*signoid_prime(out)

        # TODO: propagate errors to hidden layer
        hidden_error = np.dot(output_error, weights_hidden_output) * hidden_output * (1 - hidden_output)

        # TODO: Update the change in weights
        del_w_hidden_output += output_error * hidden_output #scaled by weights
        del_w_input_hidden += hidden_error * x[:, np.newaxis] # scaled by weights
        
    weights_input_hidden += del_w_input_hidden*lr/n_records
    weights_hidden_output += del_w_hidden_output*lr/n_records

hidden = sigmoid(np.dot(features_test, weights_input_hidden))
out = sigmoid(np.dot(hidden, weights_hidden_output))
predictions = out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))


Prediction accuracy: 0.650


In [113]:
weights_input_hidden.shape

(6, 2)

In [101]:
weights_hidden_output.shape

(2,)

In [105]:
weights_hidden_output[:,np.newaxis].T.shape

(1, 2)