In [1]:
import numpy as np
import pandas as pd

# Pre processing Data

In [2]:
admit = pd.read_csv('Entrance_Selection.csv')

In [3]:
data = pd.concat([admit,pd.get_dummies(admit['rank'],prefix='rank')],axis=1)
data = data.drop('rank',axis=1)

In [4]:
for field in ['gre','gpa']:
    mean, std = data[field].mean(), data[field].std()
    data.loc[:,field] = (data[field]-mean)/std

In [5]:
np.random.seed(42)
sample = np.random.choice(data.index,size=int(len(data)*0.9),replace=False)
data,test_data = data.ix[sample], data.drop(sample)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  This is separate from the ipykernel package so we can avoid doing imports until


In [6]:
features, targets = data.drop('admit',axis=1), data['admit']
features_test, targets_test = test_data.drop('admit',axis=1), test_data['admit']

In [7]:
X = features.values
Y = targets.values

# Activation Function

In [8]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

# Hyper Parameters

In [9]:
n_hidden = 3 
epochs = 5000
learning_rate = 0.5

n_records, n_features = features.shape

last_loss = None

# Training

In [10]:
#initialize weights
weights_input_hidden = np.random.normal(scale=1/n_features** -.5,size=(n_features,n_hidden))
weights_hidden_output = np.random.normal(scale=1/n_features**-.5,size=n_hidden)

In [11]:
for e in range(epochs):
    del_w_input_hidden = np.zeros(weights_input_hidden.shape)
    del_w_hidden_output = np.zeros(weights_hidden_output.shape)
    
    #forawrd pass
    hidden_input = np.dot(X,weights_input_hidden)
    hidden_activation = sigmoid(hidden_input)
    output_layer_input = np.dot(weights_hidden_output,hidden_activation.T)
    output = sigmoid(output_layer_input)
    
    #backward pass
    #calculate error
    error = Y - output
    
    #calculate the error gradient in output unit
    temp_o = np.multiply(output,(1-output))
    output_error = np.multiply(error,temp_o).reshape(-1,1)
    
    #propagate error to hidden layer 
    tmp_ho = np.dot(output_error, weights_hidden_output.reshape(-1,1).T)
    sig_prod = np.multiply(hidden_activation,(1-hidden_activation))
    hidden_error = np.multiply(tmp_ho,sig_prod)
    
    #update the change in weights
    del_w_hidden_output = np.multiply(output_error,hidden_activation)
    del_w_hidden_output = np.sum(del_w_hidden_output,axis=0)
    
    del_w_input_hidden = np.dot(hidden_error.T,X).T
    
    #update weights
    weights_hidden_output += learning_rate * del_w_hidden_output * (1/n_records)
    weights_input_hidden += learning_rate * del_w_input_hidden * (1/n_records)
    
    if  e % (epochs/10) == 0:
        hidden_activations = sigmoid(np.dot(X[int(len(features)-1)],weights_input_hidden))
        out = sigmoid(np.dot(hidden_activations,weights_hidden_output))
        loss = np.mean((out-targets )**2)
        
        if last_loss and last_loss < loss:
            print("Train loss: ",loss," Warninig - Loss increasing")
        else:
            print("Train loss: ",loss)
        last_loss = loss

Train loss:  0.2253412095295958
Train loss:  0.22784900287405507  Warninig - Loss increasing
Train loss:  0.23917101502914304  Warninig - Loss increasing
Train loss:  0.24274861261289765  Warninig - Loss increasing
Train loss:  0.24384751485331368  Warninig - Loss increasing
Train loss:  0.24408625741605433  Warninig - Loss increasing
Train loss:  0.24401567521176964
Train loss:  0.24382794052877407
Train loss:  0.2435950738118375
Train loss:  0.2433470324904509


In [12]:
hidden = sigmoid(np.dot(features_test,weights_input_hidden))
out = sigmoid(np.dot(hidden,weights_hidden_output))
predictions = out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))

Prediction accuracy: 0.725
