In [12]:
import pandas as pd
import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

admissions = pd.read_csv("binary.csv")

# Make dummy variables for rank
data = pd.concat([admissions, pd.get_dummies(admissions['rank'], prefix='rank')], axis=1)
data = data.drop('rank', axis=1)

# Standarize features
for field in ['gre', 'gpa']:
    mean, std = data[field].mean(), data[field].std()
    data.loc[:,field] = (data[field]-mean)/std
    
# Split off random 10% of the data for testing
np.random.seed(42)
sample = np.random.choice(data.index, size=int(len(data)*0.9), replace=False)
data, test_data = data.ix[sample], data.drop(sample)

# Split into features and targets
features, targets = data.drop('admit', axis=1), data['admit']
features_test, targets_test = test_data.drop('admit', axis=1), test_data['admit']


np.random.seed(42) # same seed each time.

n_records, n_features = features.shape
print("num records = ", n_records, " num features =", n_features)

epochs = 100
learnrate = 0.5
# Initialize weights
weights = np.random.normal(scale=1 / n_features**.5, size=n_features)

for e in range(epochs):
    del_w = np.zeros(weights.shape)
    total_error = 0.0
    for x, y in zip(features.values, targets):
        result = np.dot(weights, x)
        output = sigmoid(result)
        error = -(output - y)
        total_error += error * error
        error_term = error * output * (1.0 - output) # grad descent delta direction.
        del_w += x * error_term * learnrate
    total_error /= 2 * n_records
    weights += del_w
    print("error:", e, " = ", total_error)




num records =  360  num features = 6
error: 0  =  0.132093399125
error: 1  =  0.143098005207
error: 2  =  0.122527285604
error: 3  =  0.123689205915
error: 4  =  0.129622470673
error: 5  =  0.119745468778
error: 6  =  0.121735311262
error: 7  =  0.13511273387
error: 8  =  0.114999827283
error: 9  =  0.122989258527
error: 10  =  0.120762994776
error: 11  =  0.132697059096
error: 12  =  0.114481735155
error: 13  =  0.128733052608
error: 14  =  0.119692377881
error: 15  =  0.130565142548
error: 16  =  0.115056760635
error: 17  =  0.129023782647
error: 18  =  0.117752900737
error: 19  =  0.12797075899
error: 20  =  0.117251749058
error: 21  =  0.131810591978
error: 22  =  0.115658759202
error: 23  =  0.125261220086
error: 24  =  0.118617427932
error: 25  =  0.13025046684
error: 26  =  0.116253345295
error: 27  =  0.130371473877
error: 28  =  0.117105045076
error: 29  =  0.127679056327
error: 30  =  0.117336501194
error: 31  =  0.130052705253
error: 32  =  0.1163473971
error: 33  =  0.12762