In [5]:
import numpy as np

#定义sigmoid激活函数
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

#激活函数的导数
def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

#输入数据
x = np.array([0.1, 0.3])

#目标
y = 0.2

#权重
weights = np.array([-0.8, 0.5])

#更新权重的学习率
learnrate = 0.5

#输入和权重的线性组合
h = np.dot(x, weights)

#神经网络输出
nn_output = sigmoid(h)
print('Output: ', nn_output)

#误差输出
error = y - nn_output
print('\nError: ', error)

#梯度输出
output_grad = sigmoid_prime(h)

error_term = error * output_grad

#梯度下降一步
del_w = learnrate * error_term * x

weights_new = weights + del_w

print(weights_new)

Output:  0.517492857666

Error:  -0.317492857666
[-0.8039638   0.48810859]


In [9]:
import pandas as pd

admissions = pd.read_csv('binary.csv')

data = pd.concat([admissions, pd.get_dummies(admissions['rank'], prefix = 'rank')], axis = 1).drop('rank', axis = 1)
data.head()

Unnamed: 0,admit,gre,gpa,rank_1,rank_2,rank_3,rank_4
0,0,380,3.61,0,0,1,0
1,1,660,3.67,0,0,1,0
2,1,800,4.0,1,0,0,0
3,1,640,3.19,0,0,0,1
4,0,520,2.93,0,0,0,1


In [10]:
for field in ['gre', 'gpa']:
    mean, std = data[field].mean(), data[field].std()
    data.loc[:, field] = (data[field] - mean) / std
data.head()

Unnamed: 0,admit,gre,gpa,rank_1,rank_2,rank_3,rank_4
0,0,-1.798011,0.578348,0,0,1,0
1,1,0.625884,0.736008,0,0,1,0
2,1,1.837832,1.603135,1,0,0,0
3,1,0.452749,-0.525269,0,0,0,1
4,0,-0.586063,-1.208461,0,0,0,1


In [15]:
np.random.seed(42)
sample = np.random.choice(data.index, size = int(len(data) * 0.9), replace = False)
data, test_data = data.loc[sample], data.drop(sample)
print(data.head())
print('\n')
print(test_data.head())

     admit       gre       gpa  rank_1  rank_2  rank_3  rank_4
320      0 -1.105469 -0.656652       0       0       1       0
103      0 -0.412928  1.445476       0       0       1       0
70       0  0.452749  1.603135       0       0       1       0
23       0  0.799020 -0.525269       0       0       0       1
118      1  1.837832  0.814837       1       0       0       0


     admit       gre       gpa  rank_1  rank_2  rank_3  rank_4
127      0  1.318426  0.919944       0       0       0       1
286      1  1.837832 -0.446439       1       0       0       0
79       1  0.279614  1.603135       1       0       0       0
336      0 -0.586063 -0.630376       0       0       1       0
236      1  0.625884  0.263029       0       1       0       0


In [24]:
features, targets = data.drop('admit', axis = 1), data['admit']
features_test, targets_test = test_data.drop('admit', axis = 1), test_data['admit']
features_test.head()

Unnamed: 0,gre,gpa,rank_1,rank_2,rank_3,rank_4
127,1.318426,0.919944,0,0,0,1
286,1.837832,-0.446439,1,0,0,0
79,0.279614,1.603135,1,0,0,0
336,-0.586063,-0.630376,0,0,1,0
236,0.625884,0.263029,0,1,0,0


In [19]:
n_records, n_features = features.shape

In [21]:
last_loss =  None
print(n_records, n_features)

261 6


In [34]:
weights = np.random.normal(scale = 1 / n_features**.5, size = n_features)
epochs = 1000
learnrate = 0.5
print(weights)

[ 0.49372812  0.21962125  1.11632317  0.03827958 -0.57416255 -0.01409667]


In [43]:
for e in range(epochs):
    del_w = np.zeros(weights.shape)
    for x, y in zip(features.values, targets):
        output = sigmoid(np.dot(x, weights))
        error = y - output
        error_term = error * sigmoid_prime(output)
        del_w += error_term * x
    weights += learnrate * del_w / n_records
    
    if e % (epochs / 10) == 0:
        out = sigmoid(np.dot(features, weights))
        loss = np.mean((out - targets) ** 2)
        if last_loss and last_loss <= loss:
            print('Train loss:', loss, " WARNING Loss Increasing")
        else:
            print('Train loss:', loss)
        last_loss = loss



In [44]:
test_out = sigmoid(np.dot(features_test, weights))
predictions = test_out > 0.5
accuracy = np.mean(predictions == targets_test)
print('Prediction accuracy: {:.3f}'.format(accuracy))

Prediction accuracy: 0.600
