In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("./data/salary/train_ser.csv")

In [14]:
df_label = df.loc[:100,'income']
df_train = df.loc[:100,:].drop('income',axis=1)

data_x = df_train.values.astype(np.float).T
data_y = df_label.values.astype(np.float).reshape((1,-1))

data_x = (data_x - np.mean(data_x,axis=1).reshape((-1,1)))/np.std(data_x,axis=1).reshape((-1,1))

In [22]:
def sigmoid(x):
    return 1./(1.+ np.exp(-x))

def sigmoid_inv(x):
    return sigmoid(x) * (1.-sigmoid(x))

def mul(x,w,b):
    return np.matmul(w.T,x) + b

def loss(pred,y):
    pred_log = np.log(pred)#q(1) = f(x)
    no_pred_log = np.log(1.0-pred)#q(0) = 1 - f(x) ; f(x) : probability of class A(1)
    loss_val = -1.* (np.matmul(y,pred_log.T) + np.matmul((1.0-y),no_pred_log.T))
    return loss_val

dim1,n = data_x.shape
dim2 = 2
dim3 = 1
w1 = np.ones((dim1,dim2))#2 neurons
w2 = np.ones((dim2,dim3))#1 neurons output

b1 = np.zeros((dim2,1))
b2 = np.zeros((dim3,1))

w1_gss = np.zeros_like(w1)
w2_gss = np.zeros_like(w2)
b1_gss = np.zeros_like(b1)
b2_gss = np.zeros_like(b2)

lr = 3.
epochs = 1000

for i in range(epochs):
    x = data_x
    y = data_y
    
    z1 =  mul(x,w1,b1)# (dim2,dim1) * (dim1,n) -> (dim2,n)
    a1 = sigmoid(z1)
    
    z2 = mul(a1,w2,b2)#(dim3,dim2) * (dim2,n) -> (dim3,n)
    a2 = sigmoid(z2)
    
    d_c_a2 = -1.0 * (y/a2 - (1.-y)/(1.-a2))#(1,n) it is grad not loss
    
    '''
    关于multiply 和 matmul:关于神经元数量的使用matmul,直接在神经元里面运算的使用multiply
    因为反向传播的乘法是标量乘法,即点乘.
    只有涉及到把神经元压缩在矩阵中的运算时才使用向量乘法,这个时候可以使用多个标量乘法分别运算代替,而上一种不行.
    '''
    
    d_w2 = np.matmul(a1, np.multiply(sigmoid_inv(z2), d_c_a2).T)  # (2,n) @ [(1,n) * (1,n)] -> (2,1)
    d_b2 = np.matmul(np.ones((1,n)), np.multiply(sigmoid_inv(z2), d_c_a2).T).T  # (n,1) @ [(1,n) * (1,n)] -> (1,1)
    d_c_a1 = np.matmul(w2, np.multiply(sigmoid_inv(z2), d_c_a2))  # (2,1) * (1,,n) * (1,n) -> (2,n)

    d_w1 = np.matmul(x, np.multiply(sigmoid_inv(z1), d_c_a1).T)  # (dim,n) @ [(2,n) * (2,n)] -> (dim,2)
    d_b1 = np.matmul(np.ones((1, n)), np.multiply(sigmoid_inv(z1), d_c_a1).T).T  # (1,n) @ [(2,n) * (2,n)] -> (1,2)
    
    w1_gss += d_w1 ** 2
    w2_gss += d_w2 ** 2
    b1_gss += d_b1 ** 2
    b2_gss += d_b2 ** 2
    
    w1_t = w1 - (lr/np.sqrt(w1_gss))*(d_w1)
    w2_t = w2 - (lr/np.sqrt(w2_gss))*(d_w2)
    b1_t = b1 - (lr/np.sqrt(b1_gss))*(d_b1)
    b2_t = b2 - (lr/np.sqrt(b2_gss))*(d_b2)
    
    w1 = w1_t
    w2 = w2_t
    b1 = b1_t
    b2 = b2_t
    
    loss_val = loss(a2,y)[0][0]
    print("loss:{:.5f}",loss_val)


loss:{:.5f} 107.49986519250787
loss:{:.5f} 160.64948933402957
loss:{:.5f} 65.06820730877637
loss:{:.5f} 56.23897023413827
loss:{:.5f} 53.504863306354096
loss:{:.5f} 50.23809285435144
loss:{:.5f} 48.61124384544128
loss:{:.5f} 44.89879936933704
loss:{:.5f} 42.03403023121453
loss:{:.5f} 41.95265374697922
loss:{:.5f} 41.00112776833477
loss:{:.5f} 36.033124372855966
loss:{:.5f} 32.44754681584362
loss:{:.5f} 32.50078080733278
loss:{:.5f} 35.778719930523266
loss:{:.5f} 32.04042944099836
loss:{:.5f} 28.13353412350252
loss:{:.5f} 26.42223774961453
loss:{:.5f} 29.409411363915137
loss:{:.5f} 26.430608341267302
loss:{:.5f} 28.731629815780984
loss:{:.5f} 28.925156810819487
loss:{:.5f} 25.58676512591454
loss:{:.5f} 25.868697232144243
loss:{:.5f} 24.928021840209485
loss:{:.5f} 22.135788174156474
loss:{:.5f} 20.943171253304598
loss:{:.5f} 21.015181309999377
loss:{:.5f} 22.41502305293696
loss:{:.5f} 25.58189958328105
loss:{:.5f} 21.10829422683553
loss:{:.5f} 22.029128170653927
loss:{:.5f} 23.8772893014