In [10]:
import pandas as pd
import numpy as np

df = pd.read_csv("./data/salary/train_ser.csv")

In [11]:
df_label = df.loc[:,'income']
df_train = df.loc[:,:].drop('income',axis=1)

data_x = df_train.values.astype(np.float).T
data_y = df_label.values.astype(np.float).reshape((1,-1))

data_x = (data_x - np.mean(data_x,axis=1).reshape((-1,1)))/np.std(data_x,axis=1).reshape((-1,1))

In [12]:
def sigmoid(x):
    return 1./(1.+ np.exp(-x))

def sigmoid_inv(x):
    return sigmoid(x) * (1.-sigmoid(x))

def mul(x,w,b):
    return np.matmul(w.T,x) + b

def loss(pred,y):
    pred_log = np.log(pred)#q(1) = f(x)
    no_pred_log = np.log(1.0-pred)#q(0) = 1 - f(x) ; f(x) : probability of class A(1)
    loss_val = -1.* (np.matmul(y,pred_log.T) + np.matmul((1.0-y),no_pred_log.T))
    return loss_val

dim1,n = data_x.shape
dim2 = 7
dim3 = 1
w1 = np.ones((dim1,dim2))#2 neurons
w2 = np.ones((dim2,dim3))#1 neurons output

b1 = np.zeros((dim2,1))
b2 = np.zeros((dim3,1))

w1_gss = np.zeros_like(w1)
w2_gss = np.zeros_like(w2)
b1_gss = np.zeros_like(b1)
b2_gss = np.zeros_like(b2)

lr = 0.3
epochs = 2000

for i in range(epochs):
    x = data_x
    y = data_y
    
    z1 =  mul(x,w1,b1)# (dim2,dim1) * (dim1,n) -> (dim2,n)
    a1 = sigmoid(z1)
    
    z2 = mul(a1,w2,b2)#(dim3,dim2) * (dim2,n) -> (dim3,n)
    a2 = sigmoid(z2)
    
    d_c_a2 = -1.0 * (y/a2 - (1.-y)/(1.-a2))#(1,n) it is grad not loss
    
    '''
    关于multiply 和 matmul:关于神经元数量的使用matmul,直接在神经元里面运算的使用multiply
    因为反向传播的乘法是标量乘法,即点乘.
    只有涉及到把神经元压缩在矩阵中的运算时才使用向量乘法,这个时候可以使用多个标量乘法分别运算代替,而上一种不行.
    '''
    
    d_w2 = np.matmul(a1, np.multiply(sigmoid_inv(z2), d_c_a2).T)  # (dim2,n) @ [(1,n) * (1,n)].T -> (dim2,dim3)
    d_b2 = np.matmul(np.ones((1,n)), np.multiply(sigmoid_inv(z2), d_c_a2).T).T  # (n,1) @ [(1,n) * (1,n)] -> (1,1)
    d_c_a1 = np.matmul(w2, np.multiply(sigmoid_inv(z2), d_c_a2))  # (dim2,1) * (1,,n) * (1,n) -> (2,n)

    d_w1 = np.matmul(x, np.multiply(sigmoid_inv(z1), d_c_a1).T)  # (dim1,n) @ [(dim2,n) * (dim2,n)].T -> (dim1,dim2) dim2 dimensions and dim3 neurons
    d_b1 = np.matmul(np.ones((1, n)), np.multiply(sigmoid_inv(z1), d_c_a1).T).T  # (1,n) @ [(dim2,n) * (dim2,n)] -> (1,dim2)
    
    w1_gss += d_w1 ** 2
    w2_gss += d_w2 ** 2
    b1_gss += d_b1 ** 2
    b2_gss += d_b2 ** 2
    
    w1_t = w1 - (lr/np.sqrt(w1_gss))*(d_w1)
    w2_t = w2 - (lr/np.sqrt(w2_gss))*(d_w2)
    b1_t = b1 - (lr/np.sqrt(b1_gss))*(d_b1)
    b2_t = b2 - (lr/np.sqrt(b2_gss))*(d_b2)
    
    w1 = w1_t
    w2 = w2_t
    b1 = b1_t
    b2 = b2_t
    
    loss_val = loss(a2,y)[0][0]
    print("loss:{:.5f}",loss_val)

loss:{:.5f} 83147.34938104285
loss:{:.5f} 48285.53399333073
loss:{:.5f} 32522.794212513352
loss:{:.5f} 24394.4531978326
loss:{:.5f} 20461.20677503954
loss:{:.5f} 18629.787451556826
loss:{:.5f} 17752.738341170454
loss:{:.5f} 17293.62629247608
loss:{:.5f} 17018.789233457675
loss:{:.5f} 16827.472236803405
loss:{:.5f} 16675.743788111104
loss:{:.5f} 16544.172725703495
loss:{:.5f} 16424.055048726706
loss:{:.5f} 16311.439730031956
loss:{:.5f} 16204.492406836833
loss:{:.5f} 16102.319067685226
loss:{:.5f} 16004.4396310772
loss:{:.5f} 15910.553789091913
loss:{:.5f} 15820.43853450369
loss:{:.5f} 15733.904602371691
loss:{:.5f} 15650.778821075712
loss:{:.5f} 15570.89751663152
loss:{:.5f} 15494.104395306891
loss:{:.5f} 15420.250077099974
loss:{:.5f} 15349.192120523145
loss:{:.5f} 15280.795101671163
loss:{:.5f} 15214.9306104659
loss:{:.5f} 15151.477142740341
loss:{:.5f} 15090.319905430679
loss:{:.5f} 15031.350560398409
loss:{:.5f} 14974.466930056036
loss:{:.5f} 14919.57268289338
loss:{:.5f} 14866.577