In [2]:
import numpy as np

In [8]:
def softmax_stabel(Z):
    """
    Row-wise softmax computed in a numerically stable way.

    Z: 2-D array of scores; each row holds the scores of one data point.
    Returns an array of the same shape in which every row sums to 1.
    """
    # Subtracting the per-row maximum leaves the softmax unchanged but keeps
    # the largest exponent at 0, so np.exp cannot overflow.
    row_max = np.max(Z, axis=1, keepdims=True)
    exps = np.exp(Z - row_max)
    row_totals = exps.sum(axis=1, keepdims=True)
    return exps / row_totals

def crossentropy_loss(Yhat, Y):
    """
    Mean cross-entropy between predicted probabilities and integer labels.

    Yhat: (Npoints, nClasses) predicted probabilities
    Y: (Npoints,) integer class index of each point
    Returns the average of -log(Yhat[i, Y[i]]) over all points.
    """
    n_points = Yhat.shape[0]
    # Pick, for every row, the probability assigned to its true class.
    true_class_prob = Yhat[np.arange(n_points), Y]
    log_likelihood = np.log(true_class_prob)
    return -np.mean(log_likelihood)

def mlp_init(d0,d1,d2):
    '''
    Initialise the parameters of a network with one hidden layer.

    d0: dimension of the input
    d1: number of units in the hidden layer
    d2: number of output units = number of classes

    Returns W1 (d0,d1), b1 (d1,), W2 (d1,d2), b2 (d2,): the weights are
    small Gaussian noise (standard normal scaled by 0.01), the biases zero.
    '''
    scale = 0.01
    # W1 is drawn before W2 so that a seeded global RNG yields the same values.
    W1 = np.random.randn(d0, d1) * scale
    W2 = np.random.randn(d1, d2) * scale
    b1, b2 = np.zeros(d1), np.zeros(d2)
    return W1, b1, W2, b2

def mlp_predict(X,y,W1,b1,W2,b2):
    '''
    Predict a class index for every row of X.

    X: (N,d0) input points
    y: (N, ) labels -- accepted but not used by this function
    W1: (d0,d1), b1: hidden-layer weights and bias
    W2: (d1,d2), b2: output-layer weights and bias

    Returns an (N,) array holding, per row, the index of the largest output
    score. No softmax is applied, as it would not change the argmax.
    '''
    hidden = np.maximum(X.dot(W1) + b1, 0)  # ReLU activations, (N,d1)
    scores = hidden.dot(W2) + b2  # raw class scores, (N,d2)
    return np.argmax(scores, axis=1)

def mlp_fit(X,y,W1,b1,W2,b2, lr, n_iters=20000, loss_every=1000):
    """
    Train the network (ReLU hidden layer, softmax output) with full-batch
    gradient descent on the mean cross-entropy loss.

    X: (N,d0) input points
    y: (N, ) integer class labels, used as column indices into the output
    W1: (d0,d1), b1: (d1,), W2: (d1,d2), b2: (d2,) -- updated IN PLACE
    lr: learning rate
    n_iters: number of gradient steps (default 20000)
    loss_every: the loss is re-evaluated every `loss_every` steps (must be > 0)

    Returns W1, b1, W2, b2, loss_hist. loss_hist receives one entry per
    step, holding the most recently evaluated loss.
    """
    loss_hist = []
    n_points = X.shape[0]
    rows = np.arange(n_points)  # loop-invariant row index for label lookup

    for i in range(n_iters):
        # forward pass
        Z1 = X.dot(W1) + b1  # (N,d1)
        A1 = np.maximum(Z1, 0)  # ReLU, (N,d1)
        Z2 = A1.dot(W2) + b2  # (N,d2)
        Yhat = softmax_stabel(Z2)  # (N,d2)

        # The loss is only refreshed periodically; in between, the last
        # evaluated value is recorded again.
        if i % loss_every == 0:
            loss = crossentropy_loss(Yhat, y)
        loss_hist.append(loss)

        # back propagation
        # Gradient of the mean cross-entropy w.r.t. Z2 is
        # (softmax - one_hot(y)) / N; Yhat is reused as scratch space.
        Yhat[rows, y] -= 1
        E2 = Yhat / n_points  # (N,d2)
        dW2 = np.dot(A1.T, E2)  # (d1,d2)
        db2 = np.sum(E2, axis=0)  # (d2,)
        E1 = np.dot(E2, W2.T)  # (N,d1)
        E1[Z1 <= 0] = 0  # ReLU passes no gradient where its input was <= 0
        dW1 = np.dot(X.T, E1)  # (d0,d1)
        db1 = np.sum(E1, axis=0)  # (d1,)

        # gradient descent update (in place on the caller's arrays)
        W1 -= lr * dW1
        b1 -= lr * db1
        W2 -= lr * dW2
        b2 -= lr * db2

    # Return only after every step has run; returning from inside the loop
    # would stop training after a single update.
    return W1, b1, W2, b2, loss_hist
        