In [205]:
import pandas as pd
import h5py
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

np.random.seed(1)

In [206]:
def load_dataset():
    train_dataset = h5py.File('catsvsdogs-train.h5', "r")
    train_set_x_orig = np.array(train_dataset["images"][:]) # your train set features
    train_set_y_orig = np.array(train_dataset["labels"][:]) # your train set labels

    test_dataset = h5py.File('catsvsdogs-test.h5', "r")
    test_set_x_orig = np.array(test_dataset["images"][:]) # your test set features
    test_set_y_orig = np.array(test_dataset["labels"][:]) # your test set labels
    
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig

In [207]:
train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig = load_dataset()

In [208]:
train_set_y_orig.shape

(1, 5000)

In [209]:
# Convert X to 2D array
X_train, X_validation, y_train, y_validation = train_test_split(train_set_x_orig.flatten().reshape((len(train_set_x_orig),-1)) / 255, train_set_y_orig.T, test_size = 0.2, random_state = 42)
X_test = test_set_x_orig.flatten().reshape((len(test_set_x_orig),-1)) / 255

# Convert y to 1D array
y_train = y_train.reshape(1, -1)
y_validation = y_validation.reshape(1, -1)
y_test = test_set_y_orig.reshape(1, -1)

In [210]:
def Sigmoid(z):
    return 1/(1 + np.exp(-z))

In [211]:
def Sigmoid_derivative(Z):
    s = 1/(1+np.exp(-Z))
    dZ = s * (1-s)
    return dZ

In [212]:
def Relu(Z):
    return np.maximum(0, Z)

In [213]:
def Relu_derivative(Z):
    temp = np.ones(Z.shape)
    temp[Z<0] = 0
    return temp

In [214]:
def Loss(y, y_hat):
    return np.sum((1./y.shape[1]) * (-np.multiply(y,np.log(y_hat)) - np.multiply(1-y, np.log(1-y_hat))))

In [215]:
def Init_layer(X, y, num_layer = 3, num_node = 108, method='randn', random_state = 1):    
    if random_state == 1:
        np.random.seed(1)

    param = {}
    dims = []
    dims.append(X.shape[1])
    for i in range(num_layer):
        dims.append(num_node)
    dims.append(y.shape[0])

    if method == 'randn':
        for i in range(1, num_layer+2):
            param['W'+str(i)] = np.random.randn(dims[i],dims[i-1]) / np.sqrt(dims[i-1]) 
            param['b'+str(i)] = np.random.randn(dims[i],1)                      
    elif method == 'normal':
        for i in range(1, num_layer+2):
            param['W'+str(i)] = np.random.normal(0, 1/y.shape[1], (dims[i],dims[i-1])) 
            param['b'+str(i)] = np.ones((dims[i],1))   

    return param, dims

In [216]:
def feed_forward(X, y, param, dims):
    ch = {}
    ch['A0'] = X.T
    k = len(dims) - 1

    for i in range(1, k+1):
        Z = param['W'+str(i)].dot(ch['A'+str(i-1)]) + param['b'+str(i)] 
        if i == k:
            A = Sigmoid(Z)
        else:
            A = Relu(Z)
        ch['Z'+str(i)],ch['A'+str(i)]=Z,A

    loss=Loss(y, ch['A'+str(k)])
    return ch, loss

In [217]:
def backward(y, ch, param, dims, lr, regulization = None):
    if regulization == 'l2':
        regu = 1
    else:
        regu = 0
    grad = {}
    k = len(dims) - 1
    m = y.shape[1]
    for i in range(1, k+1)[::-1]:
        if i == k:
            e = (ch['A'+str(k)] - y) / m 
        else:
            e = np.dot(param['W'+str(i+1)].T, grad['E'+str(i+1)]) * Relu_derivative(ch['Z'+str(i)])
        grad['E'+str(i)] = e
        grad['dW'+str(i)] = np.dot(e, ch['A'+str(i-1)].T) + 0.01 * param['W'+str(i)] * regu
        grad['db'+str(i)] = np.sum(e)

    return grad

In [229]:
# normal neural net

iteration = 2000
param, dims = Init_layer(X_train, y_train, 4, 35)
lr = 0.1
test_error = [1000]
train_error = []
k = len(dims) - 1
for i in range(iteration):
    mini_batch = np.random.randint(0, X_train.shape[0], 64)
    X_batch = X_train[mini_batch, :]
    y_batch = y_train[0, mini_batch].reshape(1, -1)
    ch, loss = feed_forward(X_batch, y_batch, param, dims)
    grad = backward(y_batch, ch, param, dims, lr)
        
    for j in range(1, k+1):
        param['W'+str(j)] -= lr * grad['dW'+str(j)]
        param['b'+str(j)] -= lr * grad['db'+str(j)]
   
    if i % 100 == 0:
        J_train = feed_forward(X_train, y_train, param, dims)[1]
        print('iteration: ',i, ' Loss: ', J_train)
    if i%100 == 0:
        predicts = feed_forward(X_validation, y_validation, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5  
        predicts_test = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5   
        print('accuracy score validation: ', accuracy_score(y_validation.T, predicts.T))    
        print('accuracy score test: ', accuracy_score(y_test.T, predicts_test.T))       

    if (i%500 == 0):
        lr /= 2   
        print(lr)

iteration:  0  Loss:  1.9938894504470903
accuracy score validation:  0.511
accuracy score test:  0.49
0.05
iteration:  100  Loss:  0.6944525762087818
accuracy score validation:  0.489
accuracy score test:  0.51
iteration:  200  Loss:  0.6925333231175099
accuracy score validation:  0.493
accuracy score test:  0.516
iteration:  300  Loss:  0.6826650059042804
accuracy score validation:  0.555
accuracy score test:  0.566
iteration:  400  Loss:  0.6798588196551001
accuracy score validation:  0.552
accuracy score test:  0.558
iteration:  500  Loss:  0.6941153384029657
accuracy score validation:  0.514
accuracy score test:  0.536
0.025
iteration:  600  Loss:  0.6689219164105051
accuracy score validation:  0.572
accuracy score test:  0.574
iteration:  700  Loss:  0.6707496574987832
accuracy score validation:  0.588
accuracy score test:  0.61
iteration:  800  Loss:  0.6538746476675177
accuracy score validation:  0.598
accuracy score test:  0.61
iteration:  900  Loss:  0.69019054959131
accuracy 

In [None]:
for i in range(iteration):
    mini_batch = np.random.randint(0, X_train.shape[0], 64)
    X_batch = X_train[mini_batch, :]
    y_batch = y_train[0, mini_batch].reshape(1, -1)
    ch, loss = feed_forward(X_batch, y_batch, param, dims)
    grad = backward(y_batch, ch, param, dims, lr)
        
    for j in range(1, k+1):
        param['W'+str(j)] -= lr * grad['dW'+str(j)]
        param['b'+str(j)] -= lr * grad['db'+str(j)]
   
    if i % 100 == 0:
        J_train = feed_forward(X_train, y_train, param, dims)[1]
        print('iteration: ',i, ' Loss: ', J_train)
    if i%100 == 0:
        predicts = feed_forward(X_validation, y_validation, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5  
        predicts_test = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5   
        print('accuracy score validation: ', accuracy_score(y_validation.T, predicts.T))    
        print('accuracy score test: ', accuracy_score(y_test.T, predicts_test.T))       

    if ((i%500 == 0) & (i>1)):
        lr /= 2   
        print(lr)

iteration:  0  Loss:  0.5927351522329951
accuracy score validation:  0.62
accuracy score test:  0.612
iteration:  100  Loss:  0.6546455481989716
accuracy score validation:  0.556
accuracy score test:  0.558
iteration:  200  Loss:  0.6156413840588764
accuracy score validation:  0.579
accuracy score test:  0.574
iteration:  300  Loss:  0.5894124436866046
accuracy score validation:  0.597
accuracy score test:  0.598
iteration:  400  Loss:  0.5813653792594595
accuracy score validation:  0.612
accuracy score test:  0.606
iteration:  500  Loss:  0.5762449187740519
accuracy score validation:  0.615
accuracy score test:  0.62
0.003125
iteration:  600  Loss:  0.5873580397455468
accuracy score validation:  0.609
accuracy score test:  0.626
iteration:  700  Loss:  0.5590800686008373
accuracy score validation:  0.618
accuracy score test:  0.626
iteration:  800  Loss:  0.5889452117860532
accuracy score validation:  0.59
accuracy score test:  0.584
iteration:  900  Loss:  0.55876001955813
accuracy s

In [226]:
for i in range(iteration):
    mini_batch = np.random.randint(0, X_train.shape[0], 64)
    X_batch = X_train[mini_batch, :]
    y_batch = y_train[0, mini_batch].reshape(1, -1)
    ch, loss = feed_forward(X_batch, y_batch, param, dims)
    grad = backward(y_batch, ch, param, dims, lr)
        
    for j in range(1, k+1):
        param['W'+str(j)] -= lr * grad['dW'+str(j)]
        param['b'+str(j)] -= lr * grad['db'+str(j)]
   
    if i % 100 == 0:
        J_train = feed_forward(X_train, y_train, param, dims)[1]
        print('iteration: ',i, ' Loss: ', J_train)
    if i%100 == 0:
        predicts = feed_forward(X_validation, y_validation, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5  
        predicts_test = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5   
        print('accuracy score validation: ', accuracy_score(y_validation.T, predicts.T))    
        print('accuracy score test: ', accuracy_score(y_test.T, predicts_test.T))       

    if (i%500 == 0):
        lr /= 2   
        print(lr)

iteration:  0  Loss:  0.6126793293249149
accuracy score validation:  0.605
accuracy score test:  0.614
0.0015625
iteration:  100  Loss:  0.6161912234125029
accuracy score validation:  0.588
accuracy score test:  0.616
iteration:  200  Loss:  0.6076229628920211
accuracy score validation:  0.595
accuracy score test:  0.628
iteration:  300  Loss:  0.6062038001760329
accuracy score validation:  0.605
accuracy score test:  0.622
iteration:  400  Loss:  0.6086605069199281
accuracy score validation:  0.598
accuracy score test:  0.62
0.00078125
iteration:  500  Loss:  0.6056425787115369
accuracy score validation:  0.609
accuracy score test:  0.612
iteration:  600  Loss:  0.6046704309278774
accuracy score validation:  0.607
accuracy score test:  0.638
iteration:  700  Loss:  0.6011355178182889
accuracy score validation:  0.603
accuracy score test:  0.624
iteration:  800  Loss:  0.6006937856118263
accuracy score validation:  0.602
accuracy score test:  0.616
0.000390625
iteration:  900  Loss:  0

In [228]:
for i in range(iteration):
    mini_batch = np.random.randint(0, X_train.shape[0], 64)
    X_batch = X_train[mini_batch, :]
    y_batch = y_train[0, mini_batch].reshape(1, -1)
    ch, loss = feed_forward(X_batch, y_batch, param, dims)
    grad = backward(y_batch, ch, param, dims, lr)
        
    for j in range(1, k+1):
        param['W'+str(j)] -= lr * grad['dW'+str(j)]
        param['b'+str(j)] -= lr * grad['db'+str(j)]
   
    if i % 100 == 0:
        J_train = feed_forward(X_train, y_train, param, dims)[1]
        print('iteration: ',i, ' Loss: ', J_train)
    if i%100 == 0:
        predicts = feed_forward(X_validation, y_validation, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5  
        predicts_test = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5   
        print('accuracy score validation: ', accuracy_score(y_validation.T, predicts.T))    
        print('accuracy score test: ', accuracy_score(y_test.T, predicts_test.T))       

    if ((i%400 == 0) & (lr > 0.00003)):
        lr /= 2   
        print(lr)

iteration:  0  Loss:  0.5936369442085235
accuracy score validation:  0.602
accuracy score test:  0.644
4.8828125e-05
iteration:  100  Loss:  0.5933838803694436
accuracy score validation:  0.609
accuracy score test:  0.636
iteration:  200  Loss:  0.5932685450790218
accuracy score validation:  0.608
accuracy score test:  0.64
iteration:  300  Loss:  0.5932768925173137
accuracy score validation:  0.606
accuracy score test:  0.638
iteration:  400  Loss:  0.5933758797952782
accuracy score validation:  0.603
accuracy score test:  0.636
2.44140625e-05
iteration:  500  Loss:  0.5931016846822831
accuracy score validation:  0.609
accuracy score test:  0.64
iteration:  600  Loss:  0.5930781883456697
accuracy score validation:  0.607
accuracy score test:  0.638
iteration:  700  Loss:  0.5930002308161553
accuracy score validation:  0.606
accuracy score test:  0.642
iteration:  800  Loss:  0.592970509655866
accuracy score validation:  0.606
accuracy score test:  0.64
iteration:  900  Loss:  0.592952

In [223]:
for i in range(iteration):
    mini_batch = np.random.randint(0, X_train.shape[0], 64)
    X_batch = X_train[mini_batch, :]
    y_batch = y_train[0, mini_batch].reshape(1, -1)
    ch, loss = feed_forward(X_batch, y_batch, param, dims)
    grad = backward(y_batch, ch, param, dims, lr)
        
    for j in range(1, k+1):
        param['W'+str(j)] -= lr * grad['dW'+str(j)]
        param['b'+str(j)] -= lr * grad['db'+str(j)]
   
    if i % 100 == 0:
        J_train = feed_forward(X_train, y_train, param, dims)[1]
        print('iteration: ',i, ' Loss: ', J_train)
    if i%100 == 0:
        predicts = feed_forward(X_validation, y_validation, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5  
        predicts_test = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5   
        print('accuracy score validation: ', accuracy_score(y_validation.T, predicts.T))    
        print('accuracy score test: ', accuracy_score(y_test.T, predicts_test.T)) 

iteration:  0  Loss:  0.5452434182862218
accuracy score validation:  0.632
accuracy score test:  0.652
iteration:  100  Loss:  0.545183132067119
accuracy score validation:  0.635
accuracy score test:  0.65
iteration:  200  Loss:  0.5452125587998857
accuracy score validation:  0.636
accuracy score test:  0.658
iteration:  300  Loss:  0.5449398988546333
accuracy score validation:  0.633
accuracy score test:  0.65
iteration:  400  Loss:  0.5448683650385318
accuracy score validation:  0.636
accuracy score test:  0.652
iteration:  500  Loss:  0.5446528355866211
accuracy score validation:  0.631
accuracy score test:  0.652
iteration:  600  Loss:  0.5446290090049842
accuracy score validation:  0.634
accuracy score test:  0.65
iteration:  700  Loss:  0.5444459603127434
accuracy score validation:  0.63
accuracy score test:  0.65
iteration:  800  Loss:  0.544603619128703
accuracy score validation:  0.635
accuracy score test:  0.654
iteration:  900  Loss:  0.5441866252434995
accuracy score valida

KeyboardInterrupt: 

In [94]:
# init W with normal distribution

iteration = 2000
param, dims = Init_layer(X_train, y_train, 4, 35, method='normal')
lr = 0.05
test_error = [1000]
train_error = []
k = len(dims) - 1
for i in range(iteration):
    mini_batch = np.random.randint(0, X_train.shape[0], 64)
    X_batch = X_train[mini_batch, :]
    y_batch = y_train[0, mini_batch].reshape(1, -1)
    ch, loss = feed_forward(X_batch, y_batch, param, dims)
    grad = backward(y_batch, ch, param, dims, lr)
        
    for j in range(1, k+1):
        param['W'+str(j)] -= lr * grad['dW'+str(j)]
        param['b'+str(j)] -= lr * grad['db'+str(j)]
   
    if i % 100 == 0:
        J_train = feed_forward(X_train, y_train, param, dims)[1]
        print('iteration: ',i, ' Loss: ', J_train)
    if i%100 == 0:
        predicts = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5   
        print('accuracy score: ', accuracy_score(y_test.T, predicts.T))       
    if ((i%300==0) & (i < 2100)):
        lr /= 2        

iteration:  0  Loss:  0.6955095821083643
accuracy score:  0.49
iteration:  100  Loss:  0.6941612657646118
accuracy score:  0.51
iteration:  200  Loss:  0.6948577463266178
accuracy score:  0.49
iteration:  300  Loss:  0.6931529473043672
accuracy score:  0.51
iteration:  400  Loss:  0.695208482255957
accuracy score:  0.51


KeyboardInterrupt: 

In [44]:
param['W1'].shape

(35, 49152)

In [52]:
## Nesterov 

iteration = 2000
param, dims = Init_layer(X_train, y_train, 4, 35)
param_pre = {}
lr = 0.003
test_error = [1000]
train_error = []
k = len(dims) - 1
vt, vb = {}, {}

for i in range(iteration):
    mini_batch = np.random.randint(0, X_train.shape[0], 64)
    X_batch = X_train[mini_batch, :]
    y_batch = y_train[0, mini_batch].reshape(1, -1)
    ch, loss = feed_forward(X_batch, y_batch, param, dims)
    grad = backward(y_batch, ch, param, dims, lr)
 
    for j in range(1, k+1):
        param_pre['W'+str(j)] = param['W'+str(j)] - lr * grad['dW'+str(j)]
        param_pre['b'+str(j)] = param['b'+str(j)] - lr * grad['db'+str(j)]   
        
    ch_pre, loss_pre = feed_forward(X_batch, y_batch, param_pre, dims)
    grad_pre = backward(y_batch, ch_pre, param_pre, dims, lr)
    
    for j in range(1, k+1):
        if i == 0:
            vt['V'+str(j)] = lr * grad_pre['dW'+str(j)]
            vb['b'+str(j)] = lr * grad_pre['db'+str(j)]
        else:
            vt['V'+str(j)] = 0.9*vt['V'+str(j)] + lr * grad_pre['dW'+str(j)]
            vb['b'+str(j)] = 0.9*vb['b'+str(j)] + lr * grad_pre['db'+str(j)]
        param['W'+str(j)] -= vt['V'+str(j)]
        param['b'+str(j)] -= vb['b'+str(j)]
   
    if i % 100 == 0:
        J_train = feed_forward(X_train, y_train, param, dims)[1]
        print('iteration: ',i, ' Loss: ', J_train)
    if i%100 == 0:
        predicts = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5   
        print('accuracy score: ', accuracy_score(y_test.T, predicts.T))       
    if ((i%300==0) & (i < 2100)):
        lr /= 2  

iteration:  0  Loss:  0.8871804176405083
accuracy score:  0.51


KeyboardInterrupt: 

In [53]:
## Nesterov with standardscaler

from sklearn.preprocessing import StandardScaler
X_train_scale = StandardScaler().fit_transform(X_train)
X_test_scale = StandardScaler().fit_transform(X_test)

iteration = 3000
param, dims = Init_layer(X_train, y_train, 4, 35)
param_pre = {}
lr = 0.003
test_error = [1000]
train_error = []
k = len(dims) - 1
vt, vb = {}, {}

for i in range(iteration):
    mini_batch = np.random.randint(0, X_train_scale.shape[0], 64)
    X_batch = X_train_scale[mini_batch, :]
    y_batch = y_train[0, mini_batch].reshape(1, -1)
    ch, loss = feed_forward(X_batch, y_batch, param, dims)
    grad = backward(y_batch, ch, param, dims, lr)
 
    for j in range(1, k+1):
        param_pre['W'+str(j)] = param['W'+str(j)] - lr * grad['dW'+str(j)]
        param_pre['b'+str(j)] = param['b'+str(j)] - lr * grad['db'+str(j)]   
        
    ch_pre, loss_pre = feed_forward(X_batch, y_batch, param_pre, dims)
    grad_pre = backward(y_batch, ch_pre, param_pre, dims, lr)
    
    for j in range(1, k+1):
        if i == 0:
            vt['V'+str(j)] = lr * grad_pre['dW'+str(j)]
            vb['b'+str(j)] = lr * grad_pre['db'+str(j)]
        else:
            vt['V'+str(j)] = 0.9*vt['V'+str(j)] + lr * grad_pre['dW'+str(j)]
            vb['b'+str(j)] = 0.9*vb['b'+str(j)] + lr * grad_pre['db'+str(j)]
        param['W'+str(j)] -= vt['V'+str(j)]
        param['b'+str(j)] -= vb['b'+str(j)]
   
    if i % 100 == 0:
        J_train = feed_forward(X_train_scale, y_train, param, dims)[1]
        print('iteration: ',i, ' Loss: ', J_train)
    if i%100 == 0:
        predicts = feed_forward(X_test_scale, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5   
        print('accuracy score: ', accuracy_score(y_test.T, predicts.T))       
    if ((i%300==0) & (i < 2100)):
        lr /= 2  

iteration:  0  Loss:  0.9704499954256084
accuracy score:  0.51
iteration:  100  Loss:  0.5974081422903474
accuracy score:  0.592
iteration:  200  Loss:  0.5273934843292094
accuracy score:  0.622
iteration:  300  Loss:  0.4641967823611979
accuracy score:  0.618
iteration:  400  Loss:  0.3676355282946672
accuracy score:  0.624
iteration:  500  Loss:  0.31023648415954586
accuracy score:  0.604
iteration:  600  Loss:  0.24018278939045262
accuracy score:  0.608
iteration:  700  Loss:  0.1663220202829015
accuracy score:  0.61
iteration:  800  Loss:  0.14359585109620937
accuracy score:  0.644
iteration:  900  Loss:  0.10208230017430005
accuracy score:  0.628
iteration:  1000  Loss:  0.08094159644830227
accuracy score:  0.638
iteration:  1100  Loss:  0.07172721464890514
accuracy score:  0.624
iteration:  1200  Loss:  0.057891172865939367
accuracy score:  0.62
iteration:  1300  Loss:  0.04956170070121617
accuracy score:  0.624
iteration:  1400  Loss:  0.04648986230826758
accuracy score:  0.632


KeyboardInterrupt: 

In [82]:
# gradient bossting

lr = 0.005
k = len(dims) - 1
X_train_gra = X_train
y_train_gra = y_train
model_param = {}
model_predict = np.zeros((1, y_test.shape[1]))

for num_tree in range(1,10):
    param, dims = Init_layer(X_train, y_train, 4, 35)

    for i in range(10):
        mini_batch = np.random.randint(0, X_train.shape[0], 64)
        X_batch = X_train_gra[mini_batch, :]
        y_batch = y_train_gra[0, mini_batch].reshape(1, -1)
        ch, loss = feed_forward(X_batch, y_batch, param, dims)
        grad = backward(y_batch, ch, param, dims, lr)
        
        for j in range(1, k+1):
            param['W'+str(j)] -= lr * grad['dW'+str(j)]
            param['b'+str(j)] -= lr * grad['db'+str(j)]

        if i % 100 == 0:
            J_train = feed_forward(X_train, y_train, param, dims)[1]
            print('iteration: ',i, ' Loss: ', J_train)
        if i%100 == 0:
            predicts = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5   
    
    model_param[str(num_tree)+'model'] = param
    predict_train = feed_forward(X_train_gra, y_train_gra, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5 
    model_predict += predicts
    print('accuracy score: ', accuracy_score(y_test.T, model_predict.T))  

iteration:  0  Loss:  0.7610668796365103
accuracy score:  0.51
iteration:  0  Loss:  0.7610668796365103
accuracy score:  0.51
iteration:  0  Loss:  0.7610668796365103
accuracy score:  0.51
iteration:  0  Loss:  0.7610668796365103
accuracy score:  0.51


KeyboardInterrupt: 

In [103]:
iteration = 200
test_error = [1000]
train_error = []
k = len(dims) - 1
all_model_prediction = np.zeros((1, y_test.shape[1]))
X_train_gra = X_train
y_train_gra = y_train

for num_model in range(2):
    lr = 0.05
    param, dims = Init_layer(X_train_gra, y_train_gra, 4, 35)
    for i in range(101):
        mini_batch = np.random.randint(0, X_train.shape[0], 64)
        X_batch = X_train_gra[mini_batch, :]
        y_batch = y_train_gra[0, mini_batch].reshape(1, -1)
        ch, loss = feed_forward(X_batch, y_batch, param, dims)
        grad = backward(y_batch, ch, param, dims, lr)

        for j in range(1, k+1):
            param['W'+str(j)] -= lr * grad['dW'+str(j)]
            param['b'+str(j)] -= lr * grad['db'+str(j)]

        if i%100 == 0:
            print(feed_forward(X_train_gra, y_train_gra, param, dims)[1])
            predicts = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5   
            print('accuracy score: ', accuracy_score(y_test.T, predicts.T))       
        if (((i%200==0) & (i < 1000)) | ((i%500==0) & (i > 0.0001))):
            lr /= 2  
    y_train_predict = feed_forward(X_train_gra, y_train_gra, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5 
    print(y_train_predict)
    y_test_predict = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5 
    y_train_gra -= y_train_predict
    all_model_prediction += y_test_predict

0.23866094262372387
accuracy score:  0.51
0.30613918040231886
accuracy score:  0.51
[[False False False ... False False False]]
0.23866094262372387
accuracy score:  0.51


KeyboardInterrupt: 

In [152]:
iteration = 500
lr = 0.05
test_error = [1000]
train_error = []
k = len(dims) - 1
all_predict = np.zeros((1, y_test.shape[1]))
for num in range(3):
    param, dims = Init_layer(X_train, y_train, 4, 35)
    lr = 0.05
    for i in range(iteration):
        mini_batch = np.random.randint(0, X_train.shape[0], 64)
        X_batch = X_train[mini_batch, :]
        y_batch = y_train[0, mini_batch].reshape(1, -1)
        ch, loss = feed_forward(X_batch, y_batch, param, dims)
        grad = backward(y_batch, ch, param, dims, lr)

        for j in range(1, k+1):
            param['W'+str(j)] -= lr * grad['dW'+str(j)]
            param['b'+str(j)] -= lr * grad['db'+str(j)]

        if i % 100 == 0:
            J_train = feed_forward(X_train, y_train, param, dims)[1]
            print('iteration: ',i, ' Loss: ', J_train)
        if i%100 == 0:
            predicts = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5   
            print('accuracy score: ', accuracy_score(y_test.T, predicts.T))       
        if (((i%200==0) & (i < 1000)) | ((i%500==0) & (i > 0.0001))):
            lr /= 2        

    y_train_predict = feed_forward(X_train, y_train, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5  
    y_train -= y_train_predict
    y_test_predict = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5  
    all_predict += y_test_predict

iteration:  0  Loss:  0.7637806535320173
accuracy score:  0.49
iteration:  100  Loss:  0.6887826298440516
accuracy score:  0.534
iteration:  200  Loss:  0.6817449639844069
accuracy score:  0.554
iteration:  300  Loss:  0.6782380934493155
accuracy score:  0.548
iteration:  400  Loss:  0.6642642021853835
accuracy score:  0.62
iteration:  0  Loss:  0.13350048979445994
accuracy score:  0.51
iteration:  100  Loss:  0.03332931646988882
accuracy score:  0.51
iteration:  200  Loss:  0.031557945135830184
accuracy score:  0.51
iteration:  300  Loss:  0.09343441667893834
accuracy score:  0.51
iteration:  400  Loss:  0.09150456184566605
accuracy score:  0.51
iteration:  0  Loss:  0.13350048979445994
accuracy score:  0.51
iteration:  100  Loss:  0.03332931646988882
accuracy score:  0.51
iteration:  200  Loss:  0.031557945135830184
accuracy score:  0.51
iteration:  300  Loss:  0.09343441667893834
accuracy score:  0.51
iteration:  400  Loss:  0.09150456184566605
accuracy score:  0.51


In [153]:
accuracy_score(y_test.T, all_predict.T)

0.616

In [91]:
from sklearn.metrics import accuracy_score
predicts = feed_forward(X_test, y_test, param, dims)[0]['A'+str(len(dims)-1)] >= 0.5
print('accuracy score: ', accuracy_score(y_test.T, predicts.T))

accuracy score:  0.51
