In [105]:
import numpy as np
# Seed NumPy's legacy global RNG so that subsequent random draws are reproducible.
np.random.seed(0)

In [110]:
# Re-seed the global RNG, then draw 10 standard-normal samples; as the last
# expression of the cell, the resulting array is displayed.
np.random.seed(0)
np.random.randn(10)

array([ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ,  1.86755799,
       -0.97727788,  0.95008842, -0.15135721, -0.10321885,  0.4105985 ])

In [121]:
def sigmoid(z, grad=False):
    """Logistic sigmoid, or its derivative with respect to ``z``.

    Args:
        z: scalar or NumPy array of pre-activations.
        grad: when True, return ``s * (1 - s)`` where ``s = sigmoid(z)``
            instead of ``s`` itself.

    Returns:
        ``1 / (1 + exp(-z))`` evaluated element-wise, or its derivative.
    """
    s = 1 / (1 + np.exp(-z))
    return s * (1 - s) if grad else s

def relu(z, grad=False):
    """Rectified linear unit, or its derivative with respect to ``z``.

    Args:
        z: scalar or NumPy array of pre-activations.
        grad: when True, return the derivative (0 where ``z <= 0``, else 1)
            instead of the activation (0 where ``z <= 0``, else ``z``).

    Returns:
        Array produced by ``np.where`` over the ``z <= 0`` mask.
    """
    non_positive = z <= 0
    # Both modes output 0 on the non-positive side; only the other side differs.
    positive_side = 1 if grad else z
    return np.where(non_positive, 0, positive_side)

def tanh(z, grad=False):
    """Hyperbolic tangent, or its derivative with respect to ``z``.

    Delegates to ``np.tanh``. Building the value from explicit exponentials,
    ``(e^z - e^-z) / (e^z + e^-z)``, overflows to ``inf / inf = nan`` once
    ``|z|`` is large (around 710 for float64), whereas ``np.tanh`` saturates
    cleanly at +/-1.

    Args:
        z: scalar or NumPy array of pre-activations.
        grad: when True, return ``1 - tanh(z) ** 2`` instead of ``tanh(z)``.

    Returns:
        ``tanh(z)`` evaluated element-wise, or its derivative.
    """
    t = np.tanh(z)
    if grad:
        return 1 - t ** 2
    return t

def leaky_relu(z, slope, grad=False):
    """Leaky ReLU, or its derivative with respect to ``z``.

    Args:
        z: scalar or NumPy array of pre-activations.
        slope: factor applied where ``z <= 0``.
        grad: when True, return the derivative (``slope`` where ``z <= 0``,
            else 1) instead of the activation (``slope * z`` where
            ``z <= 0``, else ``z``).

    Returns:
        Array produced by ``np.where`` over the ``z <= 0`` mask.
    """
    non_positive = z <= 0
    if grad:
        low_side, high_side = slope, 1
    else:
        low_side, high_side = slope * z, z
    return np.where(non_positive, low_side, high_side)

def log_loss(a, y, grad=False, eps=1e-15):
    """Element-wise binary cross-entropy, or its derivative with respect to ``a``.

    Predictions are clipped to ``[eps, 1 - eps]`` before use. Without the
    clip, a prediction of exactly 0 or 1 yields ``log(0) = -inf`` in the loss
    and a division by zero in the gradient. Note that ``np.where`` evaluates
    both log branches for every element, so even the unselected branch must
    stay finite.

    Args:
        a: predicted probabilities (scalar or NumPy array).
        y: binary targets, broadcastable against ``a``.
        grad: when True, return ``(a - y) / (a * (1 - a))`` instead of the loss.
        eps: clipping margin that keeps ``a`` strictly inside (0, 1).

    Returns:
        Per-element loss (``-log(1 - a)`` where ``y == 0``, else ``-log(a)``),
        or the per-element derivative when ``grad`` is True.
    """
    a = np.clip(a, eps, 1 - eps)
    if grad:
        return (a - y) / (a * (1 - a))
    return np.where(y == 0, -np.log(1 - a), -np.log(a))

def init_weights(random=True):
    """Create the weights and biases of the two-layer network.

    Reads the module-level ``x_dim``, ``first_hidden`` and ``second_hidden``
    for the layer sizes. The global NumPy RNG is re-seeded with 0 on every
    call, so repeated random initializations are identical.

    Args:
        random: when True, fill the arrays with ``np.random.random`` draws;
            otherwise fill them with zeros.

    Returns:
        Tuple ``(W_1, b_1, W_2, b_2)``.
    """
    np.random.seed(0)
    make = np.random.random if random else np.zeros
    layer_shapes = (
        (x_dim, first_hidden),          # W_1: weight matrix linking layer 0 to layer 1
        (1, first_hidden),              # b_1: bias vector of layer 1
        (first_hidden, second_hidden),  # W_2: weight matrix linking layer 1 to layer 2
        (1, second_hidden),             # b_2: bias vector of layer 2
    )
    # Arrays are created strictly in this order so the random stream is consumed
    # as W_1, b_1, W_2, b_2.
    W_1, b_1, W_2, b_2 = (make(shape) for shape in layer_shapes)
    return W_1, b_1, W_2, b_2


In [122]:
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()

# load the data for binary classification
x = data['data']
y = data['target'].reshape(-1, 1)  # column vector of targets
# NOTE(review): np.mean / np.std are called without `axis`, so one mean and one std
# over all entries of x are used rather than per-feature statistics (axis=0) —
# confirm this is intended.
x = (x - np.mean(x)) / np.std(x)

first_hidden = 64     # 64 units in the first hidden layer
second_hidden = 1    # 1 unit in the output layer

x_n, x_dim = x.shape  # number of samples, number of input features

# init_weights reads x_dim, first_hidden and second_hidden from module scope.
W_1, b_1, W_2, b_2 = init_weights(random=True)

In [123]:
# Layer specification passed to init_model:
#   '{i}_hidden' - width of layer i (layer 0 is the input, hence x_dim)
#   'n_layers'   - number of weight layers to create
#   'weights'    - container for the generated W_i / b_i arrays
# NOTE(review): 64 and 1 repeat the values of first_hidden / second_hidden
# literally; keep them in sync.
parameters = {
    '0_hidden': x_dim,
    'n_layers': 2,
    '1_hidden': 64,
    '2_hidden': 1,
    'weights': {}
}


def init_basic_weight(x_dim, z_dim):
    """Draw a random weight matrix and bias row for one dense layer.

    Both arrays come from ``np.random.random`` (the global RNG); the weight
    matrix is drawn first, then the bias.

    Args:
        x_dim: number of inputs to the layer.
        z_dim: number of units in the layer.

    Returns:
        Tuple ``(W, b)`` with shapes ``(x_dim, z_dim)`` and ``(1, z_dim)``.
    """
    weight = np.random.random((x_dim, z_dim))
    bias = np.random.random((1, z_dim))
    return weight, bias


def init_model(parameters):
    """Fill ``parameters['weights']`` with freshly initialized layer weights.

    For each layer index ``i`` in ``range(parameters['n_layers'])`` a weight
    matrix and bias are created by ``init_basic_weight`` using the sizes
    ``parameters[f'{i}_hidden']`` and ``parameters[f'{i+1}_hidden']``. They
    are stored under the keys ``'W_{i}'`` and ``'b_{i}'`` (indices start at 0).

    The function works on the dict it is given rather than on a module-level
    ``model`` variable, so it also runs on a fresh kernel where no ``model``
    exists yet.

    Args:
        parameters: dict with ``'n_layers'`` and ``'{i}_hidden'`` entries. A
            ``'weights'`` dict is created if missing and updated in place.

    Returns:
        The same ``parameters`` dict, now holding the weights.
    """
    weights = parameters.setdefault('weights', {})
    for i in range(parameters['n_layers']):
        w_i, b_i = init_basic_weight(parameters[f'{i}_hidden'], parameters[f'{i+1}_hidden'])
        weights[f'W_{i}'] = w_i
        weights[f'b_{i}'] = b_i
    return parameters


# Build the model dictionary (layer sizes plus initialized weights).
model = init_model(parameters)

In [124]:
# Unit tests checking that the data and parameters have the expected shapes

assert x.shape == (x_n, x_dim)
assert y.shape == (x_n, 1)
assert W_1.shape == (x_dim, first_hidden)
assert b_1.shape == (1, first_hidden)
assert W_2.shape == (first_hidden, second_hidden)
assert b_2.shape == (1, second_hidden)

In [132]:
# Start from freshly initialized weights; train_iters updates these arrays in place.
W_1, b_1, W_2, b_2 = init_weights(random=True)

PRINT_EVERY = 10     # read as a global by train_iters: the loss is printed every PRINT_EVERY iterations
EPOCHS = 100         # number of training iterations
lr = 0.1             # learning rate (step size of the weight updates)
slope = 0.05         # passed to train_iters as `slope`
activation = 'tanh'  # name of the hidden-layer activation function used by train_iters

def _resolve_activation(activation, slope):
    """Return a callable ``f(z, grad=False)`` for the hidden-layer activation.

    ``activation`` may be a callable or the name of a module-level function.
    Names are looked up in the module namespace instead of being passed to
    ``eval``, so an arbitrary expression string is never executed.
    """
    if callable(activation):
        fn = activation
    else:
        try:
            fn = globals()[activation]
        except KeyError:
            raise ValueError(f'Unknown activation: {activation!r}') from None
    if getattr(fn, '__name__', None) == 'leaky_relu':
        # leaky_relu takes the slope as a required second positional argument.
        return lambda z, grad=False: fn(z, slope, grad=grad)
    return fn


def train_iters(W_1, b_1, W_2, b_2, EPOCHS, lr, slope, activation):
    """Train the two-layer network with full-batch gradient descent.

    Reads the training data ``x`` / ``y`` and the logging interval
    ``PRINT_EVERY`` from module scope. The weight and bias arrays are updated
    IN PLACE (``-=``), so the caller's arrays hold the trained values after
    the call.

    Args:
        W_1, b_1: weights and bias of the hidden layer.
        W_2, b_2: weights and bias of the sigmoid output layer.
        EPOCHS: number of gradient-descent iterations.
        lr: learning rate.
        slope: slope forwarded to ``leaky_relu`` when that activation is
            selected; ignored by the other activations.
        activation: name of a module-level activation function
            (e.g. ``'tanh'``, ``'relu'``, ``'sigmoid'``, ``'leaky_relu'``)
            or a callable ``f(z, grad=False)``.

    Returns:
        List with the mean log-loss measured at the start of every iteration.

    Raises:
        ValueError: if ``activation`` is a name that is not defined.
    """
    act = _resolve_activation(activation, slope)
    n_samples = x.shape[0]
    losses = []
    for i in range(EPOCHS):
        # forward pass
        z_1 = x @ W_1 + b_1
        a_1 = act(z_1)
        z_2 = a_1 @ W_2 + b_2
        a_2 = sigmoid(z_2)
        loss = log_loss(a_2, y).mean()
        losses.append(loss)
        if i % PRINT_EVERY == 0:
            print('Log-loss: {:.3f}, Iter: {}'.format(loss, i))

        # backward pass
        # For a sigmoid output with log-loss, dL/dz_2 simplifies to (a_2 - y).
        # Forming it as (a - y) / (a (1 - a)) * a (1 - a) gives 0/0 = nan once
        # the output saturates at exactly 0 or 1.
        dz_2 = a_2 - y
        dw_2 = (a_1.T @ dz_2) / n_samples
        db_2 = dz_2.mean(axis=0, keepdims=True)

        # Hidden-layer delta, computed once. The matrix product equals the
        # broadcast W_2.T * dz_2 for one output unit and stays valid for more.
        dz_1 = (dz_2 @ W_2.T) * act(z_1, grad=True)
        dw_1 = (x.T @ dz_1) / n_samples
        db_1 = dz_1.mean(axis=0, keepdims=True)

        W_1 -= lr * dw_1
        W_2 -= lr * dw_2
        b_1 -= lr * db_1
        b_2 -= lr * db_2
    return losses

# Run training with the settings above; `losses` holds the mean log-loss of every iteration.
losses = train_iters(W_1, b_1, W_2, b_2, EPOCHS, lr, slope, activation)

Log-loss: 15.910, Iter: 0
Log-loss: 0.548, Iter: 10
Log-loss: 0.265, Iter: 20
Log-loss: 0.243, Iter: 30
Log-loss: 0.236, Iter: 40
Log-loss: 0.231, Iter: 50
Log-loss: 0.227, Iter: 60
Log-loss: 0.224, Iter: 70
Log-loss: 0.222, Iter: 80
Log-loss: 0.219, Iter: 90


In [152]:
# Learning-rate sweep: for each value in np.arange(0, 1, 0.1) re-initialize the
# weights, train, and record the lowest loss reached during that run.
# NOTE(review): the grid starts at lr = 0, for which the updates are zero and the
# loss never changes (see the constant 15.910 in the first run's output).
stats = []
for i in np.arange(0, 1, 0.1):  # `i` is the learning rate here, not an index
    W_1, b_1, W_2, b_2 = init_weights(random=True)
    stats.append(min(train_iters(W_1, b_1, W_2, b_2, EPOCHS, i, slope, activation)))

Log-loss: 15.910, Iter: 0
Log-loss: 15.910, Iter: 10
Log-loss: 15.910, Iter: 20
Log-loss: 15.910, Iter: 30
Log-loss: 15.910, Iter: 40
Log-loss: 15.910, Iter: 50
Log-loss: 15.910, Iter: 60
Log-loss: 15.910, Iter: 70
Log-loss: 15.910, Iter: 80
Log-loss: 15.910, Iter: 90
Log-loss: 15.910, Iter: 0
Log-loss: 0.548, Iter: 10
Log-loss: 0.265, Iter: 20
Log-loss: 0.243, Iter: 30
Log-loss: 0.236, Iter: 40
Log-loss: 0.231, Iter: 50
Log-loss: 0.227, Iter: 60
Log-loss: 0.224, Iter: 70
Log-loss: 0.222, Iter: 80
Log-loss: 0.219, Iter: 90
Log-loss: 15.910, Iter: 0
Log-loss: 0.256, Iter: 10
Log-loss: 0.233, Iter: 20
Log-loss: 0.226, Iter: 30
Log-loss: 0.220, Iter: 40
Log-loss: 0.217, Iter: 50
Log-loss: 0.214, Iter: 60
Log-loss: 0.211, Iter: 70
Log-loss: 0.209, Iter: 80
Log-loss: 0.207, Iter: 90
Log-loss: 15.910, Iter: 0
Log-loss: 0.236, Iter: 10
Log-loss: 0.224, Iter: 20
Log-loss: 0.217, Iter: 30
Log-loss: 0.213, Iter: 40
Log-loss: 0.210, Iter: 50
Log-loss: 0.207, Iter: 60
Log-loss: 0.204, Iter: 70
Log

In [156]:
# Map the position of the smallest recorded loss back onto the learning-rate grid;
# the np.arange call must match the one used in the sweep.
print('Best LR is', np.arange(0, 1, 0.1)[stats.index(min(stats))])

Best LR is 0.4
