# 0. Dependências

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder

%matplotlib inline

# 1. Introdução

# 2. Dados 

Os dados de cada tipo de problema (regressão, classificação binária e multiclasse) estão definidos nos testes da implementação.

# 3. Implementação 

###  Funções de ativação

In [2]:
def linear(x, derivative=False):
    return np.ones_like(x) if derivative else x

def sigmoid(x, derivative=False):
    if derivative:
        y = sigmoid(x)
        return y*(1 - y)
    return 1.0/(1.0 + np.exp(-x))

def tanh(x, derivative=False):
    if derivative:
        y = tanh(x)
        return 1 - y**2
    return (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))

def relu(x, derivative=False):
    if derivative:
        return np.where(x <= 0, 0, 1)
    return np.maximum(0, x)

def leaky_relu(x, derivative=False):
    alpha = 0.1
    if derivative:
        return np.where(x <= 0, alpha, 1)
    return np.where(x <= 0, alpha*x, x)

def elu(x, derivative=False):
    alpha = 1.0
    if derivative:
        y = elu(x)
        return np.where(x <= 0, y + alpha, 1)
    return np.where(x <= 0, alpha*(np.exp(x) - 1), x)

# other functions
def softmax(x, y_oh=None, derivative=False):
    if derivative: 
        y_pred = softmax(x)
        y_correct = np.argmax(y_oh, axis=1)
        pk = y_pred[range(y_pred.shape[0]), y_correct]
        y_pred[range(y_pred.shape[0]), y_correct] = pk*(1.0 - pk)
        return y_pred
    exp = np.exp(x)
    return exp/np.sum(exp, axis=1, keepdims=True)

def neg_log_likelihood(y_oh, y_pred, derivative=False):
    y_correct = np.argmax(y_oh, axis=1)
    pk = y_pred[range(y_pred.shape[0]), y_correct]
    if derivative:
        y_pred[range(y_pred.shape[0]), y_correct] = (-1.0/pk)
        return y_pred
    return np.mean(-np.log(pk))

### Funções de Custo 

In [3]:
# cost functions
def mae(y, y_pred, derivative=False):
    if derivative:
        return np.where(y_pred > y, 1, -1) / y.shape[0]
    return np.mean(np.abs(y - y_pred))

def mse(y, y_pred, derivative=False):
    if derivative:
        return -(y - y_pred) / y.shape[0]
    return 0.5*np.mean((y - y_pred)**2)

def binary_cross_entropy(y, y_pred, derivative=False):
    if derivative:
        return -(y - y_pred) / (y_pred * (1-y_pred) * y.shape[0])
    return -np.mean(y*np.log(y_pred) + (1-y)*np.log(1-y_pred))

def softmax_neg_log_likelihood(y_oh, y_pred, derivative=False):
    y_softmax = softmax(y_pred)
    y_correct = np.argmax(y_oh, axis=1)
    pk = y_softmax[range(y_softmax.shape[0]), y_correct]
    if derivative:
        return -(y_oh - y_softmax)/y_oh.shape[0]
    return np.mean(-np.log(pk))

### Checagem dos Gradientes

In [4]:
def __compute_approx_grads(nn, x, y, eps=1e-4):
    approx_grads = []
    feed_forward = lambda inp: nn._NeuralNetwork__feedforward(inp)

    for layer in nn.layers:
        w_ori = layer.weights.copy()
        w_ravel = w_ori.ravel()
        w_shape = w_ori.shape

        for i in range(w_ravel.size):
            w_plus = w_ravel.copy()
            w_plus[i] += eps
            layer.weights = w_plus.reshape(w_shape)
            J_plus = nn.cost_func(y, feed_forward(x))

            w_minus = w_ravel.copy()
            w_minus[i] -= eps
            layer.weights = w_minus.reshape(w_shape)
            J_minus = nn.cost_func(y, feed_forward(x))
            approx_grads.append((J_plus - J_minus) / (2.0*eps))
        layer.weights = w_ori

    return approx_grads

def gradient_checking(nn, x, y, eps=1e-4, verbose=False, verbose_precision=5):
    from copy import deepcopy
    nn_copy = deepcopy(nn)

    nn.fit(x, y, epochs=0)
    grads = np.concatenate([layer._dweights.ravel() for layer in nn.layers])

    approx_grads = __compute_approx_grads(nn_copy, x, y, eps)

    is_close = np.allclose(grads, approx_grads)
    print("{}".format("\033[92mGRADIENTS OK" if is_close else "\033[91mGRADIENTS FAIL"))

    norm_num = np.linalg.norm(grads - approx_grads)
    norm_den = np.linalg.norm(grads) + np.linalg.norm(approx_grads)
    error = norm_num / norm_den
    print("Relative error:", error)

    if verbose:
        np.set_printoptions(precision=verbose_precision, linewidth=200, suppress=True)
        print("Gradientes:", grads)
        print("Aproximado:", np.array(approx_grads))

In [5]:
class Layer():
    def __init__(self, input_dim, output_dim, activation=linear):
        self.input = None
        self.weights = np.random.randn(output_dim, input_dim)
        self.biases = np.random.randn(1, output_dim)
        self.activation = activation

        self._activ_inp, self._activ_out = None, None
        self._dweights, self._dbiases = None, None

class NeuralNetwork():
    def __init__(self, cost_func=mse, learning_rate=1e-3):
        self.layers = []
        self.cost_func = cost_func
        self.learning_rate = learning_rate
        
    def fit(self, x_train, y_train, epochs=100, verbose=10):
        for epoch in range(epochs+1):
            y_pred = self.__feedforward(x_train)
            self.__backprop(y_train, y_pred)
            
            if epoch % verbose == 0:
                loss_train = self.cost_func(y_train, self.predict(x_train))
                print("epoch: {0:=4}/{1} loss_train: {2:.8f}".format(epoch, epochs, loss_train))
        
    def predict(self, x):
        return self.__feedforward(x)
    
    def __feedforward(self, x):
        self.layers[0].input = x
        for current_layer, next_layer in zip(self.layers, self.layers[1:] + [Layer(0, 0)]):
            y = np.dot(current_layer.input, current_layer.weights.T) + current_layer.biases
            current_layer._activ_inp = y
            current_layer._activ_out = next_layer.input = current_layer.activation(y)
        return self.layers[-1]._activ_out
    
    def __backprop(self, y, y_pred):
        last_delta = self.cost_func(y, y_pred, derivative=True)
        for layer in reversed(self.layers):
            dactivation = layer.activation(layer._activ_inp, derivative=True) * last_delta
            last_delta = np.dot(dactivation, layer.weights)
            layer._dweights = np.dot(dactivation.T, layer.input)
            layer._dbiases = 1.0*dactivation.sum(axis=0, keepdims=True)
        
        for layer in reversed(self.layers):
            layer.weights = layer.weights - self.learning_rate*layer._dweights
            layer.biases = layer.biases - self.learning_rate*layer._dbiases

# 4. Teste 

In [6]:
np.random.seed(1234)
N, D = 100, 2
x = np.random.rand(N, D)
y = np.random.rand(N, 1)

### Regressão

In [7]:
D_in, D_out = x.shape[1], y.shape[1]
nn = NeuralNetwork(cost_func=mse, learning_rate=1e-3)
nn.layers.append(Layer(input_dim=D_in, output_dim=4, activation=relu))
nn.layers.append(Layer(input_dim=4, output_dim=1, activation=tanh))
nn.layers.append(Layer(input_dim=1, output_dim=2, activation=sigmoid))
nn.layers.append(Layer(input_dim=2, output_dim=5, activation=leaky_relu))
nn.layers.append(Layer(input_dim=5, output_dim=3, activation=elu))
nn.layers.append(Layer(input_dim=3, output_dim=D_out, activation=linear))

nn.fit(x, y, epochs=100)
gradient_checking(nn, x, y, eps=1e-4, verbose=False)

epoch:    0/100 loss_train: 0.09443909
epoch:   10/100 loss_train: 0.08027429
epoch:   20/100 loss_train: 0.06997904
epoch:   30/100 loss_train: 0.06244557
epoch:   40/100 loss_train: 0.05690320
epoch:   50/100 loss_train: 0.05280786
epoch:   60/100 loss_train: 0.04977095
epoch:   70/100 loss_train: 0.04751232
epoch:   80/100 loss_train: 0.04582842
epoch:   90/100 loss_train: 0.04457045
epoch:  100/100 loss_train: 0.04362907
epoch:    0/0 loss_train: 0.04354904
[92mGRADIENTS OK
Relative error: 9.639094044703491e-08


###  Classificação Binária

In [8]:
y = np.random.randint(0, 2, (N, 1))
D_in, D_out = x.shape[1], y.shape[1]

nn = NeuralNetwork(cost_func=binary_cross_entropy, learning_rate=1e-3)
nn.layers.append(Layer(input_dim=D_in, output_dim=4, activation=relu))
nn.layers.append(Layer(input_dim=4, output_dim=1, activation=tanh))
nn.layers.append(Layer(input_dim=1, output_dim=2, activation=sigmoid))
nn.layers.append(Layer(input_dim=2, output_dim=5, activation=leaky_relu))
nn.layers.append(Layer(input_dim=5, output_dim=3, activation=elu))
nn.layers.append(Layer(input_dim=3, output_dim=D_out, activation=sigmoid))

nn.fit(x, y, epochs=100)
gradient_checking(nn, x, y, eps=1e-4, verbose=False)

epoch:    0/100 loss_train: 0.89516990
epoch:   10/100 loss_train: 0.88638120
epoch:   20/100 loss_train: 0.87791389
epoch:   30/100 loss_train: 0.86975853
epoch:   40/100 loss_train: 0.86190582
epoch:   50/100 loss_train: 0.85434657
epoch:   60/100 loss_train: 0.84707176
epoch:   70/100 loss_train: 0.84007248
epoch:   80/100 loss_train: 0.83334002
epoch:   90/100 loss_train: 0.82686579
epoch:  100/100 loss_train: 0.82064138
epoch:    0/0 loss_train: 0.82003236
[92mGRADIENTS OK
Relative error: 8.649640788422605e-10


### Classificação Multiclasse

In [9]:
y = np.random.randint(0, 2, (N, 1))
y_oh = OneHotEncoder(sparse=False, categories='auto').fit_transform(y)
D_in, D_out = x.shape[1], y_oh.shape[1]

nn = NeuralNetwork(cost_func=softmax_neg_log_likelihood, learning_rate=1e-3)
nn.layers.append(Layer(input_dim=D_in, output_dim=4, activation=relu))
nn.layers.append(Layer(input_dim=4, output_dim=1, activation=tanh))
nn.layers.append(Layer(input_dim=1, output_dim=2, activation=sigmoid))
nn.layers.append(Layer(input_dim=2, output_dim=5, activation=leaky_relu))
nn.layers.append(Layer(input_dim=5, output_dim=3, activation=elu))
nn.layers.append(Layer(input_dim=3, output_dim=D_out, activation=linear))

nn.fit(x, y, epochs=100)
gradient_checking(nn, x, y_oh, eps=1e-4, verbose=False)

epoch:    0/100 loss_train: 0.12266334
epoch:   10/100 loss_train: 0.12896237
epoch:   20/100 loss_train: 0.13561368
epoch:   30/100 loss_train: 0.14264357
epoch:   40/100 loss_train: 0.15003052
epoch:   50/100 loss_train: 0.15641980
epoch:   60/100 loss_train: 0.16203421
epoch:   70/100 loss_train: 0.16764310
epoch:   80/100 loss_train: 0.17338276
epoch:   90/100 loss_train: 0.17926592
epoch:  100/100 loss_train: 0.18529324
epoch:    0/0 loss_train: 1.09074880
[92mGRADIENTS OK
Relative error: 1.466199242106662e-09


# 5. Referências 

- [Notebook completo sobre Redes Neurais](https://github.com/arnaldog12/Manual-Pratico-Deep-Learning/blob/master/Rede%20Neural.ipynb)