Neste notebook, vamos codificar Redes Neurais de forma manual para tentar entender intuitivamente como elas são implementadas na prática.

# Sumário

- [Exemplo 1](#Exemplo-1)
- [Exemplo 2](#Exemplo-2)
- [O que precisamos para implementar uma Rede Neural?](#O-que-precisamos-para-implementar-uma-Rede-Neural?)
- [Referências](#Referências)

# Imports e Configurações

In [1]:
import numpy as np

# Exemplo 1

<img src='../images/backprop_example_1.png'>

In [2]:
def sigmoid(x, derivative=False):
    if derivative:
        y = sigmoid(x)
        return y*(1-y)
    return 1.0/(1.0 + np.exp(-x))

In [6]:
x = np.array([[0.05, 0.10]])
y = np.array([[0.01, 0.99]])

w1 = np.array([[0.15, 0.20], [0.25, 0.30]])
b1 = np.array([[0.35]])
w2 = np.array([[0.40, 0.45], [0.50, 0.55]])
b2 = np.array([[0.60]])

learning_rate = 0.5

for i in range(1):
    # feed-forward
    # 1a camada
    inp1 = np.dot(x, w1.T) + b1
    h1 = sigmoid(inp1)
    
    # 2a camada
    inp2 = np.dot(h1, w2.T) + b2
    out = sigmoid(inp2) 
    
    cost = 0.5 * np.sum((y - out)**2)
    
    # backpropagation
    dout = -(y-out)
    
    # 2a camada
    dinp2 = sigmoid(inp2, derivative=True) * dout
    dw2 = np.dot(dinp2.T, h1)
    dh1 = np.dot(dinp2, w2)
    db2 = 1.0 + dinp2.sum(axis = 0, keepdims=True)
    
    # 1a camada
    dinp1 = sigmoid(inp1, derivative=True) * dh1
    dx = np.dot(inp1, w1)
    dw1 = np.dot(dinp1.T, x)
    db1 = 1.0 + dinp1.sum(axis = 0, keepdims = True)
    
    print(dw1)
    print(db1)
    
    w2 -= learning_rate*dw2
    b2 -= learning_rate*db2
    w1 -= learning_rate*dw1
    b1 -= learning_rate*db1
    
    print(w1,w2,b1,b2, sep='\n\n', end='\n\n\n')
    
    
    

[[0.00043857 0.00087714]
 [0.00049771 0.00099543]]
[[1.00877135 1.00995425]]


ValueError: non-broadcastable output operand with shape (1,1) doesn't match the broadcast shape (1,2)

# Exemplo 2

In [7]:
def linear(x, derivative=False):
    return np.ones_like(x) if derivative else x

def relu(x, derivative=False):
    if derivative:
        x = np.where(x <= 0, 0, 1)
    return np.maximum(0, x)

def softmax(x, y_oh=None, derivative=False):
    if derivative: 
        y_pred = softmax(x)
        y_correct = np.argmax(y_oh, axis=1)
        pk = y_pred[range(y_pred.shape[0]), y_correct]
        y_pred[range(y_pred.shape[0]), y_correct] = pk*(1.0 - pk)
        return y_pred
    exp = np.exp(x)
    return exp/np.sum(exp, axis=1, keepdims=True)

def neg_log_likelihood(y_oh, y_pred, derivative=False):
    y_correct = np.argmax(y_oh, axis=1)
    pk = y_pred[range(y_pred.shape[0]), y_correct]
    if derivative:
        y_pred[range(y_pred.shape[0]), y_correct] = (-1.0/pk)
        return y_pred
    return np.mean(-np.log(pk))

def softmax_neg_log_likelihood(y_oh, y_pred, derivative=False):
    y_softmax = softmax(y_pred)
    if derivative:
        y_correct = np.argmax(y_oh, axis=1)
        dlog = neg_log_likelihood(y_oh, y_softmax, derivative=True)
        dsoftmax = softmax(y_pred, y_oh, derivative=True)
        y_softmax[range(y_softmax.shape[0]), y_correct] = dlog[range(dlog.shape[0]), y_correct]*dsoftmax[range(dsoftmax.shape[0]), y_correct]
        return y_softmax/y_softmax.shape[0]
    return neg_log_likelihood(y_oh, y_softmax)

In [10]:
x = np.array([[0.1, 0.2, 0.7]])
y = np.array([[1, 0, 0]])
w1 = np.array([[0.1, 0.2, 0.3], [0.3, 0.2, 0.7], [0.4, 0.3, 0.9]])
b1 = np.ones((1,3))
w2 = np.array([[0.2, 0.3, 0.5], [0.3, 0.5, 0.7], [0.6, 0.4, 0.8]])
b2 = np.ones((1,3))
w3 = np.array([[0.1, 0.4, 0.8], [0.3, 0.7, 0.2], [0.5, 0.2, 0.9]])
b3 = np.ones((1,3))

learning_rate = 0.01

N, D = x.shape

for i in range(301):
    # feedforward
    # 1a camada
    inp1 = np.dot(x, w1.T) + b1 # [1x3]x[3x3] = [1x3]
    h1 = relu(inp1) # [1x3]
    
    # 2a camada
    inp2 = np.dot(h1, w2.T) + b2 # [1x3]x[3x3] = [1x3]
    h2 = sigmoid(inp2) # [1x3]
    
    # 3a camada
    inp3 = np.dot(h2, w3.T) + b3 # [1x3]x[3x3] = [1x3]
    out = linear(inp3) # [1x3]
    
    cost = softmax_neg_log_likelihood(y, out) # [1x3]
    
    # backpropagation
    dout = softmax_neg_log_likelihood(y, out)   # [1x3]
    
    # 3a camada
    dinp3 = linear(inp3, derivative=True) * dout    # [1x3]
    dh2 = np.dot(dinp3, w3) # [1x3]x[3x3] = [1x3]
    dw3 = np.dot(dinp3.T, dh2)   # [3x1]x[1x3] = [3x3]
    db3 = 1.0 * dinp3.sum(axis = 0, keepdims=True)  # [1x3]
    
    # 2a camada
    dinp2 = sigmoid(inp2, derivative=True) * dh2   # [1x3]
    dh1 = np.dot(dinp2, w2)  # [1x3]x[3x3] = [1x3]
    dw2 = np.dot(dinp2.T, h1)  # [3x3]x[1x3] = [3x3]
    db2 = 1.0 * dinp2.sum(axis = 0, keepdims = True)  # [1x3]
    
    # 1a camada
    dinp1 = relu(inp1, derivative=True) * dh1    # [1x3]
    dx = np.dot(dinp1, w1)
    dw1 = np.dot(dinp1.T, x)
    db1 = 1.0 * dinp1.sum(axis = 0, keepdims = True)
    
    w3 -= learning_rate * dw3
    b3 -= learning_rate * db3
    w2 -= learning_rate * dw2
    b2 -= learning_rate * db2
    w1 -= learning_rate * db1
    b1 -= learning_rate * db1
    
    #print(dinp3, dh2, dw3, db3, dinp2, dh1, dw2, db2, dinp1, dx, dw1, db1, sep = '\n')

    if i % 30 == 0:
        cost = softmax_neg_log_likelihood(y, out)
        print(cost)

for w in [w1, w2, w3]:
    print(w)

1.1674456052871238
1.1658353662418386
1.1653209601164782
1.1651685578907658
1.1651243857460674
1.165111663594719
1.1651080061023227
1.16510695515841
1.1651066532254692
1.1651065664848061
1.1651065415658637
[[0.08505481 0.18337918 0.2719905 ]
 [0.28505481 0.18337918 0.6719905 ]
 [0.38505481 0.28337918 0.8719905 ]]
[[0.17752115 0.27210045 0.46912126]
 [0.28399265 0.48013254 0.67801103]
 [0.58375428 0.37983654 0.77768356]]
[[-1.99998903e-01 -3.33317488e-02  1.66668982e-01]
 [ 1.09695175e-06  2.66668251e-01 -4.33331018e-01]
 [ 2.00001097e-01 -2.33331749e-01  2.66668982e-01]]


# O que precisamos para implementar uma Rede Neural?

# Referências

- [Neural Network from Scratch](https://beckernick.github.io/neural-network-scratch/)
- [Backpropagation Algorithm](https://theclevermachine.wordpress.com/tag/backpropagation-algorithm/)
- [Back-Propagation is very simple. Who made it Complicated ?](https://becominghuman.ai/back-propagation-is-very-simple-who-made-it-complicated-97b794c97e5c)
- [A Step by Step Backpropagation Example](https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/)
- [Understanding softmax and the negative log-likelihood](https://ljvmiranda921.github.io/notebook/2017/08/13/softmax-and-the-negative-log-likelihood/)