## Redes Neurais

### Perceptrons

In [1]:
import import_ipynb
from linear_algebra import Vector, dot

def step_function(x: float) -> float:
    return 1.0 if x >= 0 else 0

def perceptron_output(weights: Vector, bias: float, x: Vector) -> float:
    """Retorna 1 se o perceptron 'disparar', 0 se não"""
    calculation = dot(weights, x) + bias
    return step_function(calculation)

importing Jupyter notebook from linear_algebra.ipynb


In [2]:
# porta AND

and_weights = [2., 2]
and_bias = -3.

assert perceptron_output(and_weights, and_bias, [1, 1]) == 1
assert perceptron_output(and_weights, and_bias, [0, 1]) == 0
assert perceptron_output(and_weights, and_bias, [1, 0]) == 0
assert perceptron_output(and_weights, and_bias, [0, 0]) == 0

In [3]:
# porta OR

or_weights = [2., 2]
or_bias = -1.

assert perceptron_output(or_weights, or_bias, [1, 1]) == 1
assert perceptron_output(or_weights, or_bias, [0, 1]) == 1
assert perceptron_output(or_weights, or_bias, [1, 0]) == 1
assert perceptron_output(or_weights, or_bias, [0, 0]) == 0

In [4]:
# porta NOT

not_weights = [-2.]
not_bias = 1.

assert perceptron_output(not_weights, not_bias, [0]) == 1
assert perceptron_output(not_weights, not_bias, [1]) == 0

In [5]:
# porta XOR

and_gate = min
or_bias = max
xor_gate = lambda x, y: 0 if x == y else 1

### Redes Neurais Feed-Forward

In [6]:
# função Sigmoid

import math

def sigmoid(t: float) -> float:
    return 1 / (1 + math.exp(-t))

def neuron_output(weights: Vector, inputs: Vector) -> float:
    # o weights inclui o termo de viés, as entradas incluem um 1
    return sigmoid(dot(weights, inputs))

In [7]:
from typing import List

def feed_forward(neural_network: List[List[Vector]], input_vector: Vector) -> List[Vector]:
    """
    Alimenta o vetor de entrada na rede neural.
    Retorna as saídas de todas as camadas (não só a última).
    """
    outputs: List[Vector] = []

    for layer in neural_network:
        input_with_bias = input_vector + [1]                # adicione uma constante
        output = [neuron_output(neuron, input_with_bias)    # Compute a saída
                  for neuron in layer]                      # para cada neurônio.
        outputs.append(output)                              # Adicione os resultados.

        # Agora a entrada da próxima camada é a saída da última
        input_vector = output                               

    return outputs


xor_network = [ # camada oculta
                [[20., 20, -30],        # neurônio 'and'
                [20., 20, -10]],        # neurônio 'or'
                #  camada de saída
                [[-60., 60, -30]]]      # neurônio de '2ª entrada, mas não a 1ª entrada'

# o feed_forward retorna as saídas de todas as camadas para que [-1] receba a 
# saída final e para que [0] receba o valor do vetor resultante
assert 0.000 < feed_forward(xor_network, [0, 0])[-1][0] < 0.001
assert 0.999 < feed_forward(xor_network, [1, 0])[-1][0] < 1.000
assert 0.999 < feed_forward(xor_network, [0, 1])[-1][0] < 1.000
assert 0.000 < feed_forward(xor_network, [1, 1])[-1][0] < 0.001

### Retropropagação

In [8]:
# função para cálcular os gradientes

def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """
    Quando houver uma rede neural, um vector de entrada e um vetor
    de destino, faça uma previsão e compute o gradient descendente da perda
    dos erros quadráticos com relação aos pesos dos neurônios.
    """
    # passe pra frente
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # gradientes associados às saídas de pré-ativação dos neurônios de saída
    output_deltas = [output * (1 - output) * (output - target) for output, target in zip(outputs, target_vector)]

    # gradientes associados aos pesos dos neurônios de saída
    output_grads = [[output_deltas[i] * hidden_output for hidden_output in hidden_outputs + [1]] for i, output_neuron in enumerate(network[-1])]

    # gradientes associados às saídas de pré-ativação dos neurônios ocultos
    hidden_deltas = [hidden_output * (1 - hidden_output) * dot(output_deltas, [n[i] for n in network[-1]]) for i, hidden_output in enumerate(hidden_outputs)]

    # gradientes associados aos pesos dos neurônios
    hidden_grads = [[hidden_deltas[i] * input for input in input_vector + [1]] for i, hidden_neuron in enumerate(network[0])]

    return [hidden_grads, output_grads]

In [12]:
# treinando a rede neural

import random

random.seed(0)

# dados de treinamento
xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
ys = [[0.], [1.], [1.], [0.]]

# comece com pesos aleatórios
network = [
    # camada oculta: 2 entradas -> 2 saídas
    [[random.random() for _ in range(2 + 1)],           # 1° neurônio oculto
    [random.random() for _ in range(2 + 1)]],           # 2° neurônio oculto
    # camada de saída: 2 entradas -> 1 saída
    [[random.random() for _ in range(2 + 1)]]           # 1° neurônio de saída
]

In [15]:
from gradient_descent import gradient_step
import tqdm

def train():
    learning_rate = 1.0

    for epoch in tqdm.trange(20000, desc="neural net for xor"):
        for x, y in zip(xs, ys):
            gradients = sqerror_gradients(network, x, y)

            # dê um passo de gradiente para cada neurônio de cada camada
            network = [
                [gradient_step(neuron, grad, -learning_rate)
                for neuron, grad in zip(layer, layer_grad)]
                for layer, layer_grad in zip(network, gradients)
            ]


    # verifique se a rede aprendeu XOR
    assert feed_forward(network, [0, 0])[-1][0] < 0.01
    assert feed_forward(network, [0, 1])[-1][0] > 0.99
    assert feed_forward(network, [1, 0])[-1][0] > 0.99
    assert feed_forward(network, [1, 1])[-1][0] < 0.01

neural net for xor: 100%|██████████| 20000/20000 [00:03<00:00, 6524.04it/s]


In [16]:
network

[[[6.953505610104289, 6.952785792366962, -3.1484761965046655],
  [5.115899442661922, 5.115407875835949, -7.839603434415663]],
 [[10.961705832630562, -11.63060534664317, -5.144229056613082]]]

### Exemplo: Fizz Buzz

In [17]:
# for i in range(1, 101):
#     if i % 3 == 0: print('fizz')
#     elif i % 5 == 0: print('buzz')
#     elif i % 15 == 0: print('fizzbuzz')
#     else: print(i)

1
2
fizz
4
buzz
fizz
7
8
fizz
buzz
11
fizz
13
14
fizz
16
17
fizz
19
buzz
fizz
22
23
fizz
buzz
26
fizz
28
29
fizz
31
32
fizz
34
buzz
fizz
37
38
fizz
buzz
41
fizz
43
44
fizz
46
47
fizz
49
buzz
fizz
52
53
fizz
buzz
56
fizz
58
59
fizz
61
62
fizz
64
buzz
fizz
67
68
fizz
buzz
71
fizz
73
74
fizz
76
77
fizz
79
buzz
fizz
82
83
fizz
buzz
86
fizz
88
89
fizz
91
92
fizz
94
buzz
fizz
97
98
fizz
buzz


In [18]:
def fizz_buzz_encode(x: int) -> Vector:
    if x % 15 == 0: return [0, 0, 0, 1]
    elif x % 5 == 0: return [0, 0, 1, 0]
    elif x % 3 == 0: return [0, 1, 0, 0]
    else: return [1, 0, 0, 0]

assert fizz_buzz_encode(2) == [1, 0, 0, 0]
assert fizz_buzz_encode(6) == [0, 1, 0, 0]
assert fizz_buzz_encode(10) == [0, 0, 1, 0]
assert fizz_buzz_encode(30) == [0, 0, 0, 1]

In [19]:
def binary_encode(x: int) -> Vector:
    binary: List[float] = []

    for i in range(10):
        binary.append(x % 2)
        x = x // 2
    
    return binary

assert binary_encode(0)         == [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
assert binary_encode(1)         == [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
assert binary_encode(10)        == [0, 1, 0, 1, 0, 0, 0, 0, 0, 0]
assert binary_encode(101)       == [1, 0, 1, 0, 0, 1, 1, 0, 0, 0]
assert binary_encode(999)       == [1, 1, 1, 0, 0, 1, 1, 1, 1, 1]

In [20]:
xs = [binary_encode(n) for n in range(101, 1024)]
ys = [fizz_buzz_encode(n) for n in range(101, 1024)]

In [21]:
NUM_HIDDEN = 25

network = [
    # camada oculta: 10 entradas -> NUM_HIDDEN saídas
    [[random.random() for _ in range(10 + 1)] for _ in range(NUM_HIDDEN)],
    # camada de saída: NUM_HIDDEN entradas -> 4 saídas
    [[random.random() for _ in range(NUM_HIDDEN + 1)] for _ in range(4)]
]

from linear_algebra import squared_distance

learning_rate = 1.0

def train():
    with tqdm.trange(500) as t:
        for epoch in t:
            epoch_loss = 0.0

            for x, y in zip(xs, ys):
                predicted = feed_forward(network, x)[-1]
                epoch_loss += squared_distance(predicted, y)
                gradients = sqerror_gradients(network, x, y)

                # Dê um passo de gradiente para cada neurônio de cada camada
                network = [
                    [gradient_step(neuron, grad, -learning_rate)
                    for neuron, grad in zip(layer, layer_grad)]
                    for layer, layer_grad in zip(network, gradients)
                ]
            
            t.set_description(f"fizz buzz (loss: {epoch_loss:.2f})")

In [22]:
def argmax(xs: list) -> int:
    """Retorna o índice de maior valor"""
    return max(range(len(xs)), key=lambda i: xs[i])

assert argmax([0, -1]) == 0                     # itens[0] é o maior valor
assert argmax([-1, 0]) == 1                     # itens[1] é o maior valor
assert argmax([-1, 10, 5, 20, -3]) == 3         # itens[3] é o maior valor

In [23]:
def test():
    num_correct = 0

    for n in range(1, 101):
        x = binary_encode(n)
        predicted = argmax(feed_forward(network, x)[-1])
        actual = argmax(fizz_buzz_encode(n))
        labels = [str(n), 'fizz', 'buzz', 'fizzbuzz']
        print(n, labels[predicted], labels[actual])

        if predicted == actual:
            num_correct += 1

    print(num_correct, '/', 100)

1 1 1
2 2 2
3 3 fizz
4 fizzbuzz 4
5 5 buzz
6 fizzbuzz fizz
7 7 7
8 buzz 8
9 buzz fizz
10 10 buzz
11 buzz 11
12 fizzbuzz fizz
13 13 13
14 14 14
15 15 fizzbuzz
16 16 16
17 17 17
18 18 fizz
19 19 19
20 20 buzz
21 21 fizz
22 22 22
23 23 23
24 24 fizz
25 25 buzz
26 26 26
27 27 fizz
28 28 28
29 29 29
30 30 fizzbuzz
31 31 31
32 32 32
33 33 fizz
34 34 34
35 35 buzz
36 36 fizz
37 37 37
38 38 38
39 39 fizz
40 40 buzz
41 41 41
42 42 fizz
43 43 43
44 44 44
45 45 fizzbuzz
46 46 46
47 47 47
48 48 fizz
49 49 49
50 50 buzz
51 51 fizz
52 52 52
53 53 53
54 54 fizz
55 55 buzz
56 56 56
57 57 fizz
58 58 58
59 59 59
60 60 fizzbuzz
61 61 61
62 62 62
63 63 fizz
64 64 64
65 65 buzz
66 66 fizz
67 67 67
68 68 68
69 69 fizz
70 70 buzz
71 71 71
72 72 fizz
73 73 73
74 74 74
75 75 fizzbuzz
76 76 76
77 77 77
78 78 fizz
79 79 79
80 80 buzz
81 81 fizz
82 82 82
83 83 83
84 84 fizz
85 85 buzz
86 86 86
87 87 fizz
88 88 88
89 89 89
90 90 fizzbuzz
91 91 91
92 92 92
93 93 fizz
94 94 94
95 95 buzz
96 96 fizz
97 97 97
98 98 98