In [1]:
# MIT License
#
# Copyright (c) 2020 Fagner Cunha
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# Redes Neurais Completamente Conectadas com TensorFlow

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/alcunha/nn-with-tf/blob/master/Fully-Connected-Neural-Networks-with-TF.ipynb"><img src="./images/colab_logo_32px.png" />Executar no Google Colab</a>
  </td>
  <td>
    <a href="https://github.com/alcunha/nn-with-tf/blob/master/Fully-Connected-Neural-Networks-with-TF.ipynb"><img src="./images/GitHub-Mark-32px.png" />Ver código no GitHub</a>
  </td>
</table>

*“NÃO ENTRE EM PÂNICO”*

(O Guia do Mochileiro das Galáxias)

In [2]:
! pip install tensorflow pandas



In [3]:
import tensorflow as tf
import pandas as pd

from tensorflow import keras

## Tensores

Tensores são arrays multi-dimensionais com um mesmo tipo (**dtype**).

#### Rank 0 ("Escalar")

In [4]:
# Rank-0 tensors (scalars) built from an integer, a float and a string.
# The dtype of each tensor is inferred from the Python value.
a = tf.constant(4)
b = tf.constant(3.2)
c = tf.constant('casa')

In [5]:
# Display the three scalar tensors, one per line (value, shape and dtype).
print(a, b, c, sep='\n')

tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(3.2, shape=(), dtype=float32)
tf.Tensor(b'casa', shape=(), dtype=string)


#### Rank 1 ("Vetor")

In [6]:
# Rank-1 tensors (vectors). In list2 the integer 2 is stored as a float
# because all elements of a tensor share one dtype.
list1 = tf.constant([4, 3])
list2 = tf.constant([3.2, 2])
list3 = tf.constant(['casa', 'rua'])

In [7]:
# Display the three vector tensors, one per line.
print(list1, list2, list3, sep='\n')

tf.Tensor([4 3], shape=(2,), dtype=int32)
tf.Tensor([3.2 2. ], shape=(2,), dtype=float32)
tf.Tensor([b'casa' b'rua'], shape=(2,), dtype=string)


Lista com tipos diferentes vai gerar erro:

In [8]:
# Left commented out on purpose: a tensor holds a single dtype, so building one
# from a list that mixes a string and an integer is expected to raise an error.
#list4 = tf.constant(['casa', 4])

#### Rank 2 ("Matriz")

In [9]:
# Rank-2 tensors (matrices). mat1 holds only integers; the single float literal
# in mat2 (`1.`) makes the whole matrix a float tensor.
mat1 = tf.constant([[2, 3],
                    [4, 5]])
mat2 = tf.constant([[1, 1],
                    [1, 1.]])

In [10]:
# Display both matrices, one after the other.
print(mat1, mat2, sep='\n')

tf.Tensor(
[[2 3]
 [4 5]], shape=(2, 2), dtype=int32)
tf.Tensor(
[[1. 1.]
 [1. 1.]], shape=(2, 2), dtype=float32)


Convertendo os valores de um tensor para o NumPy:

In [11]:
# .numpy() returns the tensor's values as a NumPy array.
mat1.numpy()

array([[2, 3],
       [4, 5]], dtype=int32)

In [12]:
# Tensor of shape (2, 3) filled with int32 zeros.
zeros = tf.zeros([2, 3], dtype=tf.int32)
print(zeros)

tf.Tensor(
[[0 0 0]
 [0 0 0]], shape=(2, 3), dtype=int32)


In [13]:
# Rank-3 tensor of shape (3, 3, 2) filled with float32 ones.
ones = tf.ones([3, 3, 2], dtype=tf.float32)
print(ones)

tf.Tensor(
[[[1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]]], shape=(3, 3, 2), dtype=float32)


In [14]:
# The flat list of six values is laid out, row by row, into the requested
# (2, 3) shape and stored as float32.
tf.constant([1, 2, 3, 4, 5, 6], shape=(2,3), dtype=tf.float32)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

### Operações

In [15]:
# `a` was created as an int32 scalar; cast it to float32 so that it has the
# same dtype as `b` in the operations below.
a = tf.cast(a, tf.float32)

In [16]:
# Tensors support the regular Python arithmetic operators.
a + b

<tf.Tensor: shape=(), dtype=float32, numpy=7.2>

In [17]:
# Functional form of the same addition; yields the same result as `a + b`.
tf.add(a, b)

<tf.Tensor: shape=(), dtype=float32, numpy=7.2>

In [18]:
# Show the vector, its shape and dtype, and finally the vector plus one.
# The ones tensor is created with list1's own shape and dtype so that both
# operands of the addition match.
print(
    list1,
    list1.shape,
    list1.dtype,
    list1 + tf.ones(list1.shape, dtype=list1.dtype),
    sep='\n',
)

tf.Tensor([4 3], shape=(2,), dtype=int32)
(2,)
<dtype: 'int32'>
tf.Tensor([5 4], shape=(2,), dtype=int32)


Multiplicação de matrizes

In [19]:
# Operands for the matrix-multiplication example: a (2, 3) matrix and a
# (3, 4) matrix whose three rows are all [1, 2, 3, 4].
mat1 = tf.constant([
    [1, 2, 3],
    [4, 5, 6],
])

mat2 = tf.constant([[1, 2, 3, 4]] * 3)

In [20]:
# Check that the inner dimensions agree before multiplying.
print(mat1.shape, mat2.shape, sep='\n')

(2, 3)
(3, 4)


In [21]:
# Matrix product: (2, 3) x (3, 4) -> (2, 4).
tf.matmul(mat1, mat2)

<tf.Tensor: shape=(2, 4), dtype=int32, numpy=
array([[ 6, 12, 18, 24],
       [15, 30, 45, 60]], dtype=int32)>

In [22]:
# The * operator multiplies element by element (it is not a matrix product),
# so here every entry of mat1 is squared.
mat1 * mat1

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[ 1,  4,  9],
       [16, 25, 36]], dtype=int32)>

#### Bônus: Números complexos

In [23]:
# Builds a complex tensor with `a` as the real part and `b` as the imaginary part.
tf.complex(a, b)

<tf.Tensor: shape=(), dtype=complex64, numpy=(4+3.2j)>

## Redes Neurais Artificiais

Redes neurais artificiais são modelos computacionais inspirados na estrutura do sistema
nervoso de animais que adquire conhecimento por meio da experiência.

Um neurônio artificial (do tipo Perceptron) é composto por pesos W que multiplicam a entrada X e uma função de ativação f para determinar a saída Y.

<img src="./images/Perceptron-bias.png" width="600">
<center>Figura 1: Esquema de um neurônio artificial (Perceptron)</center>
<center>Fonte: [1]</center>

No exemplo da figura acima:

y = f(x1\*w1 + x2\*w2)

y2 = f(x1\*w1 + x2\*w2 + b)

#### Rede neural completamente conectada

Os neurônios podem ser dispostos em camadas e várias dessas camadas podem ser encadeadas até a saída da rede. Neurônios em uma camada completamente conectada têm conexões a todas as "saídas" da camada anterior. Matematicamente, os pesos da rede podem ser representados como matrizes.

<img src="./images/MultiLayer_Neural_Network.png" width="600">
<center>Figura 2: Esquema de uma rede completamente conectada</center>
<center>Fonte: [2]</center>

Durante o treinamento, os pesos de uma rede neural são otimizados de acordo com o erro que é retropropagado ao longo das camadas.

## Exemplo Prático: NN para o dataset Titanic

1. Pré-processamento
2. Construção do pipeline de dados
3. Construção do modelo
4. Treinamento
5. Avaliação do modelo e visualização dos resultados

#### Pré-processamento do dataset

In [24]:
# Remote CSV files for the Titanic example. The "eval" file is used as the
# test split throughout this notebook.
TITANIC_TRAIN_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
TITANIC_TEST_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"

In [25]:
# Fetch each CSV under the given local file name; the returned value is the
# local path, which is passed to pd.read_csv below.
titanic_train_path = keras.utils.get_file('titanic_train.csv', TITANIC_TRAIN_URL)
titanic_test_path = keras.utils.get_file('titanic_test.csv', TITANIC_TEST_URL)

In [26]:
# Load the training split and preview its first rows.
train_df = pd.read_csv(titanic_train_path)
train_df.head()

Unnamed: 0,survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,1,female,26.0,0,0,7.925,Third,unknown,Southampton,y
3,1,female,35.0,1,0,53.1,First,C,Southampton,n
4,0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y


In [27]:
# Keep only the input features used by the model plus the 'survived' label.
# `selected_columns` is reused later to preprocess the test split.
selected_columns = ['class', 'sex', 'age', 'n_siblings_spouses', 'fare', 'survived']
train_df = train_df[selected_columns]
train_df.head()

Unnamed: 0,class,sex,age,n_siblings_spouses,fare,survived
0,Third,male,22.0,1,7.25,0
1,First,female,38.0,1,71.2833,1
2,Third,female,26.0,0,7.925,1
3,First,female,35.0,1,53.1,1
4,Third,male,28.0,0,8.4583,0


In [28]:
# List the distinct values of the two categorical columns.
print(train_df['class'].unique(), train_df['sex'].unique(), sep='\n')

['Third' 'First' 'Second']
['male' 'female']


Feature encoding:

In [29]:
# Preview of the one-hot encoding of 'class'. The dtype is pinned so the
# columns are numeric 0/1 regardless of the pandas version (pandas >= 2.0
# returns booleans by default).
pd.get_dummies(train_df['class'], dtype='uint8').head()

Unnamed: 0,First,Second,Third
0,0,0,1
1,1,0,0
2,0,0,1
3,1,0,0
4,0,0,1


In [30]:
# Append the one-hot encoded 'class' and 'sex' columns to the frame.
# dtype='uint8' keeps the dummies numeric: with the boolean default of
# pandas >= 2.0, mixing them with the float/int columns would turn `.values`
# into an object array, which cannot be converted to a tensor later on.
train_df = pd.concat(
    [
        train_df,
        pd.get_dummies(train_df['class'], dtype='uint8'),
        pd.get_dummies(train_df['sex'], dtype='uint8'),
    ],
    axis=1,
)
train_df.head()

Unnamed: 0,class,sex,age,n_siblings_spouses,fare,survived,First,Second,Third,female,male
0,Third,male,22.0,1,7.25,0,0,0,1,0,1
1,First,female,38.0,1,71.2833,1,1,0,0,1,0
2,Third,female,26.0,0,7.925,1,0,0,1,1,0
3,First,female,35.0,1,53.1,1,1,0,0,1,0
4,Third,male,28.0,0,8.4583,0,0,0,1,0,1


In [31]:
# The original categorical columns are no longer needed once their one-hot
# encoded versions are part of the frame.
train_df = train_df.drop(columns=['class', 'sex'])
train_df.head()

Unnamed: 0,age,n_siblings_spouses,fare,survived,First,Second,Third,female,male
0,22.0,1,7.25,0,0,0,1,0,1
1,38.0,1,71.2833,1,1,0,0,1,0
2,26.0,0,7.925,1,0,0,1,1,0
3,35.0,1,53.1,1,1,0,0,1,0
4,28.0,0,8.4583,0,0,0,1,0,1


In [32]:
# Labels as a one-column DataFrame (double brackets) copied out of train_df.
train_df_labels = train_df[['survived']].copy()
train_df_labels.head()

Unnamed: 0,survived
0,0
1,1
2,1
3,1
4,0


In [33]:
# Features are every remaining column except the label.
train_df_features = train_df.drop(columns=['survived'])
train_df_features.head()

Unnamed: 0,age,n_siblings_spouses,fare,First,Second,Third,female,male
0,22.0,1,7.25,0,0,1,0,1
1,38.0,1,71.2833,1,0,0,1,0
2,26.0,0,7.925,0,0,1,1,0
3,35.0,1,53.1,1,0,0,1,0
4,28.0,0,8.4583,0,0,1,0,1


Replicando para o conjunto de teste:

In [34]:
# Apply to the test split the same preprocessing steps used for the training
# split: select columns, one-hot encode, drop the raw categoricals and split
# into labels and features.
test_df = pd.read_csv(titanic_test_path)
test_df = test_df[selected_columns]
# dtype='uint8' keeps the dummies numeric: with the boolean default of
# pandas >= 2.0, `.values` would become an object array that cannot be
# converted to a tensor.
test_df = pd.concat(
    [
        test_df,
        pd.get_dummies(test_df['class'], dtype='uint8'),
        pd.get_dummies(test_df['sex'], dtype='uint8'),
    ],
    axis=1,
)
test_df = test_df.drop(columns=['class', 'sex'])
test_df_labels = test_df[['survived']].copy()
test_df_features = test_df.drop(columns=['survived'])

In [35]:
# Sanity check: the test features should have the same columns as the training features.
test_df_features.head()

Unnamed: 0,age,n_siblings_spouses,fare,First,Second,Third,female,male
0,35.0,0,8.05,0,0,1,0,1
1,54.0,0,51.8625,1,0,0,0,1
2,58.0,0,26.55,1,0,0,1,0
3,55.0,0,16.0,0,1,0,1,0
4,34.0,0,13.0,0,1,0,0,1


In [36]:
# Preview the test labels.
test_df_labels.head()

Unnamed: 0,survived
0,0
1,0
2,1
3,1
4,1


#### Construindo o pipeline de dados (tf.data.Dataset)

O `tf.data` é uma API do TensorFlow que permite construir pipelines de dados. Essa API é altamente flexível, permitindo a construção de pipelines complexos a partir de operações simples.

In [37]:
# Slice the feature and label arrays along their first axis, so each dataset
# element is one (features, label) pair corresponding to one row.
train_dataset = tf.data.Dataset.from_tensor_slices((train_df_features.values, train_df_labels.values))
test_dataset = tf.data.Dataset.from_tensor_slices((test_df_features.values, test_df_labels.values))

In [38]:
# Inspect the first two (features, label) pairs of the unbatched dataset.
for features, label in train_dataset.take(2):
    print(features)
    print(label)

tf.Tensor([22.    1.    7.25  0.    0.    1.    0.    1.  ], shape=(8,), dtype=float64)
tf.Tensor([0], shape=(1,), dtype=int64)
tf.Tensor([38.      1.     71.2833  1.      0.      0.      1.      0.    ], shape=(8,), dtype=float64)
tf.Tensor([1], shape=(1,), dtype=int64)


In [39]:
# Pipeline/training hyper-parameters.
# The shuffle buffer spans the whole training set.
SHUFFLE_BUFFER_SIZE = len(train_df_labels)
# Number of test examples (name spelled as-is because later cells reference it).
TEST_LENGHT = len(test_df_labels)
BATCH_SIZE = 16
EPOCHS = 20

In [40]:
# Training input pipeline: shuffle the examples, group them into fixed-size
# batches (dropping the incomplete last one) and repeat the data EPOCHS times.
train_dataset = (
    train_dataset
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .repeat(EPOCHS)
)

In [41]:
# Inspect one batch: features and labels now carry a leading batch dimension.
for features, label in train_dataset.take(1):
    print(features)
    print(label)

tf.Tensor(
[[25.      1.     91.0792  1.      0.      0.      0.      1.    ]
 [39.      0.     13.      0.      1.      0.      0.      1.    ]
 [28.      0.      7.775   0.      0.      1.      0.      1.    ]
 [ 2.      0.     10.4625  0.      0.      1.      1.      0.    ]
 [21.      0.      8.4333  0.      0.      1.      0.      1.    ]
 [17.      0.     12.      0.      1.      0.      1.      0.    ]
 [28.      0.      7.75    0.      0.      1.      0.      1.    ]
 [19.      0.      7.8958  0.      0.      1.      0.      1.    ]
 [46.      0.     26.      0.      1.      0.      0.      1.    ]
 [28.      0.      7.8958  0.      0.      1.      0.      1.    ]
 [55.5     0.      8.05    0.      0.      1.      0.      1.    ]
 [18.      0.     13.      0.      1.      0.      0.      1.    ]
 [18.      1.     17.8     0.      0.      1.      1.      0.    ]
 [60.      1.     39.      0.      1.      0.      0.      1.    ]
 [22.      0.     49.5     1.      0.      0.      

No teste não precisamos aleatorizar as instâncias:

In [42]:
# Test input pipeline: no shuffling, only batching followed by repetition so
# that the data can be consumed once per training epoch.
test_dataset = test_dataset.batch(BATCH_SIZE, drop_remainder=True)
test_dataset = test_dataset.repeat(EPOCHS)

Pergunta: Por que não deveríamos utilizar `drop_remainder` na avaliação de um experimento real?

### Construindo o modelo

In [52]:
# Fully connected network for the 8 input features.
# The two hidden layers are equivalent in structure; they only differ in how
# the ReLU is attached (as a Dense argument vs. as a separate Activation layer).
# The output layer has a single unit and no activation, so the model emits raw
# logits; the loss is configured accordingly with from_logits=True.
model = keras.Sequential([
    keras.Input(shape=(8,)),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(32),
    keras.layers.Activation('relu'),
    keras.layers.Dense(1)
])

In [53]:
# Print the layers, their output shapes and the number of parameters.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 32)                288       
_________________________________________________________________
dense_7 (Dense)              (None, 32)                1056      
_________________________________________________________________
activation_2 (Activation)    (None, 32)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 33        
Total params: 1,377
Trainable params: 1,377
Non-trainable params: 0
_________________________________________________________________


In [54]:
def accuracy(y_true, y_pred):
    """Binary accuracy for a model whose outputs are logits.

    The logits are mapped to probabilities with a sigmoid before applying the
    usual 0.5 decision threshold.
    """
    return tf.keras.metrics.binary_accuracy(y_true, tf.sigmoid(y_pred), threshold=0.5)


# The model's last layer has no activation, so predictions are raw logits.
# The loss accounts for that through from_logits=True, but the 'accuracy'
# string shortcut would threshold the logits at 0.5 as if they were
# probabilities. The custom metric above fixes that and, being named
# `accuracy`, keeps the same keys in the training history.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=[accuracy])

### Treinamento

In [55]:
# `train_dataset` already contains EPOCHS repetitions of the training data, so
# each Keras epoch must be limited to a single pass over it. Without
# `steps_per_epoch`, every "epoch" would consume all the repetitions and the
# model would see the data roughly EPOCHS * EPOCHS times.
# Integer division matches the drop_remainder=True batching of both datasets.
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=len(train_df_labels) // BATCH_SIZE,
    validation_data=test_dataset,
    validation_steps=TEST_LENGHT // BATCH_SIZE)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Referências

[1] File:Perceptron-bias.svg. Disponível em: https://commons.wikimedia.org/wiki/File:Perceptron-bias.svg. Acesso em: 03 de julho de 2020.

[2] File:Multi-Layer Neural Network-Vector.svg. Disponível em: https://commons.wikimedia.org/wiki/File:Multi-Layer_Neural_Network-Vector.svg. Acesso em: 03 de julho de 2020.