## Trabalho de Conclusão de Semestre - Redes Neurais e Algoritmos Genéticos
# NOME DO TRABALHO

#### Autores:
Alice Kageyama (220049), Haziel Sixto Baden Sanchez Hermoza (220065), Monyque Karoline de Paula Silva (220063), Pedro Thomazelli Ferreira (220058)

<hr>

In [1]:
# Importações:

import torch
import torch.nn as nn
import torch.optim as optim

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [2]:
# Initial setup: experiment constants and data loading.

TAMANHO_TESTE = 0.1
SEMENTE_ALEATORIA = 61455
ARQUIVO = 'dados/elnino.csv'

# Load the raw table and discard the two columns that are not used as data.
df = pd.read_csv(ARQUIVO, header=0).drop(columns=['Observation', ' Date'])

# Removing missing data: cells equal to the string '.' are masked to NaN,
# every row holding at least one NaN is dropped, and what remains is cast to
# float and renumbered from zero.
df = (
    df.where(df != '.')
    .dropna(how='any')
    .astype(float)
    .reset_index(drop=True)
)

display(df)

Unnamed: 0,Year,Month,Day,Latitude,Longitude,Zonal Winds,Meridional Winds,Humidity,Air Temp,Sea Surface Temp
0,93.0,5.0,9.0,-0.02,-109.96,-2.1,2.1,81.2,26.80,27.02
1,93.0,5.0,10.0,-0.02,-109.96,-3.4,1.4,84.2,26.95,26.91
2,93.0,5.0,11.0,-0.02,-109.96,-3.8,2.2,84.9,26.98,26.78
3,93.0,5.0,12.0,-0.02,-109.96,-3.0,1.5,86.9,26.93,26.74
4,93.0,5.0,13.0,-0.02,-109.96,-4.5,1.9,87.6,27.01,26.82
...,...,...,...,...,...,...,...,...,...,...
93930,98.0,6.0,10.0,8.95,-140.33,-6.8,-5.3,81.3,27.52,28.17
93931,98.0,6.0,11.0,8.96,-140.33,-5.1,-0.4,94.1,26.04,28.14
93932,98.0,6.0,12.0,8.96,-140.32,-4.3,-3.3,93.2,25.80,27.87
93933,98.0,6.0,13.0,8.95,-140.34,-6.1,-4.8,81.3,27.17,27.93


In [3]:
# Defining the training and test data:

# The first nine columns are the inputs; the tenth column is the target.
FEATURES = list(df.columns[:9])
TARGET = [df.columns[9]]

indices = df.index

# Split the row labels (not the rows themselves) so that both subsets can be
# selected from the same frame afterwards.
indices_treino, indices_teste = train_test_split(
    indices,
    test_size=TAMANHO_TESTE,
    random_state=SEMENTE_ALEATORIA,
)

df_treino, df_teste = df.loc[indices_treino], df.loc[indices_teste]

x_treino, y_treino = df_treino[FEATURES], df_treino[TARGET]
x_teste, y_teste = df_teste[FEATURES], df_teste[TARGET]

In [4]:
# Normalization:

normalizador_x = MinMaxScaler()
normalizador_y = MinMaxScaler()

# Each scaler is fitted on the training split only; the test split is then
# transformed with the scaling learned from the training data.
x_treino = normalizador_x.fit_transform(x_treino)
y_treino = normalizador_y.fit_transform(y_treino)
x_teste = normalizador_x.transform(x_teste)
y_teste = normalizador_y.transform(y_teste)

In [5]:
# Creating tensors:

# Convert the four scaled arrays to float32 tensors in one pass, rebinding
# the same names.
x_treino, y_treino, x_teste, y_teste = (
    torch.tensor(matriz, dtype=torch.float32)
    for matriz in (x_treino, y_treino, x_teste, y_teste)
)

In [6]:
# Checking the data:

# Print the training inputs and targets separated by one blank line.
print(x_treino, y_treino, sep='\n\n')

tensor([[0.4444, 1.0000, 0.3000,  ..., 0.4110, 0.6862, 0.7207],
        [0.8889, 0.4545, 0.1667,  ..., 0.5169, 0.6130, 0.8314],
        [0.7778, 0.9091, 0.0000,  ..., 0.4195, 0.6841, 0.4884],
        ...,
        [0.4444, 0.3636, 0.4333,  ..., 0.3856, 0.7301, 0.6780],
        [0.8889, 0.6364, 0.0000,  ..., 0.3051, 0.6025, 0.7359],
        [0.3333, 0.4545, 0.9000,  ..., 0.4831, 0.6506, 0.5810]])

tensor([[0.8132],
        [0.8444],
        [0.4786],
        ...,
        [0.7121],
        [0.8444],
        [0.5377]])


In [7]:
# Criando a classe da rede neural:

class OS_MENININHOS(nn.Module):
    """Fully connected network: two hidden ReLU layers and a linear output.

    Args:
        num_dados_entrada: number of input features of the first layer.
        neuronios_c1: number of neurons in the first hidden layer.
        neuronios_c2: number of neurons in the second hidden layer.
        num_targets: number of values produced by the output layer.
    """

    def __init__(
        self, num_dados_entrada, neuronios_c1, neuronios_c2, num_targets
    ):
        super().__init__()

        # Consecutive pairs of widths give the (in, out) sizes of each linear
        # layer; every linear layer is followed by a ReLU.
        larguras = (num_dados_entrada, neuronios_c1, neuronios_c2, num_targets)
        blocos = []
        for entrada, saida in zip(larguras, larguras[1:]):
            blocos += [nn.Linear(entrada, saida), nn.ReLU()]

        # The last ReLU is dropped so the output layer stays purely linear.
        self.camadas = nn.Sequential(*blocos[:-1])

    def forward(self, x):
        """Run the input through the layer stack and return the result."""
        return self.camadas(x)

In [8]:
# Creating an instance of the network:

# Input/output sizes are read from the tensors so they always match the data.
NUM_DADOS_DE_ENTRADA = x_treino.shape[1]
NUM_DADOS_DE_SAIDA = y_treino.shape[1]
NEURONIOS_C1 = 150
NEURONIOS_C2 = 63

# Seed torch's random number generator before the layers are created so the
# initial weights (and therefore the whole training run) are reproducible,
# in the same way the train/test split already is.
torch.manual_seed(SEMENTE_ALEATORIA)

minha_redezinha = OS_MENININHOS(NUM_DADOS_DE_ENTRADA, NEURONIOS_C1, NEURONIOS_C2, NUM_DADOS_DE_SAIDA)

In [9]:
# Checking the internal parameters:

# Materialise the parameter iterator once, then print each weight matrix and
# bias vector in the order the network exposes them.
parametros = list(minha_redezinha.parameters())
for p in parametros:
    print(p)

Parameter containing:
tensor([[-0.0989, -0.0319,  0.0858,  ..., -0.0386, -0.2719, -0.2380],
        [-0.1810,  0.2028,  0.3276,  ...,  0.1885,  0.0264,  0.1665],
        [-0.0175, -0.0251, -0.1135,  ..., -0.0528, -0.2724,  0.0371],
        ...,
        [ 0.1519,  0.2429,  0.1478,  ..., -0.1728, -0.0305,  0.0437],
        [-0.0662,  0.1966, -0.0931,  ..., -0.2576, -0.0398,  0.1684],
        [ 0.3211, -0.1320,  0.2386,  ..., -0.2191, -0.1264,  0.2597]],
       requires_grad=True)
Parameter containing:
tensor([ 0.1128, -0.1748,  0.3193,  0.1185,  0.1696,  0.0901,  0.0179, -0.2173,
         0.1696,  0.1290, -0.0966, -0.1512,  0.0854, -0.2633, -0.0836,  0.2666,
         0.2213, -0.2850,  0.1539,  0.2336, -0.2633, -0.1533,  0.3226, -0.0292,
         0.1178, -0.0181, -0.2332,  0.1286, -0.2747, -0.0219,  0.1875, -0.2321,
        -0.1960, -0.2697,  0.2939, -0.0881, -0.2285, -0.1482,  0.0527, -0.1412,
        -0.0730, -0.0557,  0.1824, -0.0965, -0.2507, -0.0884,  0.0619, -0.0589,
        -0.0619

In [10]:
# First prediction test (expected to be poor: no training step has run yet at
# this point of the notebook):

# Forward pass over the whole training set with the freshly created network.
y_prev = minha_redezinha(x_treino)
# Bare expression: shown as the cell output when run in a notebook.
y_prev

tensor([[0.0669],
        [0.0946],
        [0.0830],
        ...,
        [0.0789],
        [0.0909],
        [0.0960]], grad_fn=<AddmmBackward0>)

In [11]:
# Setting up the loss function and the optimizer (Adam):

TAXA_DE_APRENDIZADO = 0.001

# The loss function is the mean squared error.
fn_perda = torch.nn.MSELoss()

# The optimizer is Adam, a gradient-descent variant, acting on every
# parameter of the network.
otimizador = torch.optim.Adam(
    params=minha_redezinha.parameters(),
    lr=TAXA_DE_APRENDIZADO,
)

In [12]:
# Training the network:

# Put it in "training mode":
minha_redezinha.train()

NUM_EPOCAS = 1000

y_true = y_treino

for epoca in range(NUM_EPOCAS):
    # Clear the gradients left over from the previous iteration before any
    # new ones are computed.
    otimizador.zero_grad()

    # Forward pass over the whole training set (one full batch per epoch):
    y_pred = minha_redezinha(x_treino)

    # Loss between the predictions and the scaled targets:
    loss = fn_perda(y_pred, y_true)

    # Backpropagation:
    loss.backward()

    # Update the parameters:
    otimizador.step()

    # Show the result. `detach()` is used instead of the legacy `.data`
    # attribute: it yields the same value and prints the same way.
    print(epoca, loss.detach())

0 tensor(0.4599)
1 tensor(0.3886)
2 tensor(0.3223)
3 tensor(0.2626)
4 tensor(0.2097)
5 tensor(0.1631)
6 tensor(0.1223)
7 tensor(0.0873)
8 tensor(0.0589)
9 tensor(0.0376)
10 tensor(0.0244)
11 tensor(0.0195)
12 tensor(0.0223)
13 tensor(0.0303)
14 tensor(0.0403)
15 tensor(0.0488)
16 tensor(0.0536)
17 tensor(0.0541)
18 tensor(0.0509)
19 tensor(0.0451)
20 tensor(0.0383)
21 tensor(0.0315)
22 tensor(0.0257)
23 tensor(0.0215)
24 tensor(0.0190)
25 tensor(0.0179)
26 tensor(0.0179)
27 tensor(0.0186)
28 tensor(0.0198)
29 tensor(0.0209)
30 tensor(0.0219)
31 tensor(0.0226)
32 tensor(0.0229)
33 tensor(0.0227)
34 tensor(0.0221)
35 tensor(0.0213)
36 tensor(0.0202)
37 tensor(0.0190)
38 tensor(0.0178)
39 tensor(0.0168)
40 tensor(0.0159)
41 tensor(0.0153)
42 tensor(0.0150)
43 tensor(0.0148)
44 tensor(0.0148)
45 tensor(0.0149)
46 tensor(0.0150)
47 tensor(0.0151)
48 tensor(0.0150)
49 tensor(0.0148)
50 tensor(0.0144)
51 tensor(0.0139)
52 tensor(0.0134)
53 tensor(0.0129)
54 tensor(0.0124)
55 tensor(0.0120)
56

In [13]:
# Checking the network's performance (on the training data):

# Targets go back to their original scale; no gradient context is needed for
# this step because no network call is involved.
y_true = normalizador_y.inverse_transform(y_treino)

# Predict without tracking gradients and undo the target scaling on the result.
with torch.no_grad():
    y_pred = normalizador_y.inverse_transform(minha_redezinha(x_treino))

# Print each true value next to its prediction.
for yt, yp in zip(y_true, y_pred):
    print(yt, yp)

[28.63999986] [28.4778784]
[29.03999988] [29.59725394]
[24.33999998] [24.4843567]
[25.92] [26.46379532]
[26.95000037] [27.15868055]
[29.14999973] [29.51262518]
[23.66999987] [23.74245395]
[28.01000028] [27.12447841]
[26.70000035] [26.67454348]
[28.88999987] [29.14758096]
[28.77000017] [28.38449671]
[30.2999998] [29.82799031]
[29.23999989] [29.06918678]
[23.83000011] [23.86304495]
[29.38000005] [29.29972478]
[29.44999975] [28.51258069]
[29.78000008] [29.64169643]
[28.88999987] [28.52171735]
[28.39999969] [28.31518251]
[25.58999968] [27.00563677]
[29.63999992] [29.67824075]
[28.63000001] [28.83941164]
[30.33000011] [29.8773477]
[25.31999997] [25.55885661]
[27.32000008] [27.93233755]
[27.58999979] [27.06551178]
[29.14999973] [29.843275]
[24.36000006] [25.00996946]
[28.73999986] [28.19748976]
[28.58000001] [28.62837626]
[27.78999981] [27.46922345]
[29.54999976] [29.8452595]
[29.3899999] [29.20571886]
[28.02999997] [28.3836542]
[25.87] [26.31135892]
[25.02999979] [25.08440767]
[26.11000017]

In [14]:
# Testing the network:

# Put it in "evaluation mode":
minha_redezinha.eval()

# Test targets go back to their original scale.
y_true = normalizador_y.inverse_transform(y_teste)

# Predict on the test inputs without tracking gradients, then undo the
# target scaling on the predictions.
with torch.no_grad():
    y_pred = normalizador_y.inverse_transform(minha_redezinha(x_teste))

# Print each true value next to its prediction.
for yt, yp in zip(y_true, y_pred):
    print(yt, yp)

[26.97000006] [27.21651438]
[28.32999999] [29.13270067]
[28.78999987] [28.5975725]
[29.61000037] [29.57553016]
[27.27999993] [27.63482218]
[23.80000018] [23.25578437]
[27.72999996] [27.91917293]
[28.88000002] [29.35686162]
[29.23000005] [27.85431255]
[25.86000015] [25.43478146]
[29.01000034] [28.64195142]
[30.2999998] [30.38322693]
[29.18999989] [29.40113791]
[29.14999973] [29.42898828]
[28.43999985] [28.20088738]
[29.47000021] [29.48426776]
[29.06000034] [28.95372133]
[28.93000003] [28.71185002]
[29.4399999] [29.40837355]
[29.2700002] [29.51631844]
[29.90999963] [29.63248241]
[30.25999965] [30.25512916]
[29.54999976] [29.17540682]
[28.17000014] [28.93186657]
[28.71000032] [28.71218932]
[28.13999983] [27.67725719]
[29.71000038] [28.66847981]
[29.74999977] [29.76361323]
[28.93000003] [29.14709537]
[24.41999991] [24.45006111]
[23.23999992] [24.16558705]
[28.03999982] [28.11426033]
[29.18999989] [28.9886756]
[29.53999991] [29.07438278]
[27.5700001] [27.0217816]
[26.57000004] [26.62412299]

In [15]:
# Measuring performance with RMSE:

# The `squared=False` argument of `mean_squared_error` is deprecated in recent
# scikit-learn releases and scheduled for removal, so the root is taken
# explicitly. With a single target column this is the same RMSE value.
RMSE = mean_squared_error(y_true, y_pred) ** 0.5
print(f'Loss do teste: {RMSE}')

Loss do teste: 0.4230889283590808
