In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [2]:
df = pd.read_csv('data/vazoes_CA_20_23.csv')


In [3]:
df.columns

Index(['Unnamed: 0', 'Data', 'Vazao_CA', 'Vazao1_CA_1d', 'Vazao2_CA_1d',
       'Vazao1_CA_7d', 'Vazao2_CA_7d', 'Vazao1_CA_15d', 'Vazao2_CA_15d',
       'Vazao1_CA_30d', 'Vazao2_CA_30d'],
      dtype='object')

In [4]:
scaler = MinMaxScaler()
df['vazao_normalizada'] = scaler.fit_transform(df['Vazao1_CA_1d'].values.reshape(-1, 1))

In [43]:
sequence = torch.FloatTensor(df['vazao_normalizada'].values).view(-1, 1)


In [46]:
# Criar uma função para preparar sequências para a entrada no modelo GRU
def prepare_sequence(seq, window_size):
    sequences = []
    for i in range(len(seq) - window_size):
        seq_slice = seq[i:i+window_size]
        target = seq[i+window_size:i+window_size+1]
        sequences.append((seq_slice, target))
    return sequences


In [47]:
class GRUImputer(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRUImputer, self).__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input_seq):
        gru_out, _ = self.gru(input_seq.view(len(input_seq), 1, -1))
        output = self.fc(gru_out[-1])
        return output

In [48]:
# Configurar o modelo GRU
input_size = 1
hidden_size = 64
output_size = 1
window_size = 3  # Tamanho da janela temporal

model = GRUImputer(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)


In [49]:
11*1000/60

183.33333333333334

In [50]:
# Treinamento do modelo
epochs = 10

for epoch in range(epochs):
    print(epoch)
    for seq, target in prepare_sequence(sequence, window_size):
        optimizer.zero_grad()
        output = model(seq)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

# Imputar valores ausentes
model.eval()

0
1
2
3
4
5
6
7
8
9


GRUImputer(
  (gru): GRU(1, 64)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)

In [53]:
len(imputed_values)

8630

In [51]:
imputed_values = []

for i in range(len(sequence) - window_size):
    seq = sequence[i:i+window_size]
    with torch.no_grad():
        imputed_value = model(seq).item()
        imputed_values.append(imputed_value)

# Desnormalizar os valores imputados
imputed_values = scaler.inverse_transform(np.array(imputed_values).reshape(-1, 1))

# Substituir os valores ausentes no DataFrame
df['imputed_value'] = np.nan
df['imputed_value'][window_size-1:] = imputed_values.flatten()

# Visualizar o DataFrame
print(df)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['imputed_value'][window_size-1:] = imputed_values.flatten()


ValueError: cannot set using a slice indexer with a different length than the value

In [22]:
imputed_values

array([[ 685.06697176],
       [ 681.59576534],
       [ 634.60800473],
       ...,
       [ 885.06735355],
       [1360.04495145],
       [1109.66279264]])

In [23]:
df['Vazao1_CA_1d']

0        335.0
1        329.0
2        278.0
3        250.0
4        183.0
         ...  
8628     498.0
8629     563.0
8630    2036.0
8631    1004.0
8632     593.0
Name: Vazao1_CA_1d, Length: 8633, dtype: float64