In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.layers import GRU, Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

In [2]:
# Name of the column the model is trained to predict.
target = 'tmed'
# Seed applied below so repeated runs start from the same random state.
seed = 42
# Window length: number of consecutive rows fed to the model per sample
# (presumably one row per day -- TODO confirm against the dataset).
n_days = 7

# Seed Python's `random`, NumPy and the TensorFlow backend in one call; without
# this the `seed` constant was never used and every run initialised the
# network weights and shuffled the batches differently.
set_random_seed(seed)

In [3]:
# Read both splits from disk, parsing the 'fecha' column into datetimes so the
# `.dt` accessor can be used on it afterwards.
train = pd.read_csv(
    filepath_or_buffer='../../data/ml/train_scaled.csv',
    parse_dates=['fecha'],
)
test = pd.read_csv(
    filepath_or_buffer='../../data/ml/test_scaled.csv',
    parse_dates=['fecha'],
)

In [4]:
# Keep only the training rows dated in 2006 or later.
train = train.loc[train['fecha'].dt.year.ge(2006)]

In [5]:
# Keep only the test rows dated in 2023 or later.
test = test.loc[test['fecha'].dt.year.ge(2023)]

In [6]:
# Move 'fecha' out of the columns and into the index so it is not treated as a
# model feature further down.
train = train.set_index('fecha')
test = test.set_index('fecha')

In [7]:
# Split each frame into its predictors (every column except `target`) and the
# target column itself.
X_train, y_train = train.drop(columns=target), train[target]
X_test, y_test = test.drop(columns=target), test[target]

In [8]:
# Sanity check: shapes of the four feature/target pieces, in split order.
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

((3907377, 14), (3907377,), (644092, 14), (644092,))

In [9]:
# Fraction of all retained rows that belongs to the test split.
len(test) / sum(map(len, (train, test)))

0.14151299283813643

In [10]:
def create_sequences(X, y, n_days):
    """Build sliding-window samples for a sequence model.

    Sample ``k`` pairs rows ``k .. k + n_days - 1`` of ``X`` with the value of
    ``y`` at row ``k + n_days``, i.e. each target is predicted from the
    ``n_days`` rows that immediately precede it.

    Windows are taken over consecutive rows by position; index values are
    ignored. NOTE(review): if ``X`` stacks several independent series on top
    of each other, windows will straddle their boundaries -- confirm the
    input is a single ordered series or window each group separately.

    Parameters
    ----------
    X : pandas.DataFrame or array-like, shape (n_rows, n_features)
        Feature rows in chronological order.
    y : pandas.Series or array-like, shape (n_rows,)
        Target values aligned row-by-row with ``X``.
    n_days : int
        Window length in rows; must be at least 1.

    Returns
    -------
    X_seq : numpy.ndarray, shape (n_rows - n_days, n_days, n_features)
        Independent, writable copy of the windows.
    y_seq : numpy.ndarray, shape (n_rows - n_days,)
        Target that follows each window.

    When ``n_rows <= n_days`` no window fits and both arrays are empty, but
    ``X_seq`` still keeps its trailing ``(n_days, n_features)`` dimensions.

    Raises
    ------
    ValueError
        If ``n_days`` is smaller than 1 or ``X`` and ``y`` differ in length.
    """
    if n_days < 1:
        raise ValueError(f"n_days must be >= 1, got {n_days}")

    features = np.asarray(X)
    targets = np.asarray(y)
    n_rows = features.shape[0]

    if targets.shape[0] != n_rows:
        raise ValueError(
            f"X and y must have the same length, got {n_rows} and {targets.shape[0]}"
        )

    # The target of a window is simply the row right after it, so the targets
    # are everything from position n_days onwards (empty if the input is short).
    y_seq = targets[n_days:].copy()

    if n_rows <= n_days:
        # Not enough rows for a single (window, target) pair.
        X_seq = np.empty((0, n_days) + features.shape[1:], dtype=features.dtype)
        return X_seq, y_seq

    # One strided view replaces the per-row Python loop. The view has
    # n_rows - n_days + 1 windows; the last one has no following target, so it
    # is dropped.
    windows = np.lib.stride_tricks.sliding_window_view(features, n_days, axis=0)[:-1]

    # sliding_window_view appends the window axis last; move it next to the
    # sample axis and materialise a contiguous copy (the view is read-only).
    X_seq = np.ascontiguousarray(np.moveaxis(windows, -1, 1))

    return X_seq, y_seq

In [11]:
# Turn each split into (window, next-row target) samples of length `n_days`.
X_train_seq, y_train_seq = create_sequences(X=X_train, y=y_train, n_days=n_days)
X_test_seq, y_test_seq = create_sequences(X=X_test, y=y_test, n_days=n_days)

In [12]:
# Sanity check: shapes of the windowed arrays, in split order.
tuple(arr.shape for arr in (X_train_seq, y_train_seq, X_test_seq, y_test_seq))

((3907370, 7, 14), (3907370,), (644085, 7, 14), (644085,))

In [13]:
def create_gru(shape, units=50):
    """Build an uncompiled stacked-GRU model with a single-unit output.

    Parameters
    ----------
    shape : tuple
        Shape of one input sample, passed straight to ``Input``. The caller in
        this notebook passes ``(n_days, n_features)``.
    units : int, default 50
        Number of units in each of the two GRU layers.

    Returns
    -------
    Sequential
        ``Input -> GRU(return_sequences=True) -> GRU -> Dense(1)``. The model
        is not compiled; the caller chooses optimizer and loss.
    """
    return Sequential([
        Input(shape=shape),
        # The first GRU returns its whole output sequence so that the second
        # recurrent layer still receives one vector per time step.
        GRU(units=units, return_sequences=True),
        GRU(units=units),
        Dense(units=1),
    ])

In [14]:
# One input sample is a window of `n_days` rows by however many feature
# columns the windowed training array carries on its last axis.
shape = (n_days, X_train_seq.shape[-1])
model = create_gru(shape=shape)

I0000 00:00:1741236019.469607   85942 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1192 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [15]:
# Regression setup: Adam optimizer with a 1e-3 learning rate, minimising MSE.
model.compile(
    loss='mean_squared_error',
    optimizer=Adam(learning_rate=1e-3),
)

In [None]:
# Train for 10 epochs in mini-batches of 128 windows.
# NOTE(review): the test split doubles as the validation data here and is also
# what the following cell evaluates on -- confirm this is intended rather than
# holding out a separate validation split.
model.fit(
    x=X_train_seq,
    y=y_train_seq,
    batch_size=128,
    epochs=10,
    validation_data=(X_test_seq, y_test_seq),
)

In [None]:
# Report the compiled loss (mean squared error) on the windowed test split.
model.evaluate(x=X_test_seq, y=y_test_seq)