In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler
from keras.utils import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import gc

In [4]:
# -------------------------------
# 1. Cargar datos
# -------------------------------
df = pd.read_csv("../../data/preprocessed/base.csv", sep=",")
df["periodo"] = pd.to_datetime(df["periodo"], format="%Y%m")
df = df.groupby(["product_id", "periodo"])["tn"].sum().reset_index()
df = df.sort_values(["product_id", "periodo"])

In [7]:
# -------------------------------
# 2. Feature engineering
# -------------------------------
def agregar_features(df):
    df = df.copy()
    df = df.sort_values(["product_id", "periodo"])
    df["tn_lag1"] = df.groupby("product_id")["tn"].shift(1)
    df["tn_diff1"] = df["tn"] - df["tn_lag1"]
    df["rolling_mean3"] = df.groupby("product_id")["tn"].transform(lambda x: x.shift(1).rolling(3).mean())
    df["rolling_std3"] = df.groupby("product_id")["tn"].transform(lambda x: x.shift(1).rolling(3).std())
    df["rolling_max3"] = df.groupby("product_id")["tn"].transform(lambda x: x.shift(1).rolling(3).max())
    df["rolling_min3"] = df.groupby("product_id")["tn"].transform(lambda x: x.shift(1).rolling(3).min())
    df["rolling_max6"] = df.groupby("product_id")["tn"].transform(lambda x: x.shift(1).rolling(6).max())
    df["rolling_min6"] = df.groupby("product_id")["tn"].transform(lambda x: x.shift(1).rolling(6).min())
    return df

df_features = agregar_features(df).fillna(0)

# -------------------------------
# 3. Crear target t+2
# -------------------------------
df_features["target_tn_plus2"] = df_features.groupby("product_id")["tn"].shift(-2)

# -------------------------------
# 4. Separar TRAIN y PREDICCIÓN
# -------------------------------
train_df = df_features[df_features["periodo"] <= "2019-09-01"].copy()
pred_df = df_features[df_features["periodo"] == "2019-10-01"].copy()
# train_df = train_df.dropna(subset=["target_tn_plus2"])
train_df['target_tn_plus2'].fillna(0, inplace=True)


# Escalar
feature_cols = ['tn', 'tn_lag1', 'tn_diff1', 'rolling_mean3', 'rolling_std3',
                'rolling_max3', 'rolling_min3', 'rolling_max6', 'rolling_min6']
scaler = RobustScaler()
train_df[feature_cols] = scaler.fit_transform(train_df[feature_cols])
pred_df[feature_cols] = scaler.transform(pred_df[feature_cols])

# -------------------------------
# 5. Crear X_train y y_train
# -------------------------------
X_list, y_list = [], []

for product_id, group in train_df.groupby("product_id"):
    group = group.sort_values("periodo")
    X = group[feature_cols].values
    y = group["target_tn_plus2"].values
    if len(X) >= 4:
        X_list.append(X)
        y_list.append(y)

X_train = pad_sequences(X_list, dtype='float32', padding='pre')
y_train = pad_sequences([y.reshape(-1, 1) for y in y_list], dtype='float32', padding='pre').squeeze()

# -------------------------------
# 6. Entrenar modelo LSTM
# -------------------------------
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.3),
    LSTM(32),
    Dense(16, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=50, batch_size=64, validation_split=0.2, verbose=1)

# -------------------------------
# 7. Predicción del mes 201912 usando input 201910
# -------------------------------
X_pred_list = []
product_ids = []

for product_id, group in pred_df.groupby("product_id"):
    group = group.sort_values("periodo")
    X = group[feature_cols].values
    if len(X) >= 1:
        X_pred_list.append(X)
        product_ids.append(product_id)

X_pred = pad_sequences(X_pred_list, dtype='float32', padding='pre', maxlen=X_train.shape[1])
y_pred = model.predict(X_pred).squeeze()

# -------------------------------
# 8. Evaluación contra 201912
# -------------------------------
actual_201912 = df_features[df_features["periodo"] == "2019-12-01"]
actual_201912 = actual_201912[actual_201912["product_id"].isin(product_ids)][["product_id", "tn"]]
actual_201912 = actual_201912.rename(columns={"tn": "real_201912"})

pred_df_final = pd.DataFrame({
    "product_id": product_ids,
    "pred_201912": y_pred
})

result = pd.merge(pred_df_final, actual_201912, on="product_id", how="inner")
result["abs_error"] = np.abs(result["pred_201912"] - result["real_201912"])
result["pct_error"] = result["abs_error"] / (result["real_201912"] + 1e-8)

print(result.head())
print(f"\n📉 MAE: {result['abs_error'].mean():.2f}")
print(f"📊 MAPE: {result['pct_error'].mean() * 100:.2f}%")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['target_tn_plus2'].fillna(0, inplace=True)


Epoch 1/50


  super().__init__(**kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 80ms/step - loss: 15197.4980 - val_loss: 0.7757
Epoch 2/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - loss: 14760.9902 - val_loss: 1.4556
Epoch 3/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - loss: 17933.5742 - val_loss: 13.8525
Epoch 4/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 12179.4336 - val_loss: 30.3463
Epoch 5/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 10335.9209 - val_loss: 48.7853
Epoch 6/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 10246.6123 - val_loss: 1.6996
Epoch 7/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - loss: 13463.7061 - val_loss: 1.1387
Epoch 8/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - loss: 10339.1865 - val_loss: 0.6540
Epoch 9/50
[1m14/14[0m