In [None]:
import pandas as pd
import numpy as np
import pywt

# Load the base dataset and collapse it to total "tn" per product and month,
# ordered by product and period.
df = pd.read_csv("../../data/preprocessed/base.csv", sep=",")
df["periodo"] = pd.to_datetime(df["periodo"], format="%Y%m")
df = (
    df.groupby(["product_id", "periodo"])["tn"]
    .sum()
    .reset_index()
    .sort_values(["product_id", "periodo"])
)

# One feature dictionary per product is collected here.
dwt_features_list = []

for product_id, df_prod in df.groupby("product_id"):
    serie = df_prod["tn"].values

    # Only products with at least 8 observations get a feature row.
    # NOTE(review): per pywt.dwt_max_level, db4 (filter length 8) supports a
    # 3-level decomposition free of boundary effects only from 56 points on;
    # confirm that a threshold of 8 is what is intended.
    if len(serie) >= 8:
        # 3-level discrete wavelet transform with the Daubechies-4 wavelet.
        # wavedec returns the approximation array first, then the detail arrays.
        coeffs = pywt.wavedec(serie, 'db4', level=3)

        feature_dict = {"product_id": product_id}

        # Keep at most the first 5 coefficients of every array. In the column
        # name, L{i} is the array's position in `coeffs` (L0 = approximation)
        # and C{j} is the coefficient's position inside that array.
        for i, coeff in enumerate(coeffs):
            feature_dict.update(
                {f"dwt_L{i}_C{j}": value for j, value in enumerate(coeff[:5])}
            )

        dwt_features_list.append(feature_dict)

# Final feature table: one row per product that passed the length check.
df_dwt_features = pd.DataFrame(dwt_features_list)
print(df_dwt_features.head())


In [7]:
import pandas as pd
import numpy as np
import pywt

# Expanding-window DWT features: one row per (product_id, periodo), computed
# only from that product's observations up to and including that period.

# Load the base dataset and aggregate "tn" per product and month.
df = pd.read_csv("../../data/preprocessed/base.csv", sep=",")
df["periodo"] = pd.to_datetime(df["periodo"], format="%Y%m")
df = df.groupby(["product_id", "periodo"])["tn"].sum().reset_index()
df = df.sort_values(["product_id", "periodo"])

# Decomposition settings (previously inline magic values).
DWT_WAVELET = 'db4'
DWT_LEVEL = 3
DWT_MAX_COEFFS = 5  # leading coefficients kept from each coefficient array

# Accumulates one feature dict per (product, period); turned into a frame once.
dwt_features_rows = []
# Number of prefixes for which wavedec raised and therefore no row was produced.
skipped_prefixes = 0

for product_id, df_prod in df.groupby("product_id"):
    df_prod = df_prod.sort_values("periodo").reset_index(drop=True)

    # Extract the columns once per product instead of slicing the DataFrame
    # and materialising a row Series on every inner iteration.
    tn_values = df_prod["tn"].values
    periodos = list(df_prod["periodo"])

    for end_idx in range(1, len(df_prod) + 1):
        # History available at this period: the first `end_idx` observations.
        valores = tn_values[:end_idx]

        # No minimum-length filter is applied; if wavedec raises ValueError the
        # prefix gets no row. In the recorded run both frames have 31243 rows,
        # i.e. nothing was skipped.
        # NOTE(review): whether a too-high level raises or only warns depends
        # on the installed PyWavelets version - confirm.
        try:
            coeffs = pywt.wavedec(valores, DWT_WAVELET, level=DWT_LEVEL)
        except ValueError:
            skipped_prefixes += 1
            continue

        row = {
            "product_id": product_id,
            "periodo": periodos[end_idx - 1],
        }

        # Keep at most the first DWT_MAX_COEFFS coefficients of every array.
        # L{i} is the array's position in `coeffs` (L0 = approximation, then
        # the detail arrays in the order wavedec returns them); C{j} is the
        # position inside the array. Arrays shorter than DWT_MAX_COEFFS leave
        # the missing columns as NaN in the final frame (seen in dwt_L3_C4).
        # NOTE(review): these are the *leading* coefficients, which in the
        # recorded output stop changing once the series grows (e.g.
        # dwt_L3_C0) - confirm that trailing coefficients are not what the
        # model needs.
        for i, coeff in enumerate(coeffs):
            for j, value in enumerate(coeff[:DWT_MAX_COEFFS]):
                row[f"dwt_L{i}_C{j}"] = value

        dwt_features_rows.append(row)

# Surface skipped prefixes instead of dropping them silently.
if skipped_prefixes:
    print(f"Skipped {skipped_prefixes} prefixes rejected by pywt.wavedec")

# Assemble the final feature table.
df_dwt_features = pd.DataFrame(dwt_features_rows)
print(df_dwt_features.head())




   product_id    periodo    dwt_L0_C0    dwt_L0_C1    dwt_L0_C2    dwt_L0_C3  \
0       20001 2017-01-01  2643.935103  2643.935103  2643.935103  2643.935103   
1       20001 2017-02-01  2390.446756  2517.285249  2313.051668  2473.010764   
2       20001 2017-03-01  2945.822552  2892.047188  3033.610007  2944.537107   
3       20001 2017-04-01  2942.185164  2964.314994  3001.023566  3004.121394   
4       20001 2017-05-01  3151.244598  3234.824874  3184.786012  3334.266329   

     dwt_L0_C4     dwt_L1_C0     dwt_L1_C1     dwt_L1_C2  ...     dwt_L2_C0  \
0  2643.935103 -9.237056e-14 -9.947598e-14 -1.030287e-13  ...  7.460699e-14   
1  2439.066618  2.210346e+01  5.632704e+01 -7.028512e+01  ...  5.002341e+01   
2  2885.133093  4.407943e+01  1.463822e+02  1.913988e+01  ... -7.971876e+01   
3  2902.655314  9.493607e+01  3.079440e+02 -3.928756e+00  ... -5.773935e+01   
4  2925.545897  2.192043e+02  6.999447e+02 -2.227345e+02  ... -3.888295e+01   

      dwt_L2_C1     dwt_L2_C2     dwt_L2_C3 

In [9]:
# Compare row/column counts of the aggregated input and the feature table.
print(df.shape, df_dwt_features.shape, sep="\n")

(31243, 3)
(31243, 22)


In [10]:
# Count missing values in each feature column.
df_dwt_features.isna().sum(axis=0)

product_id       0
periodo          0
dwt_L0_C0        0
dwt_L0_C1        0
dwt_L0_C2        0
dwt_L0_C3        0
dwt_L0_C4        0
dwt_L1_C0        0
dwt_L1_C1        0
dwt_L1_C2        0
dwt_L1_C3        0
dwt_L1_C4        0
dwt_L2_C0        0
dwt_L2_C1        0
dwt_L2_C2        0
dwt_L2_C3        0
dwt_L2_C4        0
dwt_L3_C0        0
dwt_L3_C1        0
dwt_L3_C2        0
dwt_L3_C3        0
dwt_L3_C4     2446
dtype: int64

In [11]:
# Inspect the feature rows of product 20001 in chronological order.
is_product_20001 = df_dwt_features['product_id'] == 20001
df_dwt_features.loc[is_product_20001].sort_values(by='periodo')

Unnamed: 0,product_id,periodo,dwt_L0_C0,dwt_L0_C1,dwt_L0_C2,dwt_L0_C3,dwt_L0_C4,dwt_L1_C0,dwt_L1_C1,dwt_L1_C2,...,dwt_L2_C0,dwt_L2_C1,dwt_L2_C2,dwt_L2_C3,dwt_L2_C4,dwt_L3_C0,dwt_L3_C1,dwt_L3_C2,dwt_L3_C3,dwt_L3_C4
0,20001,2017-01-01,2643.935103,2643.935103,2643.935103,2643.935103,2643.935103,-9.237056e-14,-9.947598e-14,-1.030287e-13,...,7.460699e-14,7.460699e-14,7.460699e-14,1.776357e-14,-1.065814e-14,-4.618528e-14,-4.618528e-14,-1.776357e-14,-2.842171e-14,
1,20001,2017-02-01,2390.446756,2517.285249,2313.051668,2473.010764,2439.066618,22.10346,56.32704,-70.28512,...,50.02341,161.5587,-50.02341,-161.5587,50.02341,11.20362,-11.20362,11.20362,-11.20362,
2,20001,2017-03-01,2945.822552,2892.047188,3033.610007,2944.537107,2885.133093,44.07943,146.3822,19.13988,...,-79.71876,-233.2939,82.50946,-31.15937,261.6626,120.1355,244.6009,-364.7364,120.1355,244.600903
3,20001,2017-04-01,2942.185164,2964.314994,3001.023566,3004.121394,2902.655314,94.93607,307.944,-3.928756,...,-57.73935,-173.6068,106.2462,-77.02323,202.1231,100.0888,300.8437,-108.0059,-292.9265,100.088765
4,20001,2017-05-01,3151.244598,3234.824874,3184.786012,3334.266329,2925.545897,219.2043,699.9447,-222.7345,...,-38.88295,-92.13046,225.8743,-85.2011,-153.1465,83.76819,300.8437,155.1702,-320.9837,-218.798349
5,20001,2017-06-01,3177.663632,3240.799067,3196.664484,3376.353692,2820.684361,220.5999,694.101,-379.4553,...,-38.35094,-94.79659,108.3529,-43.14705,-60.87305,83.57887,300.8437,151.0547,-56.30757,-244.014262
6,20001,2017-07-01,3121.142569,3240.616821,3167.588714,3301.777576,3049.691018,220.0815,705.908,-105.8539,...,-37.73637,-92.1228,264.3735,-141.5347,79.83528,83.57887,300.8437,151.0547,-297.5179,343.10454
7,20001,2017-08-01,3125.734643,3237.660206,3181.635549,3301.132685,3058.031807,220.8025,707.7614,-97.37237,...,-38.35593,-91.51829,259.8726,-190.8111,147.0491,83.57887,300.8437,151.0547,-352.0531,121.770617
8,20001,2017-09-01,3131.511301,3238.349432,3178.130557,3304.054365,3056.814811,220.4529,707.1393,-120.6759,...,-38.27001,-91.51829,259.7328,-162.6788,43.84986,83.57887,300.8437,151.0547,-352.0531,91.241532
9,20001,2017-10-01,3128.694814,3237.482732,3180.146656,3305.304463,3018.601481,220.4283,707.881,-89.41701,...,-38.29526,-91.51829,257.323,-242.1429,72.44405,83.57887,300.8437,151.0547,-352.0531,62.948928
