In [1]:
# 1. Imports

from datetime import datetime, timedelta
from src.data_utils import load_raw_data, transformar_a_series_temporales
from src import config
import hopsworks
import pandas as pd
import os

DEBUG ENV HOPSWORKS_PROJECT_NAME: fleca_mlops
DEBUG ENV PATH: C:\Workspace\mlops_fleca_project\.env


In [15]:
# Load the raw historical data straight from the parquet file in the raw-data folder.
# The location can be overridden through the RAW_PARQUET_PATH environment variable so the
# notebook is not tied to one machine; when the variable is unset, the original
# workstation path is used, which keeps the previous behaviour.
parquet_path = os.environ.get(
    "RAW_PARQUET_PATH",
    r"C:\Workspace\mlops_fleca_project\data\raw\raw_data_bq_forecasting_20250803.parquet",
)
df_historico = pd.read_parquet(parquet_path)
print('Datos históricos cargados:', df_historico.shape)



Datos históricos cargados: (356433, 9)


In [16]:
# Build the weekly time series for the BOLLERIA family from the raw history,
# then show its shape and first rows as a quick visual check.
df_ts_historico = transformar_a_series_temporales(
    df_historico,
    familia="BOLLERIA",
)
print('Series temporales históricas generadas:', df_ts_historico.shape)
print(df_ts_historico.head())

Series temporales históricas generadas: (131, 8)
   year  week   familia  base_imponible  is_summer_peak  is_easter  \
0  2023     1  BOLLERIA          825.11               0          0   
1  2023     2  BOLLERIA          658.40               0          0   
2  2023     3  BOLLERIA          741.40               0          0   
3  2023     4  BOLLERIA          653.64               0          0   
4  2023     5  BOLLERIA          680.46               0          0   

   dias_semana week_start  
0            7 2023-01-02  
1            7 2023-01-09  
2            7 2023-01-16  
3            7 2023-01-23  
4            7 2023-01-30  


In [18]:
# Sanity-check the weekly series built in the previous cell. This cell used to repeat the
# transformar_a_series_temporales call verbatim; the frame is reused here instead of being
# recomputed.
assert not df_ts_historico.empty, "df_ts_historico is empty: nothing to upload"

# (familia, week_start) is declared as the Feature Group primary key further down,
# so every pair must appear only once.
assert not df_ts_historico.duplicated(subset=["familia", "week_start"]).any(), (
    "df_ts_historico has duplicated (familia, week_start) rows"
)

print('Series temporales históricas generadas:', df_ts_historico.shape)
print(df_ts_historico.head())

Series temporales históricas generadas: (131, 8)
   year  week   familia  base_imponible  is_summer_peak  is_easter  \
0  2023     1  BOLLERIA          825.11               0          0   
1  2023     2  BOLLERIA          658.40               0          0   
2  2023     3  BOLLERIA          741.40               0          0   
3  2023     4  BOLLERIA          653.64               0          0   
4  2023     5  BOLLERIA          680.46               0          0   

   dias_semana week_start  
0            7 2023-01-02  
1            7 2023-01-09  
2            7 2023-01-16  
3            7 2023-01-23  
4            7 2023-01-30  


In [19]:
# Align the frame with the historical Feature Group schema in a single chain:
#   1. drop the 'fecha' column when it is present (no-op otherwise),
#   2. cast each column to the dtype noted next to it,
#   3. make sure week_start is a pandas datetime (timestamp).
df_ts_historico = (
    df_ts_historico
    .drop(columns=['fecha'], errors='ignore')
    .astype({
        'year': 'int64',              # bigint
        'week': 'int64',              # bigint
        'familia': 'string',          # string
        'base_imponible': 'float64',  # double
        'is_summer_peak': 'int32',    # int
        'is_easter': 'int64',         # bigint
    })
    .assign(week_start=lambda frame: pd.to_datetime(frame['week_start']))
)

print(df_ts_historico.dtypes)
print(df_ts_historico.head())

year                       int64
week                       int64
familia           string[python]
base_imponible           float64
is_summer_peak             int32
is_easter                  int64
dias_semana                int64
week_start        datetime64[ns]
dtype: object
   year  week   familia  base_imponible  is_summer_peak  is_easter  \
0  2023     1  BOLLERIA          825.11               0          0   
1  2023     2  BOLLERIA          658.40               0          0   
2  2023     3  BOLLERIA          741.40               0          0   
3  2023     4  BOLLERIA          653.64               0          0   
4  2023     5  BOLLERIA          680.46               0          0   

   dias_semana week_start  
0            7 2023-01-02  
1            7 2023-01-09  
2            7 2023-01-16  
3            7 2023-01-23  
4            7 2023-01-30  


In [22]:
# 5. Connect to Hopsworks with the API key and project name taken from src.config.
project = hopsworks.login(
    api_key_value=config.HOPSWORKS_API_KEY,
    project=config.HOPSWORKS_PROJECT_NAME,
)

# Handle to the project's feature store.
feature_store = project.get_feature_store()

# Get (or declare) the historical Feature Group. Rows are keyed by
# (familia, week_start) and week_start is also used as the event time.
feature_group = feature_store.get_or_create_feature_group(
    name=config.FEATURE_GROUP_NAME,
    version=config.FEATURE_GROUP_VERSION,
    primary_key=["familia", "week_start"],
    event_time="week_start",
)

# NOTE(review): get_or_create_feature_group presumably always returns an object, in which
# case this guard never fires — confirm against the Hopsworks API before relying on it.
# RuntimeError is raised instead of a bare Exception; it is still caught by `except Exception`.
if feature_group is None:
    raise RuntimeError("El Feature Group histórico no existe o el nombre/version no coinciden exactamente. Verifica en Hopsworks.")

2025-08-15 13:21:01,748 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-08-15 13:21:01,751 INFO: Initializing external client
2025-08-15 13:21:01,752 INFO: Base URL: https://c.app.hopsworks.ai:443
Connection closed.
2025-08-15 13:21:01,751 INFO: Initializing external client
2025-08-15 13:21:01,752 INFO: Base URL: https://c.app.hopsworks.ai:443






2025-08-15 13:21:02,843 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272


In [24]:
# Upload the prepared frame to the Feature Group. The 'wait_for_job' write option is
# set to True; the call is left as the cell's last expression so its return value is
# displayed as the cell output.
feature_group.insert(df_ts_historico, write_options=dict(wait_for_job=True))

Uploading Dataframe: 100.00% |██████████| Rows 131/131 | Elapsed Time: 00:00 | Remaining Time: 00:00



Launching job: times_series_bolleria_feature_group_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1242272/jobs/named/times_series_bolleria_feature_group_1_offline_fg_materialization/executions
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1242272/jobs/named/times_series_bolleria_feature_group_1_offline_fg_materialization/executions
2025-08-15 13:21:53,474 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-08-15 13:21:53,474 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-08-15 13:21:56,632 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-08-15 13:21:56,632 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-08-15 13:23:22,101 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Fina

(Job('times_series_bolleria_feature_group_1_offline_fg_materialization', 'SPARK'),
 None)