In [1]:
import sys
from pathlib import Path

# Put the project root (the parent of the current working directory) at the
# front of sys.path, unless it is already listed, so `src.*` imports resolve.
project_root = Path().resolve().parent
if all(entry != str(project_root) for entry in sys.path):
    sys.path.insert(0, str(project_root))

In [2]:
# 1. Imports

from datetime import datetime, timedelta, timezone
import os

import hopsworks
import pandas as pd

from src import config
from src.data_utils import load_raw_data, transformar_a_series_temporales

DEBUG ENV HOPSWORKS_PROJECT_NAME: fleca_mlops
DEBUG ENV PATH: C:\Workspace\mlops_fleca_project\.env


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# 2. Compute the last complete week (Monday to Sunday)

def calcular_ultima_semana_completa(ahora):
    """Return the bounds of the last complete Monday-Sunday week before `ahora`.

    The week that contains `ahora` is never returned, even when `ahora` falls
    on a Sunday, because that week is not over yet.

    Args:
        ahora: reference `datetime`.

    Returns:
        Tuple `(monday, sunday)`; both are `datetime` values at 00:00:00 and
        keep the tzinfo (if any) of `ahora`.
    """
    dias_desde_lunes = ahora.weekday()  # 0=Monday, 1=Tuesday, ..., 6=Sunday
    # Step back to this week's Monday, then one more full week.
    lunes = (ahora - timedelta(days=dias_desde_lunes + 7)).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    return lunes, lunes + timedelta(days=6)


# Current UTC time. `datetime.utcnow()` is deprecated since Python 3.12, so an
# aware "now" is taken and its tzinfo dropped to keep the same naive UTC value.
hoy = datetime.now(timezone.utc).replace(tzinfo=None)
print(f"Fecha actual: {hoy.strftime('%Y-%m-%d %A')}")

# Days elapsed since this week's Monday (kept as a notebook-level variable).
dias_desde_lunes = hoy.weekday()  # 0=Monday, 1=Tuesday, ..., 6=Sunday

# Monday and Sunday of the previous (last complete) week.
ultimo_lunes, ultimo_domingo = calcular_ultima_semana_completa(hoy)

print("Última semana completa:")
print(f"  Lunes: {ultimo_lunes.strftime('%Y-%m-%d %A')}")
print(f"  Domingo: {ultimo_domingo.strftime('%Y-%m-%d %A')}")
print(f"  Rango: {ultimo_lunes.strftime('%Y-%m-%d')} a {ultimo_domingo.strftime('%Y-%m-%d')}")

Fecha actual: 2025-09-28 Sunday
Última semana completa:
  Lunes: 2025-09-15 Monday
  Domingo: 2025-09-21 Sunday
  Rango: 2025-09-15 a 2025-09-21


In [4]:
# 3. Download the BigQuery rows for that week and load them into a DataFrame.
# Both bounds are passed as 'YYYY-MM-DD' strings.
df = load_raw_data(
    fecha_inicio=f"{ultimo_lunes:%Y-%m-%d}",
    fecha_fin=f"{ultimo_domingo:%Y-%m-%d}",
    descargar_bq=True,
)
print(f"Datos descargados: {df.shape}")

Descargando datos desde BigQuery porque descargar_bq=True o no existe el archivo C:\Workspace\mlops_fleca_project\data\raw\raw_data_bq_forecasting.parquet
Iniciando conexión con BigQuery...
Conexión establecida.
Usando fechas en consulta SQL: fecha_inicio='2025-09-15' y fecha_fin='2025-09-21'
Descargando datos de fleca-del-port.fleca_ventas_dia.t_facturas_dia_extendida_2023 ...
Ejecutando consulta SQL:

    SELECT 
        fecha,
        n_factura,
        zona_de_venta,
        producto,
        familia,
        cantidad,
        base_imponible,
        tipo_IVA,
        total
    FROM `fleca-del-port.fleca_ventas_dia.t_facturas_dia_extendida_2023`
    WHERE fecha >= '2025-09-15' AND fecha <= '2025-09-21'
Conexión establecida.
Usando fechas en consulta SQL: fecha_inicio='2025-09-15' y fecha_fin='2025-09-21'
Descargando datos de fleca-del-port.fleca_ventas_dia.t_facturas_dia_extendida_2023 ...
Ejecutando consulta SQL:

    SELECT 
        fecha,
        n_factura,
        zona_de_venta



Filas descargadas de la segunda tabla: 2443
Guardando archivo en C:\Workspace\mlops_fleca_project\data\raw\raw_data_bq_forecasting_20250928.parquet ...
Archivo guardado correctamente.
Usando archivo recién generado: C:\Workspace\mlops_fleca_project\data\raw\raw_data_bq_forecasting_20250928.parquet
Cargando datos desde: C:\Workspace\mlops_fleca_project\data\raw\raw_data_bq_forecasting_20250928.parquet
Validando fechas entre 2025-09-15 y 2025-09-21 (7 días)
Total de fechas faltantes: 0
No faltan fechas en el rango especificado.
Datos descargados: (2443, 11)


In [5]:
# 4. Build the weekly time series for the BOLLERIA family only,
# then show its shape and first rows.
df_ts = transformar_a_series_temporales(df, familia="BOLLERIA")
print(f"Series temporales generadas: {df_ts.shape}", df_ts.head(), sep="\n")


Series temporales generadas: (1, 8)
   year  week   familia  base_imponible  is_summer_peak  is_easter  \
0  2025    38  BOLLERIA          734.32               0          0   

   dias_semana week_start  
0            7 2025-09-15  


In [6]:
# Drop the 'fecha' column when present, then align the dtypes with the schema
# of the historical Feature Group (target Hopsworks type noted per column).
df_ts = (
    df_ts
    .drop(columns=['fecha'], errors='ignore')
    .astype({
        'year': 'int64',              # bigint
        'week': 'int64',              # bigint
        'familia': 'string',          # string
        'base_imponible': 'float64',  # double
        'is_summer_peak': 'int32',    # int
        'is_easter': 'int64',         # bigint
    })
    .assign(week_start=lambda frame: pd.to_datetime(frame['week_start']))  # timestamp
)

print(df_ts.dtypes, df_ts.head(), sep="\n")

year                       int64
week                       int64
familia           string[python]
base_imponible           float64
is_summer_peak             int32
is_easter                  int64
dias_semana                int64
week_start        datetime64[ns]
dtype: object
   year  week   familia  base_imponible  is_summer_peak  is_easter  \
0  2025    38  BOLLERIA          734.32               0          0   

   dias_semana week_start  
0            7 2025-09-15  


In [7]:
# 5. Connect to Hopsworks
project = hopsworks.login(
    api_key_value=config.HOPSWORKS_API_KEY,
    project=config.HOPSWORKS_PROJECT_NAME,
)

# Handle to the project's feature store
feature_store = project.get_feature_store()

# Fetch the historical Feature Group. Any failure aborts the cell instead of
# being printed and swallowed: otherwise the notebook would carry on with
# `feature_group` unbound (or left over from a previous run).
try:
    feature_group = feature_store.get_feature_group(
        name=config.FEATURE_GROUP_NAME,
        version=config.FEATURE_GROUP_VERSION,
    )
except Exception as e:
    raise RuntimeError(f"Error al crear/conectar el Feature Group: {e}") from e

# The lookup may hand back None rather than raising; treat that as an error too.
if feature_group is None:
    raise RuntimeError("El Feature Group histórico no existe o el nombre/version no coinciden exactamente. Verifica en Hopsworks.")

2025-09-28 20:32:08,115 INFO: Initializing external client
2025-09-28 20:32:08,116 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-09-28 20:32:08,116 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-09-28 20:32:09,239 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272


In [8]:
# Append only the new week to the historical Feature Group (avoiding duplicates).
# NOTE(review): most of these imports repeat names already provided by the
# notebook's imports cell; they are grouped here so the cell still runs on its own.
from src.data_utils import transformar_a_series_temporales
from src.inference import conectar_hopsworks_feature_store
import src.config as config
import pandas as pd

# `df` is assumed to contain only the last downloaded week.
df_ts = transformar_a_series_temporales(df, familia="BOLLERIA")
ultima_semana = df_ts['week_start'].max()
print(f"Semana a insertar: {ultima_semana}")

# Explicit dtype conversion before the insert: drop 'fecha' when present and
# match the historical Feature Group schema (target Hopsworks type per column).
df_ts = (
    df_ts
    .drop(columns=['fecha'], errors='ignore')
    .astype({
        'year': 'int64',              # bigint
        'week': 'int64',              # bigint
        'familia': 'string',          # string
        'base_imponible': 'float64',  # double
        'is_summer_peak': 'int32',    # int (important: not bigint)
        'is_easter': 'int64',         # bigint
    })
    .assign(week_start=lambda frame: pd.to_datetime(frame['week_start']))  # timestamp
)
print(df_ts.dtypes)
print(df_ts.head())

# Connect to Hopsworks and fetch the historical Feature Group.
proyecto, feature_store = conectar_hopsworks_feature_store()
feature_group = feature_store.get_feature_group(
    name=config.FEATURE_GROUP_NAME,
    version=config.FEATURE_GROUP_VERSION,
)
if feature_group is None:
    # Fail here with a clear message rather than with an AttributeError on .read().
    raise RuntimeError("El Feature Group histórico no existe o el nombre/version no coinciden exactamente. Verifica en Hopsworks.")

# Weeks already stored in the Feature Group.
df_hopsworks = feature_group.read()
semanas_existentes = set(df_hopsworks['week_start'].values)

# Keep only the rows whose week is not in Hopsworks yet.
nuevas_filas = df_ts[~df_ts['week_start'].isin(semanas_existentes)]
if df_ts.empty:
    # Nothing was produced for the week: report that, instead of claiming the
    # week is already stored.
    print("No hay datos para la semana solicitada. No se inserta nada.")
elif nuevas_filas.empty:
    print("La semana ya existe en el Feature Group. No se inserta nada.")
else:
    print(f"Insertando {len(nuevas_filas)} filas nuevas en el Feature Group.")
    # wait_for_job=True is passed so the call waits for the materialization job.
    feature_group.insert(nuevas_filas, write_options={'wait_for_job': True})
    print("Inserción incremental realizada correctamente.")

Semana a insertar: 2025-09-15 00:00:00
year                       int64
week                       int64
familia           string[python]
base_imponible           float64
is_summer_peak             int32
is_easter                  int64
dias_semana                int64
week_start        datetime64[ns]
dtype: object
   year  week   familia  base_imponible  is_summer_peak  is_easter  \
0  2025    38  BOLLERIA          734.32               0          0   

   dias_semana week_start  
0            7 2025-09-15  
2025-09-28 20:32:17,970 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-09-28 20:32:17,972 INFO: Initializing external client
2025-09-28 20:32:17,972 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-09-28 20:32:17,970 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-09-28 20:32:17,972 INFO: Initializing external client
2025-09-28 20:32:17,972 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-09-28 20:32:19,296 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.62s) 
Insertando 1 filas nuevas en el Feature Group.
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.62s) 
Insertando 1 filas nuevas en el Feature Group.


Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:00 | Remaining Time: 00:00



Launching job: times_series_bolleria_feature_group_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1242272/jobs/named/times_series_bolleria_feature_group_1_offline_fg_materialization/executions
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1242272/jobs/named/times_series_bolleria_feature_group_1_offline_fg_materialization/executions
2025-09-28 20:32:37,328 INFO: Waiting for execution to finish. Current state: INITIALIZING. Final status: UNDEFINED
2025-09-28 20:32:37,328 INFO: Waiting for execution to finish. Current state: INITIALIZING. Final status: UNDEFINED
2025-09-28 20:32:40,600 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-09-28 20:32:40,600 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-09-28 20:32:43,820 INFO: Waiting for execution to finish. Current state: RUNNING. Fin

In [None]:
# Actualizar el Feature View de histórico tras insertar los datos en el Feature Group
from src.inference import conectar_hopsworks_feature_store
import src.config as config

# --- FUNCIÓN LOCAL: update_feature_view ---
def update_feature_view(feature_store, fg_metadata, fv_metadata):
    """
    Get the historical Feature View in Hopsworks, creating it if it does not exist.

    Despite its name, this function never modifies or recreates an existing
    Feature View: when one is found it is returned as is.

    Args:
        feature_store: object exposing `get_feature_group`, `get_feature_view`
            and `create_feature_view`.
        fg_metadata: dict with the 'name' and 'version' of the Feature Group.
        fv_metadata: dict with 'name' and 'version' of the Feature View and,
            optionally, 'description', 'labels' and 'query'. When 'query' is
            missing or None, `fg.select_all()` is used for creation.

    Returns:
        The existing or newly created Feature View.

    Raises:
        RuntimeError: if the Feature Group cannot be retrieved (lookup raised
            or returned None).
        Exception: any Feature View creation error whose message does not
            contain "already exists" is propagated unchanged.
    """
    try:
        fg = feature_store.get_feature_group(
            name=fg_metadata['name'],
            version=fg_metadata['version']
        )
    except Exception as e:
        # Keep the original cause attached for debugging.
        raise RuntimeError(f"No se pudo obtener el Feature Group: {e}") from e
    if fg is None:
        # Guard against a lookup that signals "not found" by returning None.
        raise RuntimeError(
            f"No se pudo obtener el Feature Group: {fg_metadata['name']} v{fg_metadata['version']} no existe"
        )
    print(f"Feature Group encontrado: {fg_metadata['name']} v{fg_metadata['version']}")

    try:
        # Try to fetch the existing Feature View.
        fv = feature_store.get_feature_view(
            name=fv_metadata['name'],
            version=fv_metadata['version']
        )
    except Exception:
        # Any lookup failure is treated as "does not exist yet".
        fv = None

    if fv is not None:
        print(f"Feature View encontrado: {fv_metadata['name']} v{fv_metadata['version']}. Ya existe, usando el existente.")
        print("Feature View verificado correctamente.")
        return fv

    print(f"Feature View {fv_metadata['name']} v{fv_metadata['version']} no existe. Creando uno nuevo...")

    # The query is only needed for creation: use the custom one when given,
    # otherwise select every feature of the Feature Group.
    query = fv_metadata.get('query')
    if query is None:
        query = fg.select_all()

    try:
        fv = feature_store.create_feature_view(
            name=fv_metadata['name'],
            version=fv_metadata['version'],
            description=fv_metadata.get('description', ''),
            labels=fv_metadata.get('labels', []),
            query=query
        )
        print("Feature View creado correctamente.")
    except Exception as create_error:
        if "already exists" not in str(create_error):
            raise
        # Creation reported that the view already exists: fetch it instead.
        print("Feature View ya existe, obteniendo existente...")
        fv = feature_store.get_feature_view(
            name=fv_metadata['name'],
            version=fv_metadata['version']
        )
        print("Feature View obtenido correctamente.")
    return fv

# Connect to Hopsworks, then run update_feature_view for the historical
# Feature Group / Feature View described in the project config.
proyecto, feature_store = conectar_hopsworks_feature_store()
update_feature_view(
    feature_store=feature_store,
    fg_metadata=config.HISTORICAL_FEATURE_GROUP_METADATA,
    fv_metadata=config.HISTORICAL_FEATURE_VIEW_METADATA,
)
print("Feature View de histórico actualizado correctamente.")

2025-09-28 20:34:42,701 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-09-28 20:34:42,704 INFO: Initializing external client
2025-09-28 20:34:42,705 INFO: Base URL: https://c.app.hopsworks.ai:443
Connection closed.
2025-09-28 20:34:42,704 INFO: Initializing external client
2025-09-28 20:34:42,705 INFO: Base URL: https://c.app.hopsworks.ai:443






2025-09-28 20:34:43,911 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272




Feature View 'times_series_bolleria_feature_view' eliminado correctamente.
Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1242272/fs/1224799/fv/times_series_bolleria_feature_view/version/1
Feature View 'times_series_bolleria_feature_view' creado y actualizado correctamente.
Feature View de histórico actualizado correctamente.
Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1242272/fs/1224799/fv/times_series_bolleria_feature_view/version/1
Feature View 'times_series_bolleria_feature_view' creado y actualizado correctamente.
Feature View de histórico actualizado correctamente.


FIN