In [20]:
import sys
from pathlib import Path

# Make the project root importable: it is taken to be the parent of the
# current working directory, and is prepended to sys.path only once.
project_root = Path.cwd().resolve().parent
_project_root_entry = str(project_root)
if _project_root_entry not in sys.path:
    sys.path.insert(0, _project_root_entry)

In [21]:
# 1. Imports

import os
from datetime import datetime, timedelta, timezone

import hopsworks
import pandas as pd

from src import config
from src.data_utils import load_raw_data, transformar_a_series_temporales

ModuleNotFoundError: No module named 'hopsworks'

In [52]:
# 2. Compute the most recent complete week (Monday through Sunday), in UTC

# Current UTC time as a naive datetime. datetime.utcnow() is deprecated since
# Python 3.12; dropping tzinfo keeps `hoy` naive, exactly as utcnow() returned it.
hoy = datetime.now(timezone.utc).replace(tzinfo=None)
print(f"Fecha actual: {hoy.strftime('%Y-%m-%d %A')}")

# Days elapsed since this week's Monday (0=Monday, 1=Tuesday, ..., 6=Sunday)
dias_desde_lunes = hoy.weekday()

# Step back to the Monday of the previous week (the last complete week) at
# midnight; the matching Sunday is six days later.
ultimo_lunes = (hoy - timedelta(days=dias_desde_lunes + 7)).replace(hour=0, minute=0, second=0, microsecond=0)
ultimo_domingo = ultimo_lunes + timedelta(days=6)

print("Última semana completa:")
print(f"  Lunes: {ultimo_lunes.strftime('%Y-%m-%d %A')}")
print(f"  Domingo: {ultimo_domingo.strftime('%Y-%m-%d %A')}")
print(f"  Rango: {ultimo_lunes.strftime('%Y-%m-%d')} a {ultimo_domingo.strftime('%Y-%m-%d')}")

Fecha actual: 2025-09-21 Sunday
Última semana completa:
  Lunes: 2025-09-08 Monday
  Domingo: 2025-09-14 Sunday
  Rango: 2025-09-08 a 2025-09-14


In [41]:
# 3. Download the rows for that week from BigQuery and load them
# Both range bounds are passed as 'YYYY-MM-DD' strings.
fecha_inicio_semana = ultimo_lunes.strftime('%Y-%m-%d')
fecha_fin_semana = ultimo_domingo.strftime('%Y-%m-%d')

df = load_raw_data(
    fecha_inicio=fecha_inicio_semana,
    fecha_fin=fecha_fin_semana,
    descargar_bq=True,
)
print('Datos descargados:', df.shape)

Descargando datos desde BigQuery porque descargar_bq=True o no existe el archivo C:\Workspace\mlops_fleca_project\data\raw\raw_data_bq_forecasting.parquet
Iniciando conexión con BigQuery...
Conexión establecida.
Usando fechas en consulta SQL: fecha_inicio='2025-09-08' y fecha_fin='2025-09-14'
Descargando datos de fleca-del-port.fleca_ventas_dia.t_facturas_dia_extendida_2023 ...
Ejecutando consulta SQL:

    SELECT 
        fecha,
        n_factura,
        zona_de_venta,
        producto,
        familia,
        cantidad,
        base_imponible,
        tipo_IVA,
        total
    FROM `fleca-del-port.fleca_ventas_dia.t_facturas_dia_extendida_2023`
    WHERE fecha >= '2025-09-08' AND fecha <= '2025-09-14'
Conexión establecida.
Usando fechas en consulta SQL: fecha_inicio='2025-09-08' y fecha_fin='2025-09-14'
Descargando datos de fleca-del-port.fleca_ventas_dia.t_facturas_dia_extendida_2023 ...
Ejecutando consulta SQL:

    SELECT 
        fecha,
        n_factura,
        zona_de_venta



Filas descargadas de la segunda tabla: 2483
Guardando archivo en C:\Workspace\mlops_fleca_project\data\raw\raw_data_bq_forecasting_20250921.parquet ...
Archivo guardado correctamente.
Usando archivo recién generado: C:\Workspace\mlops_fleca_project\data\raw\raw_data_bq_forecasting_20250921.parquet
Cargando datos desde: C:\Workspace\mlops_fleca_project\data\raw\raw_data_bq_forecasting_20250921.parquet
Validando fechas entre 2025-09-08 y 2025-09-14 (7 días)
Total de fechas faltantes: 0
No faltan fechas en el rango especificado.
Datos descargados: (2483, 11)


In [42]:
# 4. Transform the raw rows into weekly time series, restricted to the BOLLERIA family
df_ts = transformar_a_series_temporales(df, familia="BOLLERIA")
# Quick sanity check: shape and first rows of the generated series
print('Series temporales generadas:', df_ts.shape)
print(df_ts.head())


Series temporales generadas: (1, 8)
   year  week   familia  base_imponible  is_summer_peak  is_easter  \
0  2025    37  BOLLERIA          969.84               0          0   

   dias_semana week_start  
0            7 2025-09-08  


In [43]:
# Drop the 'fecha' column when present
df_ts = df_ts.drop(columns=['fecha'], errors='ignore')

# Cast every column to the type expected by the historical Feature Group schema
df_ts = df_ts.astype({
    'year': 'int64',             # bigint
    'week': 'int64',             # bigint
    'familia': 'string',         # string
    'base_imponible': 'float64', # double
    'is_summer_peak': 'int32',   # int
    'is_easter': 'int64',        # bigint
})
df_ts['week_start'] = pd.to_datetime(df_ts['week_start'])  # timestamp

print(df_ts.dtypes)
print(df_ts.head())

year                       int64
week                       int64
familia           string[python]
base_imponible           float64
is_summer_peak             int32
is_easter                  int64
dias_semana                int64
week_start        datetime64[ns]
dtype: object
   year  week   familia  base_imponible  is_summer_peak  is_easter  \
0  2025    37  BOLLERIA          969.84               0          0   

   dias_semana week_start  
0            7 2025-09-08  


In [44]:
# 5. Connect to Hopsworks
project = hopsworks.login(
    api_key_value=config.HOPSWORKS_API_KEY,
    project=config.HOPSWORKS_PROJECT_NAME,
)

# Handle to the project's feature store
feature_store = project.get_feature_store()

# Fetch the historical Feature Group. Any failure is logged and then re-raised:
# swallowing it would leave `feature_group` undefined and surface later as an
# unrelated NameError in the cells that use it.
try:
    feature_group = feature_store.get_feature_group(
        name=config.FEATURE_GROUP_NAME,
        version=config.FEATURE_GROUP_VERSION,
    )
    if feature_group is None:
        raise Exception("El Feature Group histórico no existe o el nombre/version no coinciden exactamente. Verifica en Hopsworks.")
except Exception as e:
    print(f"Error al crear/conectar el Feature Group: {e}")
    raise

2025-09-21 19:58:36,867 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-09-21 19:58:36,871 INFO: Initializing external client
2025-09-21 19:58:36,871 INFO: Base URL: https://c.app.hopsworks.ai:443
Connection closed.
2025-09-21 19:58:36,871 INFO: Initializing external client
2025-09-21 19:58:36,871 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-09-21 19:58:37,993 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272


In [5]:
# Append only the new week to the historical Feature Group, skipping weeks
# that are already stored there.
import pandas as pd
import src.config as config
from src.data_utils import transformar_a_series_temporales
from src.inference import conectar_hopsworks_feature_store

# `df` is expected to hold only the last downloaded week
df_ts = transformar_a_series_temporales(df, familia="BOLLERIA")
ultima_semana = df_ts['week_start'].max()
print(f"Semana a insertar: {ultima_semana}")

# Explicit type conversion before the insert: drop 'fecha' when present and
# cast each column to the historical Feature Group schema (the original author
# flags this as critical to avoid type errors in Hopsworks).
df_ts = df_ts.drop(columns=['fecha'], errors='ignore')
df_ts = df_ts.astype({
    'year': 'int64',             # bigint
    'week': 'int64',             # bigint
    'familia': 'string',         # string
    'base_imponible': 'float64', # double
    'is_summer_peak': 'int32',   # int (flagged as important by the original author)
    'is_easter': 'int64',        # bigint
})
df_ts['week_start'] = pd.to_datetime(df_ts['week_start'])  # timestamp
print(df_ts.dtypes)
print(df_ts.head())

# Connect to Hopsworks and fetch the historical Feature Group
proyecto, feature_store = conectar_hopsworks_feature_store()
feature_group = feature_store.get_feature_group(
    name=config.FEATURE_GROUP_NAME,
    version=config.FEATURE_GROUP_VERSION,
)

# Collect the week_start values already present in the Feature Group
df_hopsworks = feature_group.read()
semanas_existentes = set(df_hopsworks['week_start'].values)

# Keep only the rows whose week is not stored yet, then insert them (if any)
ya_presente = df_ts['week_start'].isin(semanas_existentes)
nuevas_filas = df_ts[~ya_presente]
if not nuevas_filas.empty:
    print(f"Insertando {len(nuevas_filas)} filas nuevas en el Feature Group.")
    feature_group.insert(nuevas_filas, write_options={'wait_for_job': True})
    print("Inserción incremental realizada correctamente.")
else:
    print("La semana ya existe en el Feature Group. No se inserta nada.")

ModuleNotFoundError: No module named 'pandas'

In [46]:
# Refresh the historical Feature View after the new rows were inserted into the Feature Group
from src.inference import conectar_hopsworks_feature_store
from scripts.update_feature_view import update_feature_view
import src.config as config

# Open the Hopsworks connection (returns the project and its feature store)
proyecto, feature_store = conectar_hopsworks_feature_store()
# NOTE(review): judging by the captured output of this cell, the helper deletes
# and recreates the Feature View — confirm in scripts/update_feature_view.py.
update_feature_view(
    feature_store,
    config.HISTORICAL_FEATURE_GROUP_METADATA,
    config.HISTORICAL_FEATURE_VIEW_METADATA
 )
print("Feature View de histórico actualizado correctamente.")

2025-09-21 19:58:44,192 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-09-21 19:58:44,195 INFO: Initializing external client
2025-09-21 19:58:44,196 INFO: Base URL: https://c.app.hopsworks.ai:443
Connection closed.
2025-09-21 19:58:44,195 INFO: Initializing external client
2025-09-21 19:58:44,196 INFO: Base URL: https://c.app.hopsworks.ai:443






2025-09-21 19:58:45,246 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272




Feature View 'times_series_bolleria_feature_view' eliminado correctamente.
Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1242272/fs/1224799/fv/times_series_bolleria_feature_view/version/1
Feature View 'times_series_bolleria_feature_view' creado y actualizado correctamente.
Feature View de histórico actualizado correctamente.
Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1242272/fs/1224799/fv/times_series_bolleria_feature_view/version/1
Feature View 'times_series_bolleria_feature_view' creado y actualizado correctamente.
Feature View de histórico actualizado correctamente.


FIN

In [6]:
# Install pandas into the Poetry-managed environment used by the notebook
# NOTE(review): the version is unpinned; the hopsworks install output in this
# notebook shows it resolving pandas<2.3.0 and numpy<2 — consider pinning to match.
!poetry run pip install pandas

Collecting pandas
  Downloading pandas-2.3.2-cp311-cp311-win_amd64.whl (11.3 MB)
     --------------------------------------- 11.3/11.3 MB 54.7 MB/s eta 0:00:00
Collecting numpy>=1.23.2
  Downloading numpy-2.3.3-cp311-cp311-win_amd64.whl (13.1 MB)
     --------------------------------------- 13.1/13.1 MB 54.4 MB/s eta 0:00:00
Collecting pytz>=2020.1
  Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB)
Collecting tzdata>=2022.7
  Using cached tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, numpy, pandas
Successfully installed numpy-2.3.3 pandas-2.3.2 pytz-2025.2 tzdata-2025.2




In [9]:
# Install pyarrow into the Poetry-managed environment used by the notebook
# NOTE(review): the version is unpinned — consider declaring it in pyproject.toml instead.
!poetry run pip install pyarrow

Collecting pyarrow
  Downloading pyarrow-21.0.0-cp311-cp311-win_amd64.whl (26.2 MB)
     --------------------------------------- 26.2/26.2 MB 46.9 MB/s eta 0:00:00
Installing collected packages: pyarrow
Successfully installed pyarrow-21.0.0




In [11]:
# Install google-cloud-bigquery into the Poetry-managed environment used by the notebook
# NOTE(review): the version is unpinned — consider declaring it in pyproject.toml instead.
!poetry run pip install google-cloud-bigquery

Collecting google-cloud-bigquery
  Downloading google_cloud_bigquery-3.38.0-py3-none-any.whl (259 kB)
     -------------------------------------- 259.3/259.3 kB 5.3 MB/s eta 0:00:00
Collecting google-api-core[grpc]<3.0.0,>=2.11.1
  Downloading google_api_core-2.25.1-py3-none-any.whl (160 kB)
     -------------------------------------- 160.8/160.8 kB 9.4 MB/s eta 0:00:00
Collecting google-auth<3.0.0,>=2.14.1
  Downloading google_auth-2.40.3-py2.py3-none-any.whl (216 kB)
     ------------------------------------- 216.1/216.1 kB 12.9 MB/s eta 0:00:00
Collecting google-cloud-core<3.0.0,>=2.4.1
  Downloading google_cloud_core-2.4.3-py2.py3-none-any.whl (29 kB)
Collecting google-resumable-media<3.0.0,>=2.0.0
  Downloading google_resumable_media-2.7.2-py2.py3-none-any.whl (81 kB)
     ---------------------------------------- 81.3/81.3 kB 4.4 MB/s eta 0:00:00
Collecting requests<3.0.0,>=2.21.0
  Downloading requests-2.32.5-py3-none-any.whl (64 kB)
     ---------------------------------------- 



In [13]:
# Install python-dotenv into the Poetry-managed environment used by the notebook
# NOTE(review): the version is unpinned — consider declaring it in pyproject.toml instead.
!poetry run pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.1




In [17]:
# Instalar hopsworks en el entorno del notebook gestionado por Poetry
!poetry run pip install hopsworks

Collecting hopsworks
  Downloading hopsworks-4.4.2-py3-none-any.whl (691 kB)
     -------------------------------------- 691.6/691.6 kB 8.7 MB/s eta 0:00:00
Collecting pyhumps==1.6.1
  Downloading pyhumps-1.6.1-py3-none-any.whl (5.0 kB)
Collecting furl
  Downloading furl-2.1.4-py2.py3-none-any.whl (27 kB)
Collecting boto3
  Downloading boto3-1.40.40-py3-none-any.whl (139 kB)
     ---------------------------------------- 139.3/139.3 kB ? eta 0:00:00
Collecting pandas[mysql]<2.3.0
  Downloading pandas-2.2.3-cp311-cp311-win_amd64.whl (11.6 MB)
     --------------------------------------- 11.6/11.6 MB 36.4 MB/s eta 0:00:00
Collecting numpy<2
  Downloading numpy-1.26.4-cp311-cp311-win_amd64.whl (15.8 MB)
     --------------------------------------- 15.8/15.8 MB 54.4 MB/s eta 0:00:00
Collecting pyjks
  Downloading pyjks-20.0.0-py2.py3-none-any.whl (45 kB)
     ---------------------------------------- 45.3/45.3 kB ? eta 0:00:00
Collecting mock
  Downloading mock-5.2.0-py3-none-any.whl (31 kB)

  DEPRECATION: twofish is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559
ERROR: Could not install packages due to an OSError: [WinError 5] Acceso denegado: 'C:\\Workspace\\mlops_fleca_project\\.venv\\lib\\site-packages\\google\\~upb\\_message.pyd'
Check the permissions.

