In [103]:
import sys
from pathlib import Path
from datetime import datetime

# Make the project's modules importable from the notebook.
# The working directory is assumed to sit one level below the project root
# (the original code resolved `<cwd>/../src`).
# - `<root>/src` lets `import config` resolve.
# - `<root>` lets package-style imports such as `from src.model import ...`
#   (used in a later cell) resolve.
#   NOTE(review): unnecessary if the project is installed in the environment — confirm.
# Entries are appended only when missing, so re-running this cell does not
# keep growing sys.path.
PROJECT_ROOT = Path().resolve().parent
for _ruta in (PROJECT_ROOT / 'src', PROJECT_ROOT):
    if str(_ruta) not in sys.path:
        sys.path.append(str(_ruta))
import config

In [104]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [105]:
import hopsworks

# Log in to Hopsworks with the project name and API key held in `config`,
# then grab the project's feature store handle used by the following cells.
project = hopsworks.login(
    project=config.HOPSWORKS_PROJECT_NAME,
    api_key_value=config.HOPSWORKS_API_KEY,
)
feature_store = project.get_feature_store()

# Confirm which project and feature store the session is bound to.
for etiqueta, nombre in (
    ("Conectado a proyecto:", project.name),
    ("Conectado al feature store:", feature_store.name),
):
    print(etiqueta, nombre)


2025-08-24 13:10:10,982 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-08-24 13:10:10,986 INFO: Initializing external client
2025-08-24 13:10:10,987 INFO: Base URL: https://c.app.hopsworks.ai:443
Connection closed.
2025-08-24 13:10:10,986 INFO: Initializing external client
2025-08-24 13:10:10,987 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-08-24 13:10:12,048 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272
Conectado a proyecto: fleca_mlops
Conectado al feature store: fleca_mlops_featurestore

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1242272
Conectado a proyecto: fleca_mlops
Conectado al feature store: fleca_mlops_featurestore


In [106]:
from datetime import timedelta
import pandas as pd

FG_NAME = "times_series_bolleria_feature_group"
FV_NAME = "times_series_bolleria_feature_view"
FV_VERSION = 1

# Raw batch read of the whole feature view.
# The Hopsworks API names the time-window arguments `start_time` / `end_time`.
# The previous `start_date` / `end_date` / `features` keywords are not part of
# the signature, so a real filter passed through them would not have been applied.
# None on both bounds means "no time filter".
feature_views = feature_store.get_feature_view(name=FV_NAME, version=FV_VERSION)
ts_df = feature_views.get_batch_data(
    start_time=None,
    end_time=None,
)

# Sort chronologically and rebuild a clean 0..n-1 index before displaying.
ts_df = ts_df.sort_values('week_start').reset_index(drop=True)
ts_df.head(5)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.72s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.72s) 


Unnamed: 0,familia,base_imponible,is_summer_peak,is_easter,week_start
0,BOLLERIA,825.11,0,0,2023-01-02 00:00:00+00:00
1,BOLLERIA,658.4,0,0,2023-01-09 00:00:00+00:00
2,BOLLERIA,741.4,0,0,2023-01-16 00:00:00+00:00
3,BOLLERIA,653.64,0,0,2023-01-23 00:00:00+00:00
4,BOLLERIA,680.46,0,0,2023-01-30 00:00:00+00:00


In [107]:
from src.model import transformar_features_target

# Arguments for the project helper that turns the weekly series into a
# supervised table: lagged copies of the target column, the exogenous flags,
# and a `target` column `periodos_adelante` steps ahead, returned as a single
# DataFrame.
# NOTE(review): semantics are inferred from the argument names and the
# displayed output — confirm against src/model.py.
opciones_transformacion = dict(
    lags_list=[1, 2, 3, 52],
    columna_target='base_imponible',
    cols_exogenas=['is_easter', 'is_summer_peak'],
    periodos_adelante=1,
    eliminar_nulos=True,
    return_format='dataframe',
)

# NOTE(review): `ts_df` is overwritten with the transformed frame, so re-running
# this cell alone would feed the already-transformed data back into the helper.
# Re-run the loading cell first.
ts_df = transformar_features_target(ts_df, **opciones_transformacion)

# Rebuild a clean 0..n-1 index.
ts_df = ts_df.reset_index(drop=True)

# Preview the first rows of the transformed frame.
ts_df.head(5)

2025-08-24 13:10:26,097 INFO: Usando DataFrame de entrada: (133, 5)
2025-08-24 13:10:26,100 INFO: Retornando DataFrame combinado: (80, 8)
2025-08-24 13:10:26,100 INFO: Retornando DataFrame combinado: (80, 8)


Unnamed: 0,base_imponible_lag1,base_imponible_lag2,base_imponible_lag3,base_imponible_lag52,is_easter,is_summer_peak,week_start,target
0,572.51,534.79,563.18,825.11,0,0,2024-01-15 00:00:00+00:00,680.3
1,597.65,572.51,534.79,658.4,0,0,2024-01-22 00:00:00+00:00,603.99
2,680.3,597.65,572.51,741.4,0,0,2024-01-29 00:00:00+00:00,600.14
3,603.99,680.3,597.65,653.64,0,0,2024-02-05 00:00:00+00:00,689.32
4,600.14,603.99,680.3,680.46,0,0,2024-02-12 00:00:00+00:00,627.76


In [108]:
# Load the registered model that will produce the predictions.
import mlflow.pyfunc

MODEL_NAME = 'RandomForest'
MODEL_VERSION = 1  # original author's note: must be an integer, not a string

# `models:/<name>/<version>` addresses a specific version in the model registry.
registry_uri = f'models:/{MODEL_NAME}/{MODEL_VERSION}'
model = mlflow.pyfunc.load_model(model_uri=registry_uri)
print('Modelo cargado:', model)


Modelo cargado: mlflow.pyfunc.loaded_model:
  artifact_path: file:///C:/Workspace/mlops_fleca_project/mlruns/5/models/m-ba35a128968c4b91a5792e5e52edc8b2/artifacts
  flavor: mlflow.sklearn
  run_id: 2e4c1ecb6ff34f778d1ee9df56566402



In [109]:
# Direct (non-autoregressive) one-step prediction from the most recent feature row.
from datetime import timedelta

print("=== PREDICCIÓN DIRECTA DE LA PRÓXIMA SEMANA ===")

# Fail early with a clear message instead of an opaque indexing error.
if ts_df.empty:
    raise ValueError("ts_df está vacío: no hay filas de features para predecir")

# Model inputs are every column except the date and the training target.
feature_names = [col for col in ts_df.columns if col not in ['week_start', 'target']]

# Pick the feature row and its date from the SAME record (the latest week),
# rather than pairing the positional last row with the maximum date.
idx_ultima = ts_df['week_start'].idxmax()
ultimo_lunes = ts_df.loc[idx_ultima, 'week_start']
fecha_siguiente = ultimo_lunes + timedelta(days=7)

# NOTE(review): the frame was built with eliminar_nulos=True, so presumably
# every remaining row already has a known `target`. If so, this predicts a week
# that is already observed rather than a truly unseen one — confirm against
# transformar_features_target.

# Slice with a list so the result is a one-row DataFrame that keeps both the
# feature names and each column's original dtype.
X = ts_df.loc[[idx_ultima], feature_names]
valor_predicho = model.predict(X)[0]

print(f"Fecha predicha: {fecha_siguiente.date()}")
print(f"Predicción base_imponible: {valor_predicho:.2f}")

# `prediccion` is the one-row result table consumed by the logging cell below.
prediccion = pd.DataFrame({
    'week_start': [fecha_siguiente],
    'predicted_base_imponible': [valor_predicho]
})
print(prediccion)

=== PREDICCIÓN DIRECTA DE LA PRÓXIMA SEMANA ===
Fecha predicha: 2025-08-11
Predicción base_imponible: 1656.61
                 week_start  predicted_base_imponible
0 2025-08-11 00:00:00+00:00                 1656.6125


In [None]:
# Close any run left open by a previous (possibly failed) cell execution.
if mlflow.active_run():
    mlflow.end_run()

# Select the inference experiment.
# mlflow.set_experiment creates the experiment when it does not exist yet and
# returns the Experiment entity, so no separate lookup/create step is needed.
experiment_name = "fleca_bolleria_inference"
experiment = mlflow.set_experiment(experiment_name)
experiment_id = experiment.experiment_id

# Display the active experiment as the cell output.
experiment

<Experiment: artifact_location='file:///C:/Workspace/mlops_fleca_project/mlruns/6', creation_time=1756034135167, experiment_id='6', last_update_time=1756034135167, lifecycle_stage='active', name='fleca_bolleria_inference', tags={}>

In [113]:
import mlflow

# Close any run left open by a previous (possibly failed) cell execution;
# otherwise mlflow.start_run below would refuse to start a new top-level run.
if mlflow.active_run():
    mlflow.end_run()

# Send tracking data to the local MLflow server and select the inference experiment.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("fleca_bolleria_inference")

# Write the prediction table to CSV so it can be attached to the run.
# NOTE(review): expects `prediccion` to be the one-row DataFrame built by the
# next-week prediction cell. The batch-inference cell further down rebinds the
# same name to the raw model output.
output_path = Path("predicciones_batch.csv")
df_predicciones = pd.DataFrame(prediccion)
df_predicciones.to_csv(output_path, index=False)
print(f'Predicciones guardadas en {output_path}')

# Log the CSV as a run artifact. The path is passed as str, matching the
# batch-inference cell.
with mlflow.start_run(run_name="Batch Inference"):
    mlflow.log_artifact(str(output_path))



Predicciones guardadas en predicciones_batch.csv
🏃 View run Batch Inference at: http://127.0.0.1:5000/#/experiments/6/runs/5079dc5f60824d32b19e84c3464ae4f2
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/6


In [114]:
# Tracked batch-inference run: score every row of the feature table and log
# the model identity, a small summary and the resulting CSV to MLflow.

# Close any run that is still open before starting a new one.
run_abierto = mlflow.active_run()
if run_abierto:
    mlflow.end_run()

with mlflow.start_run(run_name="Batch Inference"):
    # Reload the model from the Model Registry and record which one was used.
    model_uri = f"models:/{MODEL_NAME}/{MODEL_VERSION}"
    model = mlflow.pyfunc.load_model(model_uri)
    for clave, valor in (("model_name", MODEL_NAME), ("model_version", MODEL_VERSION)):
        mlflow.log_param(clave, valor)

    print(f"Modelo '{MODEL_NAME}' version '{MODEL_VERSION}' cargado desde el Model Registry de MLflow.")

    # Score all rows using the feature columns selected earlier.
    X = ts_df[feature_names]
    prediccion = model.predict(X)

    # Pair each prediction with its row's `week_start` and write the CSV.
    # NOTE(review): this overwrites the file written by the previous cell and
    # labels rows with the feature row's date, whereas the next-week cell used
    # date + 7 days — confirm which convention is intended.
    df_predicciones = pd.DataFrame(prediccion, columns=['predicted_base_imponible'])
    fechas = ts_df[['week_start']].reset_index(drop=True)
    df_output = pd.concat([fechas, df_predicciones], axis=1)
    df_output.to_csv(output_path, index=False)
    print(f'Predicciones guardadas en {output_path}')

    # Log when the batch was produced and how many predictions it holds,
    # then attach the CSV to the run.
    resumen_run = {
        "fecha_prediccion": datetime.now().isoformat(),
        "num_predicciones": len(df_output),
    }
    mlflow.log_params(resumen_run)
    mlflow.log_artifact(str(output_path))

    print("Proceso de inferencia trackeado correctamente desde MLflow.")


Modelo 'RandomForest' version '1' cargado desde el Model Registry de MLflow.
Predicciones guardadas en predicciones_batch.csv
Proceso de inferencia trackeado correctamente desde MLflow.
🏃 View run Batch Inference at: http://127.0.0.1:5000/#/experiments/6/runs/100aa03055494acd86621619071d1801
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/6
