# Experiment Tracking com MLflow

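Este notebook demonstra como usar o MLflow para o rastreamento (tracking) de experimentos com nosso modelo de recomendação: o treinamento usa o `LightGBMRanker` e, em seguida, o modelo registrado é recarregado via `mlflow.pyfunc` para gerar predições.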

In [1]:

import os
import sys
import pandas as pd
import mlflow

# Make the directory two levels above the current working directory importable,
# so the `src.*` imports below resolve. The guard keeps re-runs of this cell
# from appending the same entry twice.
project_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if project_dir not in sys.path:
    sys.path.append(project_dir)

from src.config import configure_mlflow, get_config, CONFIG
from src.recommendation_model.mocked_model import MockedRecommender, MLflowWrapper
from src.recommendation_model.lgbm_ranker import LightGBMRanker
from src.features.schemas import get_model_signature, create_mock_input_example
from src.evaluation.utils import evaluate_model
from src.train.utils import load_train_data
from src.train.core import log_model_to_mlflow, log_basic_metrics, get_run_name
from src.data.data_loader import get_evaluation_data

2025-03-01 17:11:33,157 - src.config - INFO - config.py - Ambiente: dev
2025-03-01 17:11:33,871 - config - INFO - config.py - Ambiente: dev


In [2]:
# Display the CONFIG object imported from src.config so the settings in effect
# for this session are visible in the cell output.
CONFIG

{'MLFLOW_TRACKING_URI': 'http://localhost:5001',
 'MLFLOW_REGISTRY_URI': 'http://localhost:5001',
 'EXPERIMENT': 'news-recommendation-dev',
 'MODEL_NAME': 'news-recommender-dev',
 'API_HOST': '0.0.0.0',
 'API_PORT': 8000,
 'MODEL_ALIAS': 'champion',
 'USE_S3': False,
 'S3_BUCKET': 'fiap-mleng-datathon-data-grupo57',
 'DATA_PATH': 'data/',
 'COLD_START_THRESHOLD': 5,
 'SAMPLE_RATE': 0.1,
 'SCALING_RANGE': 100,
 'NEWS_DIRECTORY': 'challenge-webmedia-e-globo-2023/itens/itens',
 'USERS_DIRECTORY': 'challenge-webmedia-e-globo-2023/files/treino',
 'MODEL_PARAMS': {'threshold': 0.5, 'top_k': 5}}

## 1. Configuração do MLflow

In [3]:
# Configure MLflow through the project helper imported from src.config.
# NOTE(review): presumably this applies the tracking/registry URIs and the
# experiment name from the project configuration — confirm in src.config.
configure_mlflow()

In [4]:
# Look up the "MODEL_PARAMS" entry via get_config; `{}` is passed as the second
# argument (presumably the fallback when the key is missing — confirm in src.config).
# These parameters are later unpacked into the model constructor and logged to MLflow.
model_params = get_config("MODEL_PARAMS", {})
# Bare expression so the notebook displays the resolved parameters.
model_params

{'threshold': 0.5, 'top_k': 5}

## 2. Treinamento e Tracking

In [6]:
# Gather everything the training run needs: the model name, the training data,
# the evaluation data, and the name to give the MLflow run.
# NOTE(review): the recorded execution of this cell failed with FileNotFoundError
# for the relative path 'data/train/X_train.parquet'. Relative paths resolve
# against the kernel's working directory — confirm where the notebook must be
# launched from (or that the training data has been generated) before running.
model_name = get_config("MODEL_NAME", "news-recommender")
X_train, y_train = load_train_data()
eval_data = get_evaluation_data()
# The run name is produced by the project helper from the model name.
run_name = get_run_name(model_name)


2025-03-01 17:14:00,514 - config - INFO - factory.py - Inicializando armazenamento local
2025-03-01 17:14:00,515 - src.config - INFO - utils.py - Carregando X_train de data/train/X_train.parquet...


FileNotFoundError: [Errno 2] No such file or directory: 'data/train/X_train.parquet'

In [None]:


# Train, evaluate and log the model inside a single MLflow run. The `model`
# object created here is reused by the prediction cell further down.
# NOTE(review): the MODEL_PARAMS displayed earlier are {'threshold', 'top_k'} —
# confirm LightGBMRanker accepts these keyword arguments.
with mlflow.start_run(run_name=run_name) as run:
    # Build the ranker from the configured parameters and fit it on the training data.
    model = LightGBMRanker(**model_params)
    model.train(X_train, y_train)
    # Record the same parameters that were passed to the constructor.
    mlflow.log_params(model_params)
    # Evaluate on the evaluation data, then hand the results to the project logging helper.
    metrics = evaluate_model(model, eval_data)
    log_basic_metrics(X_train, metrics)
    # Log the trained model via the project helper, passing the model name and this run's id.
    log_model_to_mlflow(model, model_name, run.info.run_id)
    print(f"Modelo treinado. Run ID: {run.info.run_id}")

## 3. Carregamento e Predição

In [6]:
# `run_id` and `input_example` were never assigned anywhere in this notebook, so
# on a fresh kernel (Restart & Run All) this cell failed with NameError. Both
# are now derived explicitly from names the notebook already defines/imports.
run_id = run.info.run_id  # id of the run opened by the training cell
# NOTE(review): assumes create_mock_input_example() takes no required arguments
# and returns an input accepted by both predict() calls below — confirm in
# src.features.schemas.
input_example = create_mock_input_example()

# Load the model logged under that run back through the generic pyfunc interface.
# NOTE(review): assumes log_model_to_mlflow stored the model under an artifact
# path equal to MODEL_NAME — confirm in src.train.core.
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/{get_config('MODEL_NAME')}")

# Predict with the model loaded from MLflow.
predictions = loaded_model.predict(input_example)
print(f"Predictions: {predictions}")

# Predict with the in-memory model as well, so the two outputs can be compared.
original_predictions = model.predict(input_example)
print(f"Original predictions: {original_predictions}")

Predictions: [0.5]
Original predictions: [0.5]
