In [1]:
!pip install "numpy<2.0" "scikit-learn<1.4" "scipy<1.12" "imbalanced-learn<0.13"




In [4]:
# If these package versions are not installed in the container yet, run the pip install cell above ONCE.

# ============================================
# 0. IMPORTS
# ============================================
import os
import numpy as np
import pandas as pd

import requests
from mlflow.tracking import MlflowClient

from sqlalchemy import create_engine

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
)

from imblearn.over_sampling import SMOTE

import matplotlib.pyplot as plt

import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature


# ============================================
# 1. POSTGRES CONNECTION (YOUR LOCAL "SNOWFLAKE")
# ============================================

# Connection settings are read from the environment; the second argument of
# each os.getenv call is the fallback used when the variable is not set.
DB_USER = os.getenv("DB_USER", "mluser")
DB_PASSWORD = os.getenv("DB_PASSWORD", "mlpass")
DB_HOST = os.getenv("DB_HOST", "postgres")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME", "mldb")

db_url = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
engine = create_engine(db_url)

# Print a redacted copy of the URL: db_url embeds the password, and anything
# printed here is stored in the notebook's saved output.
db_url_redacted = f"postgresql://{DB_USER}:***@{DB_HOST}:{DB_PORT}/{DB_NAME}"
print("Conectando no Postgres em:", db_url_redacted)

# ============================================
# 2. READ THE DATA FROM THE TABLE
# ============================================

TABLE_NAME = "dados_analise"

# Load every column of the table; the table name is double-quoted in the SQL.
select_all_sql = f'SELECT * FROM "{TABLE_NAME}"'
df = pd.read_sql(select_all_sql, con=engine)

print("Shape da base:", df.shape)
print("Colunas:", list(df.columns))

# ============================================
# 3. DEFINE FEATURES AND TARGET
# ============================================

TARGET_COL = "DEATH_EVENT"

# Stop early, listing the available columns, when the target is missing.
if TARGET_COL not in df.columns:
    available_cols = df.columns.tolist()
    raise ValueError(
        f'A coluna alvo "{TARGET_COL}" n√£o existe na tabela. '
        f"Ajuste TARGET_COL para uma das colunas: {available_cols}"
    )

# Rows without a target value are discarded before y and X are derived.
df = df.dropna(subset=[TARGET_COL])
y = df[TARGET_COL]

# Columns that are not features; names absent from the table are skipped.
drop_cols = [TARGET_COL, "device_name", "ts", "id"]
drop_cols_present = [col for col in drop_cols if col in df.columns]
X = df.drop(columns=drop_cols_present)

print("Shape X:", X.shape)
print("Shape y:", y.shape)

# ============================================
# 4. TRAIN / TEST SPLIT
# ============================================

# test_size=0.3 with a fixed seed; the split is stratified on y.
split_options = {"test_size": 0.3, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print("Treino:", X_train.shape, "Teste:", X_test.shape)

# ============================================
# 5. SCALER + SMOTE
# ============================================

# The scaler is fitted on the training split only; the test split is then
# transformed with that same fitted scaler.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SMOTE resamples the scaled training data; the test split is not resampled.
smote = SMOTE(random_state=42)
X_train_smote_scaled, y_train_smote = smote.fit_resample(X_train_scaled, y_train)

# Show how many samples each class has after resampling.
print("Ap√≥s SMOTE:")
unique, counts = np.unique(y_train_smote, return_counts=True)
print("Distribui√ß√£o de y_train_smote:", dict(zip(unique, counts)))

# ============================================
# 6. MLFLOW CONFIGURATION
# ============================================

EXPERIMENT_NAME = "meu_experimento_trendz"
MLFLOW_TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://mlflow:5000")

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

# Make sure no earlier run is still open before a new one is started.
mlflow.end_run()

print("MLflow tracking URI:", mlflow.get_tracking_uri())
print("Experimento:", mlflow.get_experiment_by_name(EXPERIMENT_NAME))

# ============================================
# 7. TRAIN FINAL MODEL (RandomForest + SMOTE)
# ============================================

rf_final = RandomForestClassifier(
    n_estimators=200, max_depth=3, random_state=42, class_weight="balanced"
)

# Fit on the resampled, scaled training data; predict on the scaled test split.
rf_final.fit(X_train_smote_scaled, y_train_smote)
y_pred = rf_final.predict(X_test_scaled)

# Test-split metrics; the three scorers below share the zero_division=0 setting.
acc = accuracy_score(y_test, y_pred)
prec, rec, f1 = (
    scorer(y_test, y_pred, zero_division=0)
    for scorer in (precision_score, recall_score, f1_score)
)

print("\n=== M√©tricas RF + SMOTE (modelo final) ===")
for label, value in (
    ("Accuracy :", acc),
    ("Precision:", prec),
    ("Recall   :", rec),
    ("F1-score :", f1),
):
    print(label, value)

# ============================================
# 8. SEND TO MLFLOW (PARAMS + METRICS +
#    CSV + PLOTS + MODEL + SCALER)
# ============================================

with mlflow.start_run(run_name="RF_SMOTE_modelo_final") as run:
    # -------- Parameters --------
    mlflow.log_params({
        "algoritmo": "RandomForestClassifier",
        "oversampling": "SMOTE",
        "n_estimators": rf_final.n_estimators,
        "max_depth": rf_final.max_depth,
        # Read from the estimator so the logged value cannot drift from the
        # configuration that was actually trained.
        "class_weight": rf_final.class_weight,
        "n_features": X_train_smote_scaled.shape[1],
        "tabela_origem": TABLE_NAME,
        "target_col": TARGET_COL,
    })

    # -------- Metrics --------
    mlflow.log_metrics({
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1_score": f1,
    })

    # -------- 8.0. Evaluation CSV --------
    # True vs. predicted labels for the test split, written locally and then
    # uploaded under the "tabelas" artifact folder.
    eval_df = pd.DataFrame({
        "y_true": y_test.values,
        "y_pred": y_pred
    })
    eval_dir = "eval"
    os.makedirs(eval_dir, exist_ok=True)
    eval_csv_path = os.path.join(eval_dir, "y_true_y_pred.csv")
    eval_df.to_csv(eval_csv_path, index=False)

    mlflow.log_artifact(eval_csv_path, artifact_path="tabelas")

    # -------- 8.1. Confusion matrix (PNG) --------
    cm = confusion_matrix(y_test, y_pred)
    fig_cm, ax_cm = plt.subplots()
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(ax=ax_cm)
    ax_cm.set_title("Matriz de confus√£o")
    fig_cm.tight_layout()

    mlflow.log_figure(fig_cm, "plots/confusion_matrix.png")
    plt.close(fig_cm)

    # -------- 8.2. Feature importance (PNG) --------
    feature_names = list(X.columns)
    importances = rf_final.feature_importances_
    # Indices that order the features from most to least important.
    indices = np.argsort(importances)[::-1]

    fig_imp, ax_imp = plt.subplots(figsize=(8, 5))
    ax_imp.bar(range(len(importances)), importances[indices])
    ax_imp.set_xticks(range(len(importances)))
    ax_imp.set_xticklabels(
        [feature_names[i] for i in indices],
        rotation=90
    )
    ax_imp.set_title("Import√¢ncia das features")
    ax_imp.set_ylabel("Import√¢ncia relativa")
    fig_imp.tight_layout()

    mlflow.log_figure(fig_imp, "plots/feature_importance.png")
    plt.close(fig_imp)

    # -------- 8.3. Model (main artifact) --------
    # The signature is inferred from the scaled, resampled training matrix,
    # i.e. the registered model takes already-scaled features as input.
    signature = infer_signature(
        X_train_smote_scaled,
        rf_final.predict(X_train_smote_scaled)
    )

    mlflow.sklearn.log_model(
        rf_final,
        artifact_path="model",
        signature=signature,
        registered_model_name="meu_modelo_trendz"
    )

    # -------- 8.4. Fitted scaler --------
    # rf_final was trained on the output of `scaler`, so the fitted scaler is
    # stored with the run; without it, raw feature rows cannot be turned into
    # the inputs the registered model was trained on.
    mlflow.sklearn.log_model(scaler, artifact_path="scaler")

    print("\nRun ID:", run.info.run_id)

print("\n‚úÖ Modelo + m√©tricas + CSV + gr√°ficos enviados para o MLflow!")
print("Abra http://localhost:5000:")
print(" - Veja params/metrics na vis√£o do run;")
print(" - Em Artifacts: tabelas/, plots/ e model/.")
print("No Model Registry, promova `meu_modelo_trendz` para `Production` para o mlflow-serving usar.")



# -------- 9. Send metrics to ThingsBoard -------
MLFLOW_TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://mlflow:5000")
MODEL_NAME = os.getenv("MLFLOW_MODEL_NAME", "meu_modelo_trendz")

TB_BASE_URL = os.getenv("TB_BASE_URL", "http://thingsboard:9090")

# The token is a credential (it is placed in the telemetry URL below), so it is
# read from the environment only; no fallback value is kept in the notebook.
TB_TOKEN = os.getenv("TB_METRICS_TOKEN")

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)
versions = client.search_model_versions(f"name='{MODEL_NAME}'")

if TB_TOKEN:
    # Send the versions in ascending numeric order.
    versions_to_send = sorted(versions, key=lambda v: int(v.version))
else:
    # Best-effort step: without a token nothing is sent, but the notebook
    # keeps running.
    print("TB_METRICS_TOKEN ausente: envio para o ThingsBoard ignorado.")
    versions_to_send = []

for mv in versions_to_send:
    run_id = mv.run_id
    version = int(mv.version)

    # Stored under its own name so the `run`, `acc`, `prec`, `rec` and `f1`
    # values produced by the training section are not overwritten here.
    mv_run = client.get_run(run_id)

    payload = {"version": version, "run_id": run_id}

    # Metrics the run did not log are left out of the payload.
    for metric_key in ("accuracy", "precision", "recall", "f1_score"):
        metric_value = mv_run.data.metrics.get(metric_key)
        if metric_value is not None:
            payload[metric_key] = metric_value

    # Params are sent as integers when they parse as such, otherwise the
    # value is sent exactly as MLflow returned it.
    for param_key in ("n_estimators", "max_depth"):
        param_value = mv_run.data.params.get(param_key)
        if param_value is None:
            continue
        try:
            payload[param_key] = int(param_value)
        except ValueError:
            payload[param_key] = param_value

    # Same filter as before: drop any entry whose value is None.
    payload = {k: v for k, v in payload.items() if v is not None}

    print(f"\nEnviando vers√£o {version} (run_id={run_id}) para ThingsBoard:")
    print(payload)

    # A failed send is reported and the loop moves on to the next version.
    try:
        resp = requests.post(
            f"{TB_BASE_URL}/api/v1/{TB_TOKEN}/telemetry",
            json=payload,
            timeout=5,
        )
        print("Resposta TB:", resp.status_code, resp.text)
    except Exception as e:
        print("Erro ao enviar para TB:", e)


Conectando no Postgres em: postgresql://mluser:mlpass@postgres:5432/mldb
Shape da base: (307, 13)
Colunas: ['age', 'anaemia', 'creatinine_phosphokinase', 'diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking', 'time', 'DEATH_EVENT']
Shape X: (307, 12)
Shape y: (307,)
Treino: (214, 12) Teste: (93, 12)
Ap√≥s SMOTE:
Distribui√ß√£o de y_train_smote: {0: 142, 1: 142}
MLflow tracking URI: http://mlflow:5000
Experimento: <Experiment: artifact_location='mlflow-artifacts:/2', creation_time=1764719532557, experiment_id='2', last_update_time=1764719532557, lifecycle_stage='active', name='meu_experimento_trendz', tags={}>

=== M√©tricas RF + SMOTE (modelo final) ===
Accuracy : 0.8602150537634409
Precision: 0.8275862068965517
Recall   : 0.75
F1-score : 0.7868852459016394


Registered model 'meu_modelo_trendz' already exists. Creating a new version of this model...
2025/12/03 13:57:18 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: meu_modelo_trendz, version 11
Created version '11' of model 'meu_modelo_trendz'.



Run ID: 2d5e082a032242bcb9bb8b1ef4ff3627
üèÉ View run RF_SMOTE_modelo_final at: http://mlflow:5000/#/experiments/2/runs/2d5e082a032242bcb9bb8b1ef4ff3627
üß™ View experiment at: http://mlflow:5000/#/experiments/2

‚úÖ Modelo + m√©tricas + CSV + gr√°ficos enviados para o MLflow!
Abra http://localhost:5000:
 - Veja params/metrics na vis√£o do run;
 - Em Artifacts: tabelas/, plots/ e model/.
No Model Registry, promova `meu_modelo_trendz` para `Production` para o mlflow-serving usar.

Enviando vers√£o 1 (run_id=1d6ee8b7f0ec4c228ae97fa85cbaaeab) para ThingsBoard:
{'version': 1, 'run_id': '1d6ee8b7f0ec4c228ae97fa85cbaaeab', 'accuracy': 0.8688524590163934, 'precision': 0.875, 'recall': 0.7, 'f1_score': 0.7777777777777777, 'n_estimators': 200, 'max_depth': 5}
Resposta TB: 200 

Enviando vers√£o 2 (run_id=212dc7f320f54a9da8a80333b5e56baf) para ThingsBoard:
{'version': 2, 'run_id': '212dc7f320f54a9da8a80333b5e56baf', 'accuracy': 0.8688524590163934, 'precision': 0.875, 'recall': 0.7, 'f1_score'