In [1]:
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import JSONResponse, StreamingResponse
# main.py


In [2]:
import pandas as pd
import numpy as np
import joblib, yaml
from pathlib import Path
from sklearn.metrics import (
    f1_score, balanced_accuracy_score, roc_auc_score,
    average_precision_score, confusion_matrix
)

# ---------- Utilidades ----------
REQUIRED = ["Issue","Sub-issue","Product","Sub-product","Company","State","Date received"]

def ensure_columns(df: pd.DataFrame):
    missing = [c for c in REQUIRED if c not in df.columns]
    return missing

def scores_from_model(model, X: pd.DataFrame) -> np.ndarray:
    if hasattr(model, "predict_proba"):
        return model.predict_proba(X)[:,1]
    if hasattr(model, "decision_function"):
        s = model.decision_function(X); smin, smax = float(np.min(s)), float(np.max(s))
        return (s - smin)/(smax - smin + 1e-9)
    return model.predict(X).astype(float)

# ---------- Configuración ----------
app = FastAPI(title="API de Scoring Timely Response")

repo_root = Path(__file__).resolve().parents[0]
default_model = repo_root/"models"/"trained_model.pkl"
default_cfg   = repo_root/"models"/"model_config.yaml"

# Cargar modelo por defecto
model, thr_default, model_name = None, 0.5, "desconocido"
try:
    model = joblib.load(default_model)
    if default_cfg.exists():
        cfg = yaml.safe_load(default_cfg.read_text(encoding="utf-8"))
        thr_default = float(cfg.get("threshold", 0.5))
        model_name = str(cfg.get("model", model_name))
except Exception as e:
    print(f"⚠️ Error cargando modelo: {e}")

# ---------- Endpoint principal ----------
@app.post("/predict")
async def predict(file: UploadFile = File(...), threshold: float = Form(thr_default)):
    """
    Recibe un CSV, devuelve predicciones, KPIs y matriz de confusión (si hay etiqueta real).
    """
    # Leer CSV
    df = pd.read_csv(file.file, low_memory=False)

    # Validar columnas mínimas
    missing = ensure_columns(df)
    if missing:
        return JSONResponse(status_code=400, content={"error": f"Faltan columnas: {missing}"})

    # Hacer scoring
    scores = scores_from_model(model, df)
    pred = (scores >= threshold).astype(int)

    # Preparar resultados
    df_out = df.copy()
    df_out["score"] = scores
    df_out["pred"] = pred

    response = {
        "modelo": model_name,
        "umbral": threshold,
        "filas": int(len(df_out)),
        "score_medio": float(np.mean(scores)),
        "rate_predicho_yes": float((pred==1).mean()),
        "resultados": df_out.head(50).to_dict(orient="records")  # muestra primeras 50 filas
    }

    # Si hay columna ground truth, calcular métricas
    if "Timely response?" in df.columns:
        y_true = df["Timely response?"].astype("string").str.strip().str.lower().map(
            {"yes":1,"y":1,"true":1,"no":0,"n":0,"false":0}
        )
        mask = y_true.notna()
        if mask.any():
            y = y_true[mask].astype(int).to_numpy()
            y_pred = pred[mask]

            response["metricas"] = {
                "BalancedAccuracy": float(balanced_accuracy_score(y, y_pred)),
                "F1_No": float(f1_score((y==0).astype(int), (y_pred==0).astype(int))),
                "F1_Yes": float(f1_score(y, y_pred)),
                "ROC_AUC": float(roc_auc_score(y, scores[mask])),
                "PR_AUC": float(average_precision_score(y, scores[mask])),
                "ConfusionMatrix": confusion_matrix(y, y_pred, labels=[0,1]).tolist()
            }
        else:
            response["metricas"] = {"info": "La columna 'Timely response?' no contiene valores válidos."}

    return response


NameError: name '__file__' is not defined