# 04_inference.ipynb

Creado solo para cargar los artifacts, aplicar el modelo calibrado y generar scores para el dataset cargado, junto con métricas de negocio. Es solo un check antes de pasar mi código a un repo para darle una estructura production-like en ejecutables.

In [2]:
import json, joblib, numpy as np, pandas as pd
from pathlib import Path

In [4]:
# --- Load/create all we need ---
# All artifacts are resolved relative to ART_DIR (currently the working directory).
ART_DIR = Path('.')
MODEL_PREFIT = ART_DIR / 'model_best_prefit.pkl'
MODEL_CAL = ART_DIR / 'model_best_calibrated.pkl'
THR_JSON = ART_DIR / 'threshold.json'
TARGET = 'fraude'

# NOTE: joblib.load unpickles arbitrary Python objects; only load artifacts
# that come from a trusted source.
best_est = joblib.load(MODEL_PREFIT)
cal = joblib.load(MODEL_CAL)

# Read the threshold inside a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open(THR_JSON, encoding='utf-8') as thr_file:
    thr_info = json.load(thr_file)
THR = float(thr_info['thr'])
print('Loaded threshold =', THR)

def align_features(df, ref_cols):
    """Return ``df`` with exactly the columns in ``ref_cols``, in that order.

    Columns listed in ``ref_cols`` but absent from ``df`` are created and
    filled with 0; columns of ``df`` that are not in ``ref_cols`` are dropped.
    """
    aligned = df.reindex(ref_cols, axis="columns", fill_value=0)
    return aligned

# Reference column list taken from the pre-fit estimator; used to align
# incoming frames before scoring.
FEATURES = [*best_est.feature_names_in_]

Loaded threshold = 0.16


In [6]:
# Batch scoring demo
inp = pd.read_csv('./test_processed.csv')

# Drop the label and the raw amount, then align the rest to the reference columns.
feature_frame = inp.drop(columns=[TARGET, 'monto_raw'])
X = align_features(feature_frame, FEATURES)
amt = inp['monto_raw'].astype(float).to_numpy()

# Column 1 of predict_proba is used as the fraud probability.
prob = cal.predict_proba(X)[:, 1]
# Hard 0/1 label at the loaded threshold (not read again in the visible cells).
pred = (prob >= THR).astype(int)

def profit_total(y_true, y_prob, amount, thr, gain_rate=0.25, loss_rate=1.0):
    """Total profit obtained when approving every transaction scored below ``thr``.

    A transaction is approved when ``y_prob < thr``. Approved transactions with
    ``y_true == 0`` contribute ``gain_rate * amount``; approved transactions
    with ``y_true == 1`` cost ``loss_rate * amount``. Non-approved transactions
    contribute nothing.

    Parameters
    ----------
    y_true : array-like
        Labels; 0 is treated as legitimate and 1 as fraud.
    y_prob : array-like
        Scores compared against ``thr``.
    amount : array-like
        Transaction amounts, same shape as ``y_true`` and ``y_prob``.
    thr : float
        Decision threshold; scores strictly below it are approved.
    gain_rate : float, default 0.25
        Fraction of the amount earned on each approved legitimate transaction.
    loss_rate : float, default 1.0
        Fraction of the amount lost on each approved fraudulent transaction.

    Returns
    -------
    float
        ``gain - loss`` over all approved transactions.

    Raises
    ------
    ValueError
        If the three inputs do not share the same shape.
    """
    # Coerce to ndarrays so lists and Series (regardless of index) behave the
    # same under element-wise comparison and boolean masking.
    labels = np.asarray(y_true)
    scores = np.asarray(y_prob)
    amounts = np.asarray(amount, dtype=float)

    # Guard against silent broadcasting of mismatched inputs.
    if not (labels.shape == scores.shape == amounts.shape):
        raise ValueError(
            "y_true, y_prob and amount must have the same shape; got "
            f"{labels.shape}, {scores.shape}, {amounts.shape}"
        )

    approve = scores < thr
    legit, fraud = (labels == 0), (labels == 1)
    gain = gain_rate * amounts[approve & legit].sum()
    loss = loss_rate * amounts[approve & fraud].sum()
    return float(gain - loss)

# Profit of the approve/block policy at the loaded threshold.
profit_at_thr = profit_total(inp[TARGET].to_numpy(), prob, amt, THR)

# Input rows plus the score and the resulting decision, persisted to CSV.
out = inp.assign(
    p_fraud=prob,
    decision=np.where(prob >= THR, 'BLOCK', 'APPROVE'),
)
out.to_csv('./inference_output.csv', index=False)
print({'n': len(out), 'thr': THR, 'profit_at_thr': profit_at_thr})
out.head()

{'n': 22500, 'thr': 0.16, 'profit_at_thr': 165145.315}


Unnamed: 0,a,b,c,d,e,f,h,k,l,m,n,p,score,g_freq,j_te,monto_log1p,monto_raw,fraude,p_fraud,decision
0,0.390435,0.472249,-0.289989,-0.582729,0.031695,-0.057705,-0.297,0.333898,-1.015449,-0.726569,0.328568,-1.116497,-0.4862,0.579626,0.061217,-1.064098,6.29,0,0.023171,APPROVE
1,0.390435,0.770656,-0.288752,1.414357,0.061766,-0.053969,-0.297,-0.661089,-0.957625,0.950013,0.328568,0.895658,-1.624445,0.579626,-0.352349,-0.991876,6.87,0,0.025673,APPROVE
2,0.390435,-0.458404,-0.294786,1.414357,-0.044037,-0.015367,-1.002455,-0.289558,0.131675,1.50263,0.328568,0.895658,0.514075,0.579626,-1.15868,0.147059,25.32,0,0.018296,APPROVE
3,0.390435,0.947811,-0.284564,0.016397,-0.079153,-0.065176,0.337909,0.397301,-1.272442,-0.626661,0.328568,-1.116497,0.445091,0.579626,-0.301075,0.240162,28.05,0,0.008117,APPROVE
4,0.390435,0.708455,-0.270002,0.515668,-0.079153,0.205038,0.479,1.578414,1.800377,-0.264494,0.328568,0.895658,-1.141554,0.579626,-0.253644,1.475272,106.58,0,0.005882,APPROVE


In [14]:
# --- Single-row prediction helper (robust to missing columns) ---
def _to_feature_df(row_dict: dict):
    """
    Build a one-row DataFrame from a dict, aligned to the FEATURES columns.
    - Missing feature columns are added with value 0
    - Non-feature columns (TARGET, monto_raw, and anything else not in
      FEATURES) are discarded
    """
    row_frame = pd.DataFrame([row_dict])
    # Strip the label / raw amount when the caller included them.
    row_frame = row_frame.drop(columns=[TARGET, "monto_raw"], errors="ignore")
    # Align against FEATURES without requiring every column to be present.
    return row_frame.reindex(columns=FEATURES, fill_value=0)

def predict_one(row_dict: dict):
    """
    Score a single record with the calibrated model.

    Returns a dict with the fraud probability (``p_fraud``) and the decision
    (``"BLOCK"`` when the probability is at or above THR, else ``"APPROVE"``).
    """
    features = _to_feature_df(row_dict)
    # Single row, column 1 of predict_proba.
    fraud_proba = float(cal.predict_proba(features)[0, 1])
    decision = "BLOCK" if fraud_proba >= THR else "APPROVE"
    return {"p_fraud": fraud_proba, "decision": decision}
    
# --- Dummy example (made-up record) ---
# NOTE(review): the processed rows displayed by the batch cell look
# standardized (e.g. monto_log1p is negative for a positive monto_raw), while
# the values below are on a raw scale — confirm whether preprocessing must be
# applied before scoring. Keys that are not in FEATURES (possibly "monto") are
# dropped and absent features are filled with 0 by _to_feature_df.
dummy_amount = 750.0
dummy_tx = {
    "score": 520,
    "monto": dummy_amount,
    "a": 2,
    "b": 1,
    "c": 0,
    "d": 5,
    "h": 0,
    "j_te": 0.12,
    "g_freq": 0.05,
    "k": 1,
    "l": 0,
    "m": 2,
    "n": 1,
    "monto_log1p": np.log1p(dummy_amount),
}

predict_one(dummy_tx)

{'p_fraud': 0.04478566721081734, 'decision': 'APPROVE'}