In [17]:
# Librerías e importaciones
import pandas as pd
import numpy as np
import joblib
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score

In [18]:
# Cargar dataset procesado
df = pd.read_csv('../data/raw/bank_marketing.csv')
df['Class'] = df['Class'].map({'no': 0, 'yes': 1, 1: 0, 2: 1, 0: 0})
df = df.loc[~df['Class'].isnull(), :]
y = df['Class']
X = df.drop('Class', axis=1)

In [19]:
# Split train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [20]:
# Codificar variables categóricas
X_train_enc = pd.get_dummies(X_train)
X_test_enc = pd.get_dummies(X_test)
X_test_enc = X_test_enc.reindex(columns=X_train_enc.columns, fill_value=0)

In [21]:
# Entrenar y guardar Random Forest
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_enc, y_train)
import os
os.makedirs('../models', exist_ok=True)
joblib.dump(rf, '../models/random_forest.pkl')

['../models/random_forest.pkl']

In [22]:
# Entrenar y guardar XGBoost
xgb_model = xgb.XGBClassifier(eval_metric='logloss', use_label_encoder=False, random_state=42)
params = {
    'max_depth': [3, 5, 7],
    'n_estimators': [100, 300],
    'learning_rate': [0.01, 0.1]
}
grid = GridSearchCV(xgb_model, params, cv=3, scoring='roc_auc', n_jobs=-1)
grid.fit(X_train_enc, y_train)
joblib.dump(grid.best_estimator_, '../models/xgb_best.pkl')
joblib.dump(X_train_enc.columns.tolist(), '../models/xgb_columns.pkl')

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


['../models/xgb_columns.pkl']

In [24]:
# Ejemplo de predicción directa con XGBoost
model = joblib.load('../models/xgb_best.pkl')
columns = joblib.load('../models/xgb_columns.pkl')
ejemplo = {
    'age': 35,
    'job': 'admin.',
    'marital': 'married',
    'education': 'secondary',
    'default': 'no',
    'balance': 1000,
    'housing': 'yes',
    'loan': 'no',
    'contact': 'cellular',
    'day': 15,
    'month': 'may',
    'duration': 300,
    'campaign': 2,
    'pdays': 999,
    'previous': 0,
    'poutcome': 'unknown'
}
data = pd.DataFrame([ejemplo])
data_enc = pd.get_dummies(data)
data_enc = data_enc.reindex(columns=columns, fill_value=0)
pred = int(model.predict(data_enc)[0])
print('Predicción:', pred)

Predicción: 0
