In [4]:
# --------------------
# MONTAJE DE DRIVE Y DIRECTORIO DEL PROYECTO
# --------------------
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Proyecto-IA-DeteccionDeFraudeEnTransacciones

# --------------------
# IMPORTS GENERALES
# --------------------
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Añadir src al path para importar utils y visualization
sys.path.append('/content/drive/MyDrive/Proyecto-IA-DeteccionDeFraudeEnTransacciones/src')
from utils import load_data, save_model
from visualization import save_plot

# Directorios principales
data_raw = "data/"
data_processed = "data/processed"
models_dir = "models/trained_models"
os.makedirs(data_processed, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)

print("Setup completo ✅")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Proyecto-IA-DeteccionDeFraudeEnTransacciones
Setup completo ✅


In [5]:
# --------------------
# CARGAR DATA LIMPIA
# --------------------
df = load_data(os.path.join(data_processed, "creditcard_clean.csv"))

# Separar X e y
X = df.drop("Class", axis=1)
y = df["Class"]

# Split train/test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# --------------------
# ENTRENAR RANDOM FOREST
# --------------------
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(
    n_estimators=100,
    class_weight='balanced',
    random_state=42
)
rf_model.fit(X_train, y_train)
save_model(rf_model, "rf_model.pkl", models_dir)
print("RandomForest entrenado y guardado ✅")
print(rf_model.get_params())

# --------------------
# ENTRENAR XGBOOST
# --------------------
from xgboost import XGBClassifier

xgb_model = XGBClassifier(
    n_estimators=100,
    scale_pos_weight=y_train.value_counts()[0]/y_train.value_counts()[1],
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42
)
xgb_model.fit(X_train, y_train)
save_model(xgb_model, "xgb_model.pkl", models_dir)
print("XGBoost entrenado y guardado ✅")
print(xgb_model.get_params())


Modelo guardado en models/trained_models/rf_model.pkl
RandomForest entrenado y guardado ✅
{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Modelo guardado en models/trained_models/xgb_model.pkl
XGBoost entrenado y guardado ✅
{'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': 'logloss', 'feature_types': None, 'feature_weights': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': None, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': None, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': 100, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': 42, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': np.float64(599.02416918429), 'subsample': None, 'tree_method': None, 'validate_pa