In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report, roc_auc_score
import joblib

In [2]:
# Cargar datos
df = pd.read_csv("data_limpio_umbral.csv") 
X = df.drop(columns="Bankrupt?")
y = df["Bankrupt?"]

In [3]:
# Separar en train/test con estratificación
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Definir el modelo base y el grid de hiperparámetros
svm = SVC(probability=True, class_weight='balanced', random_state=42)
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 0.01, 0.001],
    'kernel': ['rbf']
}

In [4]:
# Definir validación cruzada estratificada
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Buscar los mejores hiperparámetros
grid = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=cv,
    n_jobs=-1,
    verbose=1
)

grid.fit(X_train, y_train)
best_svm = grid.best_estimator_

Fitting 5 folds for each of 9 candidates, totalling 45 fits


In [5]:
# Evaluar en test set
y_pred = best_svm.predict(X_test)
y_proba = best_svm.predict_proba(X_test)[:, 1]

print("Mejores hiperparámetros:", grid.best_params_)
print("\nClassification Report en test:")
print(classification_report(y_test, y_pred))
print(f"ROC AUC: {roc_auc_score(y_test, y_proba):.4f}")

Mejores hiperparámetros: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}

Classification Report en test:
              precision    recall  f1-score   support

         0.0       0.99      0.85      0.92      1320
         1.0       0.17      0.86      0.28        44

    accuracy                           0.85      1364
   macro avg       0.58      0.86      0.60      1364
weighted avg       0.97      0.85      0.90      1364

ROC AUC: 0.9251


In [6]:
# Guardar el modelo entrenado
joblib.dump(best_svm, "model.pkl")

['model.pkl']