# 1. Submuestreo de la Clase Mayoritaria (Undersampling)

Reduce la cantidad de ejemplos de la clase mayoritaria para balancear el dataset.

In [None]:
from imblearn.under_sampling import RandomUnderSampler
import pandas as pd

# Toy dataset: one numeric feature and a binary `churn` label.
# The label is imbalanced: 11 rows of class 0 versus 6 rows of class 1.
df = pd.DataFrame({
    'feature1': list(range(1, 18)),
    'churn': [0] * 8 + [1] * 4 + [0] * 3 + [1] * 2,
})

# Split into the feature matrix (kept as a DataFrame) and the target Series.
X = df[['feature1']]
y = df.loc[:, 'churn']

# Random undersampling with a fixed seed so the selected rows are reproducible.
undersampler = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = undersampler.fit_resample(X, y)

# Show the class distribution after resampling.
print(y_resampled.value_counts())


churn
0    6
1    6
Name: count, dtype: int64


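# 2. Sobremuestreo de la Clase Minoritaria (Oversampling)

Aumenta la cantidad de ejemplos de la clase minoritaria duplicando al azar ejemplos existentes hasta balancear el dataset.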

In [None]:
from imblearn.over_sampling import RandomOverSampler

# Random oversampling with a fixed seed for reproducibility.
# 'auto' is the library default, written out here to make the intent explicit.
oversampler = RandomOverSampler(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = oversampler.fit_resample(X, y)

# Show the class distribution after resampling.
print(y_resampled.value_counts())

churn
0    11
1    11
Name: count, dtype: int64


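# 3. SMOTE (Synthetic Minority Over-sampling Technique)

En lugar de duplicar ejemplos, genera ejemplos sintéticos de la clase minoritaria interpolando entre cada ejemplo y sus vecinos más cercanos de la misma clase.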

In [None]:
from imblearn.over_sampling import SMOTE

# SMOTE with its default settings written out explicitly.
# NOTE: k_neighbors=5 needs more than 5 minority-class rows; the toy data
# has 6 rows of class 1, which is just enough.
smote = SMOTE(sampling_strategy='auto', k_neighbors=5, random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Show the class distribution after resampling.
print(y_resampled.value_counts())

churn
0    11
1    11
Name: count, dtype: int64


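# 4. Ponderación de Clases

En lugar de modificar el dataset, asigna a cada clase un peso inversamente proporcional a su frecuencia, de modo que los errores en la clase minoritaria penalicen más durante el entrenamiento.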

In [None]:
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# Take the class labels from the target itself instead of hard-coding them,
# so the weights stay aligned with whatever labels `y` actually contains.
classes = np.unique(y)

# 'balanced' weights each class by n_samples / (n_classes * class_count),
# so the rarer class receives the larger weight.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y,
)

# Key the mapping by class label, not by array position: the two only
# coincide when the labels happen to be 0..k-1. Converting to plain Python
# scalars keeps the printed dict readable regardless of NumPy's scalar repr.
class_weight_by_label = dict(zip(classes.tolist(), class_weights.tolist()))
print(class_weight_by_label)


{0: 0.7727272727272727, 1: 1.4166666666666667}
