In [27]:
import os

import pandas as pd
from catboost import CatBoostClassifier
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# --- Configuration ---------------------------------------------------------
# The dataset location can be overridden with the TELECUST_DATA_PATH
# environment variable so this cell is not tied to a single machine. When the
# variable is unset (or empty) the original local path is used.
DEFAULT_DATA_PATH = 'C:/Users/Administrator/Documents/Proyecto seg clientes refuerzo/Proyecto_seg_clientes/Data/teleCust1000t_listo.csv'
RANDOM_STATE = 42  # single seed shared by the split, SMOTE and CatBoost
TEST_SIZE = 0.2    # fraction of rows held out for evaluation

# Load the dataset
file_path = os.environ.get('TELECUST_DATA_PATH') or DEFAULT_DATA_PATH
data = pd.read_csv(file_path)

# Separate the features (X) from the target variable (y)
X = data.drop('custcat', axis=1)
y = data['custcat']

# Split into training and test sets; stratify=y keeps the class proportions
# of `custcat` the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
)

# Build the pipeline: scale -> oversample with SMOTE -> CatBoost.
# imblearn's Pipeline is used (instead of sklearn's) because it accepts a
# sampler step; the sampler is applied while fitting only, so the test set
# is never resampled at predict time.
# NOTE(review): learning_rate=0.001 with 500 iterations is a very small
# total step budget for gradient boosting; confirm this is intentional.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('smote', SMOTE(random_state=RANDOM_STATE)),
    ('classifier', CatBoostClassifier(
        iterations=500,
        learning_rate=0.001,
        depth=8,
        l2_leaf_reg=3,
        verbose=0,  # silence the per-iteration training log
        random_state=RANDOM_STATE
    ))
])

# Train the model
pipeline.fit(X_train, y_train)

# Predict on the held-out test set
y_pred = pipeline.predict(X_test)

# Compute the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Exactitud del modelo: {accuracy:.2f}")

# Per-class precision / recall / F1 report
print(classification_report(y_test, y_pred))


Exactitud del modelo: 0.38
              precision    recall  f1-score   support

           1       0.36      0.30      0.33        53
           2       0.36      0.30      0.33        44
           3       0.43      0.43      0.43        56
           4       0.36      0.49      0.41        47

    accuracy                           0.38       200
   macro avg       0.38      0.38      0.37       200
weighted avg       0.38      0.38      0.38       200



In [28]:
from collections import Counter
from imblearn.over_sampling import SMOTE

# Run SMOTE on its own over the training split to inspect how it changes the
# class balance.
# NOTE(review): X_train is resampled here without scaling, whereas the
# modelling pipeline applies SMOTE after StandardScaler. Only the label
# counts are reported below, so treat X_resampled as a diagnostic artefact.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Tally the labels before and after oversampling, then report both.
class_counts_before = Counter(y_train)
class_counts_after = Counter(y_resampled)

print("Distribución de clases antes del SMOTE:", class_counts_before)
print("Distribución de clases después del SMOTE:", class_counts_after)


Distribución de clases antes del SMOTE: Counter({3: 225, 1: 213, 4: 189, 2: 173})
Distribución de clases después del SMOTE: Counter({3: 225, 4: 225, 1: 225, 2: 225})


In [29]:
import os

import pandas as pd
from catboost import CatBoostClassifier
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

# --- Configuration ---------------------------------------------------------
# The dataset location can be overridden with the TELECUST_DATA_PATH
# environment variable so this cell is not tied to a single machine. When the
# variable is unset (or empty) the original local path is used.
DEFAULT_DATA_PATH = 'C:/Users/Administrator/Documents/Proyecto seg clientes refuerzo/Proyecto_seg_clientes/Data/teleCust1000t_listo.csv'
RANDOM_STATE = 42  # single seed shared by the split, SMOTE and the boosters
TEST_SIZE = 0.2    # fraction of rows held out for evaluation

# Load the dataset
file_path = os.environ.get('TELECUST_DATA_PATH') or DEFAULT_DATA_PATH
data = pd.read_csv(file_path)

# Separate the features (X) from the target variable (y)
X = data.drop('custcat', axis=1)
y = data['custcat']

# Split into training and test sets; stratify=y keeps the class proportions
# of `custcat` the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
)

# Base models.
# `use_label_encoder` is intentionally not passed to XGBClassifier: current
# XGBoost ignores it and only emits a "Parameters ... are not used" warning.
catboost_model = CatBoostClassifier(verbose=0, random_state=RANDOM_STATE)
xgboost_model = XGBClassifier(eval_metric='mlogloss', random_state=RANDOM_STATE)
knn_model = KNeighborsClassifier(n_neighbors=5)

# Voting classifier; 'soft' voting combines the predicted class
# probabilities of the base models instead of their hard labels.
voting_ensemble = VotingClassifier(estimators=[
    ('catboost', catboost_model),
    ('xgboost', xgboost_model),
    ('knn', knn_model)
], voting='soft')

# Pipeline: feature scaling -> SMOTE oversampling -> ensemble.
# imblearn's Pipeline is used because it accepts a sampler step; the sampler
# is applied while fitting only, never at predict time.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('smote', SMOTE(random_state=RANDOM_STATE)),
    ('classifier', voting_ensemble)
])

# Train the model
pipeline.fit(X_train, y_train)

# Predict on the held-out test set
y_pred = pipeline.predict(X_test)

# Compute the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Exactitud del ensamble: {accuracy:.2f}")

# Per-class precision / recall / F1 report
print(classification_report(y_test, y_pred))


Parameters: { "use_label_encoder" } are not used.



Exactitud del ensamble: 0.34
              precision    recall  f1-score   support

           1       0.38      0.32      0.35        53
           2       0.29      0.25      0.27        44
           3       0.37      0.45      0.41        56
           4       0.32      0.34      0.33        47

    accuracy                           0.34       200
   macro avg       0.34      0.34      0.34       200
weighted avg       0.34      0.34      0.34       200

