In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from scipy import stats
from sklearn.linear_model import LassoCV
from sklearn.ensemble import BaggingClassifier, VotingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings("ignore")

In [23]:
df = pd.read_csv(r"../data/df_dummies.csv")
features = pd.read_csv(r"../data/features.csv")
target = pd.read_csv(r"../data/target.csv")

results = {}

## Bagging and Pasting

In [24]:


# Dividir el conjunto de datos
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Bagging con DecisionTreeClassifier
bagging_model = BaggingClassifier(estimator=DecisionTreeClassifier(), 
                                   n_estimators=100, 
                                   random_state=42)

# Entrenar el modelo
bagging_model.fit(X_train, y_train)

# Hacer predicciones
y_pred_bagging = bagging_model.predict(X_test)

# Evaluar el modelo
bagging_accuracy = accuracy_score(y_test, y_pred_bagging)
print(f'Accuracy del modelo de Bagging: {bagging_accuracy:.2f}')

# Pasting con DecisionTreeClassifier
pasting_model = BaggingClassifier(estimator=DecisionTreeClassifier(), 
                                   n_estimators=100, 
                                   bootstrap=False,  # Sin reemplazo
                                   random_state=42)

# Entrenar el modelo
pasting_model.fit(X_train, y_train)

# Hacer predicciones
y_pred_pasting = pasting_model.predict(X_test)

# Evaluar el modelo
pasting_accuracy = accuracy_score(y_test, y_pred_pasting)
results['Bagging and Pasting'] = pasting_accuracy
print(f'Accuracy del modelo de Pasting: {pasting_accuracy:.2f}')

Accuracy del modelo de Bagging: 0.77
Accuracy del modelo de Pasting: 0.76


## Random Forest

In [25]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Crear el modelo de Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Entrenar el modelo
rf_model.fit(X_train, y_train)

# Hacer predicciones
y_pred_rf = rf_model.predict(X_test)

# Evaluar el modelo
rf_accuracy = accuracy_score(y_test, y_pred_rf)
results['Random Forest'] = rf_accuracy
print(f'Accuracy del modelo de Random Forest: {rf_accuracy:.2f}')

Accuracy del modelo de Random Forest: 0.77


## Gradient Boosting

In [26]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Crear el modelo de Gradient Boosting
gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)

# Entrenar el modelo
gb_model.fit(X_train, y_train)

# Hacer predicciones
y_pred_gb = gb_model.predict(X_test)

# Evaluar el modelo
gb_accuracy = accuracy_score(y_test, y_pred_gb)
results['Gradient Boosting'] = gb_accuracy
print(f'Accuracy del modelo de Gradient Boosting: {gb_accuracy:.2f}')

Accuracy del modelo de Gradient Boosting: 0.82


## Adaptive Boosting

In [27]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Crear el modelo de AdaBoost
# Usaremos un árbol de decisión como clasificador base
ada_model = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=1), 
                                n_estimators=100, 
                                random_state=42)

# Entrenar el modelo
ada_model.fit(X_train, y_train)

# Hacer predicciones
y_pred_ada = ada_model.predict(X_test)

# Evaluar el modelo
ada_accuracy = accuracy_score(y_test, y_pred_ada)
results["Adaptative Boosting"] = ada_accuracy
print(f'Accuracy del modelo de AdaBoost: {ada_accuracy:.2f}')

Accuracy del modelo de AdaBoost: 0.82


## Results

In [28]:
results_df = pd.DataFrame(results.items(), columns=['Modelo', 'Precisión'])
results_df.sort_values(by='Precisión', ascending=False, inplace=True)

# Mostrar los resultados
print("Resultados de los Modelos:")
print(results_df)

# Determinar y mostrar el mejor modelo
best_model = results_df.iloc[0]
print(f"\nEl mejor modelo es: {best_model['Modelo']} con una precisión de: {best_model['Precisión']:.2f}")

Resultados de los Modelos:
                Modelo  Precisión
2    Gradient Boosting   0.819940
3  Adaptative Boosting   0.815476
1        Random Forest   0.774554
0  Bagging and Pasting   0.758185

El mejor modelo es: Gradient Boosting con una precisión de: 0.82
