In [6]:
from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score, f1_score, roc_auc_score
import pandas as pd

# Train a LightGBM classifier on the cleaned airline passenger satisfaction
# data and report accuracy, recall, F1 and ROC AUC on the train and test splits.
df = pd.read_csv('airline_passenger_satisfaction_cleaned.csv')

# Features are every column except the target column 'satisfaction'.
X = df.drop('satisfaction', axis=1)

# Target column.
y = df['satisfaction']

# One-hot encode the categorical features. drop_first=True drops the first
# level of each encoded column so the dummies are not perfectly collinear.
X = pd.get_dummies(X, drop_first=True)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialise and fit the gradient-boosted tree model.
model = LGBMClassifier(n_estimators=500, learning_rate=0.1, max_depth=6, random_state=42)
model.fit(X_train, y_train)

# Predict the test set once and reuse the result: the original cell called
# model.predict(X_test) twice, producing two identical arrays.
lgb_test_preds = model.predict(X_test)
y_pred = lgb_test_preds  # alias kept so code that reads `y_pred` still works

# Overall accuracy on the held-out split.
accuracy = accuracy_score(y_test, y_pred)
print(f"Exactitud del modelo LightGBM en el conjunto de prueba: {accuracy:.2f}")

# Training-set predictions, used to compare train vs. test scores below.
lgb_train_preds = model.predict(X_train)

# Column 1 of predict_proba is used as the ranking score for ROC AUC.
# NOTE(review): presumably this is the probability of the positive class
# (label 1) -- confirm against model.classes_.
lgb_train_proba = model.predict_proba(X_train)[:, 1]
lgb_test_proba = model.predict_proba(X_test)[:, 1]

# recall_score / f1_score are called with their default arguments.
# NOTE(review): this assumes `satisfaction` is already encoded as 0/1 -- confirm.
lgb_train_recall = recall_score(y_train, lgb_train_preds)
lgb_test_recall = recall_score(y_test, lgb_test_preds)
lgb_train_f1 = f1_score(y_train, lgb_train_preds)
lgb_test_f1 = f1_score(y_test, lgb_test_preds)
lgb_train_auc = roc_auc_score(y_train, lgb_train_proba)
lgb_test_auc = roc_auc_score(y_test, lgb_test_proba)

# Values are rounded to two decimals for display only; the unrounded scores
# remain available in the lgb_* variables.
print(f"LightGBM - Recall en Entrenamiento: {lgb_train_recall:.2f}")
print(f"LightGBM - Recall en Prueba: {lgb_test_recall:.2f}")
print(f"LightGBM - F1 Score en Entrenamiento: {lgb_train_f1:.2f}")
print(f"LightGBM - F1 Score en Prueba: {lgb_test_f1:.2f}")
print(f"LightGBM - AUC en Entrenamiento: {lgb_train_auc:.2f}")
print(f"LightGBM - AUC en Prueba: {lgb_test_auc:.2f}")





[LightGBM] [Info] Number of positive: 35833, number of negative: 47042
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003566 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 934
[LightGBM] [Info] Number of data points in the train set: 82875, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.432374 -> initscore=-0.272172
[LightGBM] [Info] Start training from score -0.272172
Exactitud del modelo LightGBM en el conjunto de prueba: 0.97
LightGBM - Recall en Entrenamiento: 0.97
LightGBM - Recall en Prueba: 0.94
LightGBM - F1 Score en Entrenamiento: 0.98
LightGBM - F1 Score en Prueba: 0.96
LightGBM - AUC en Entrenamiento: 1.00
LightGBM - AUC en Prueba: 1.00
