<center> <h1> Modelos Predictivos </h1> </center>

### Carga de datos preprocesados y partición en entrenamiento y validación

In [None]:
# Manipulacion de datos...
import pandas as pd
import numpy as np
# Visualizacion de datos...
import matplotlib.pyplot as plt
import seaborn as sns
# Modelo...
from sklearn.linear_model import LogisticRegression
# Metricas de evaluacion...
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import ConfusionMatrixDisplay, classification_report
import scikitplot as skplt
# Do not show warnings...
# NOTE(review): filterwarnings("ignore") silences every warning category for
# the rest of the session, including deprecation notices from the libraries
# imported above.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the preprocessed CSV into a DataFrame and preview its first rows.
PREPROCESSED_CSV = "../Data/Preprocessing/preprocessing_data.csv"
df_preprocessing = pd.read_csv(PREPROCESSED_CSV)
df_preprocessing.head()

* Particionamiento del dataset en entrenamiento y validación

Separar variables dependientes e independientes:

In [None]:
# Target vector: the `is_canceled` column as a plain array.
y = df_preprocessing["is_canceled"].values
# Feature matrix: every remaining column.
X = df_preprocessing.drop(columns="is_canceled")

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows as the test split; the fixed seed makes the
# partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

**Validando Shapes**

In [None]:
# (rows, columns) of the training feature matrix.
X_train.shape

In [None]:
# (rows, columns) of the test feature matrix.
X_test.shape

## Regresión Logística

In [None]:
# Logistic-regression baseline, trained on the training split (fixed seed).
# fit() returns the estimator itself, so construction and training are chained.
model_lr = LogisticRegression(random_state=42).fit(X_train, y_train)

# Class predictions for the test split and the resulting accuracy.
y_pred_lr = model_lr.predict(X_test)
lr_accuracy = accuracy_score(y_test, y_pred_lr)
print(lr_accuracy)

#### Evaluando modelo regresión Logística

In [None]:
# Pair every fitted coefficient with the name of its feature column.
weights = pd.Series(model_lr.coef_[0],
                   index=X_train.columns.values)

# Bar chart of the 10 largest coefficients. The plot call is left as a bare
# expression (ended with `;`) rather than wrapped in print(), which only wrote
# the Axes object's repr to stdout next to the figure.
weights.sort_values(ascending=False).head(10).plot(kind="bar");

El gráfico de pesos anterior muestra las 10 variables con los coeficientes más altos del modelo, es decir, las que más empujan la predicción hacia la clase positiva (en este caso, las variables que conllevan a cancelar la reserva).

In [None]:
# Bar chart of the 10 smallest coefficients, still in descending order.
# Not wrapped in print(): that only wrote the Axes repr to stdout.
weights.sort_values(ascending=False).tail(10).plot(kind="bar");

De forma similar, aquí observamos en orden descendente los 10 coeficientes más bajos (negativos), que corresponden a las variables que ayudan a predecir cuando el estado es cero, o sea, cuando **no hay cancelación**.

In [None]:
# Create the figure and axes together and hand the axes to the display.
# Calling plt.figure() first and then disp.plot() without `ax` left the sized
# figure empty, because the display opened its own default-sized figure.
fig, ax = plt.subplots(figsize=(11,11))
cm = confusion_matrix(y_test, y_pred_lr, labels=model_lr.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model_lr.classes_)
disp.plot(cmap="viridis", ax=ax)
ax.set_title("Matriz de confusión Regresión Logística", fontsize=16)
plt.show()

In [None]:
# NOTE(review): hard-coded sum; presumably two cells read off the confusion
# matrix plotted above. TODO confirm and derive the value from `cm` instead.
1731+683

In [None]:
# Evaluation metrics: text report for the logistic regression on the test split.
lr_report = classification_report(y_test, y_pred_lr)
print(lr_report)

**Curva ROC**

In [None]:
# Predicted class probabilities for the test split (input to the ROC plot).
y_prob_lr = model_lr.predict_proba(X_test)

In [None]:
# ROC curves for the logistic regression. plot_roc replaces the deprecated
# plot_roc_curve and takes the same (y_true, y_probas) arguments.
skplt.metrics.plot_roc(y_test,y_prob_lr)
plt.show()

### Redes Neuronales

In [None]:
from keras import models, layers, optimizers, regularizers
from keras.callbacks import Callback

class TrainingCallback(Callback):
  """Keras callback that stops training once training accuracy exceeds 85%."""

  def on_epoch_end(self, epoch, logs=None):
    """Check the epoch's "accuracy" entry and request a stop above 0.85.

    `logs` defaults to None instead of a shared mutable dict. A missing
    "accuracy" entry is treated as "threshold not reached" rather than
    raising TypeError on the `None > 0.85` comparison.
    """
    accuracy = (logs or {}).get("accuracy")
    if accuracy is not None and accuracy > 0.85:
      print("Lo logramos, nuestro modelo llego a 85%, detenemos el entrenamiento")
      self.model.stop_training = True

In [None]:
# Feed-forward neural network with dropout regularisation...
NN = models.Sequential()
# Input layer: 64 ReLU units, one input per feature column...
NN.add(layers.Dense(64, activation = "relu", input_dim = X_train.shape[1],kernel_initializer="he_uniform"))

# Dropout: randomly disables 50% of the units during training
NN.add(layers.Dropout(0.5))

# Three more hidden layers (32, 16 and 8 units); the input size is inferred
# from the previous layer...
NN.add(layers.Dense(32, activation = "relu",kernel_initializer="he_uniform"))
NN.add(layers.Dense(16, activation = "relu",kernel_initializer="he_uniform"))
NN.add(layers.Dense(8, activation = "relu",kernel_initializer="he_uniform"))
# Dropout: randomly disables 40% of the units during training
NN.add(layers.Dropout(0.4))
# Output layer: a single sigmoid unit for binary classification...
NN.add(layers.Dense(1, activation = "sigmoid"))

# Optimizers...
# `learning_rate` replaces the removed `lr` keyword, which current Keras
# releases reject. NOTE: `opt` is not passed to compile() below, which trains
# with "rmsprop"; it is kept only so the name stays defined.
opt = optimizers.SGD(learning_rate=0.01, momentum=0.9)
# Compile the model...
# `metrics` is given as a list: Keras 3 raises on a bare string.
NN.compile(optimizer="rmsprop", loss="binary_crossentropy",
             metrics=["accuracy"])

# Callback that stops training once training accuracy passes 85%:
callback = TrainingCallback()
# Training...
# NOTE(review): the test split doubles as validation data here, so the
# validation curves are not an independent estimate. Consider a separate
# validation split.
history = NN.fit(X_train,y_train, epochs=250, callbacks= [callback],
                   batch_size = 20, validation_data=(X_test,y_test))

In [None]:
# Print the layer-by-layer summary of the network.
NN.summary()

In [None]:
# Per-epoch training and validation loss recorded by fit().
history_dict = history.history
loss_values = history_dict["loss"]
val_loss_values = history_dict["val_loss"]

# x axis: epoch numbers starting at 1, one per recorded loss value.
epochs = range(1,len(loss_values)+1)

# Plot, with a title and axis labels so the figure stands on its own and can
# be told apart from the accuracy plot.
plt.plot(epochs,loss_values,"o",label="training")
plt.plot(epochs,val_loss_values,"--",label="validation")
plt.title("Pérdida por época")
plt.xlabel("Época")
plt.ylabel("Pérdida")
plt.legend()
plt.show()

In [None]:
# Per-epoch training and validation accuracy recorded by fit().
history_dict = history.history
accuracy_values = history_dict["accuracy"]
val_accuracy_values = history_dict["val_accuracy"]

# x axis: epoch numbers starting at 1, one per recorded accuracy value.
epochs = range(1,len(accuracy_values)+1)

# Plot, with a title and axis labels so the figure stands on its own and can
# be told apart from the loss plot.
plt.plot(epochs,accuracy_values,"o",label="training")
plt.plot(epochs,val_accuracy_values,"--",label="validation")
plt.title("Accuracy por época")
plt.xlabel("Época")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

In [None]:
# PREDICTIONS
# Threshold the network's outputs at 0.5 to obtain boolean class labels.
Y_ann = NN.predict(X_test) > 0.5
Y_ann.shape

In [None]:
# Confusion matrix of the network on the test split, drawn as an annotated heatmap.
cm_ann = confusion_matrix(y_test, Y_ann)
plt.figure(figsize=(5, 2))
plt.title("Matriz de confusión Redes Neuronales", fontsize=16)
sns.heatmap(cm_ann, annot=True, cmap="cividis", fmt=".0f")
plt.show()

In [None]:
# Accuracy of the network on the test split.
# NOTE: despite its name, `precision_ann` holds accuracy, not precision.
precision_ann = accuracy_score(y_test, Y_ann)
print("Accuracy del modelo Redes Neuronales:", precision_ann, sep="\n")

In [None]:
# Classification report for the network, using readable class names.
target_names = ['Non-Canceled', 'Canceled']
ann_report = classification_report(y_test, Y_ann, target_names=target_names)
print(ann_report)

In [None]:
# Raw network outputs for the test split (the sigmoid output layer yields one
# value per row), before any thresholding...
Y_ann_prob = NN.predict(X_test, verbose=1)
Y_ann_prob

### Gradient Boosting Trees

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Optimizacion parametrica...
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Define the classifier...
# Only the seed is fixed here; the other hyperparameters are chosen by the
# randomized search below.

clf_gbt_opt = GradientBoostingClassifier(random_state=42)

# Hyperparameter search space...
parametros = {
    # "log_loss" replaces the misspelled "los_loss", which is not a valid loss
    # and made every candidate that drew it fail.
    "loss": ["log_loss","exponential"],
    "learning_rate": [0.001, 0.01, 0.1, 0.2],
    "n_estimators": [100, 200, 300, 400],
    # subsample must lie in (0, 1]; the former 0.0 candidate was invalid.
    "subsample": [0.5,1.0],
    "criterion": ["friedman_mse"],
    "max_depth": [3, 5, 8, 10]
}

# random_state makes the sampled candidates reproducible between runs.
rand_est_gbt = RandomizedSearchCV(clf_gbt_opt, parametros, n_iter=100,
    cv=6, scoring="accuracy", random_state=42).fit(X_train,y_train)

print(rand_est_gbt.best_estimator_)

print("="*40)
print(rand_est_gbt.best_params_)

In [None]:
# Gradient boosting classifier with fixed hyperparameters
# (presumably the best ones reported by the search above; confirm).
gbt_settings = dict(
    loss='exponential',
    max_depth=5,
    n_estimators=400,
    random_state=42,
    subsample=1.0,
    learning_rate=0.2,
)
clf_gbt_opt = GradientBoostingClassifier(**gbt_settings)
clf_gbt_opt.fit(X_train, np.ravel(y_train))

# Predicted class probabilities for the test split.
gbt_preds_opt = clf_gbt_opt.predict_proba(X_test)

# One-column frames: second probability column (class index 1) and true labels.
preds_df_opt = pd.DataFrame({'prob_canceled': gbt_preds_opt[:, 1]})
true_df = pd.DataFrame({"real_date_canceled": y_test})

# Show both frames side by side for comparison.
comparison_df = pd.concat([true_df.reset_index(drop=True), preds_df_opt], axis=1)
print(comparison_df)

In [None]:
# Hard class predictions for the test split.
# NOTE: this rebinds `gbt_preds_opt` to the output of predict().
gbt_preds_opt = clf_gbt_opt.predict(X_test)

# Raw array of predicted labels.
print(gbt_preds_opt)

# Classification report for the model, using readable class names.
target_names = ['Non-Canceled', 'Canceled']
gbt_report = classification_report(y_test, gbt_preds_opt, target_names=target_names)
print(gbt_report)

In [None]:
# Predicted class probabilities for the test split (input to the ROC plot).
gbt_proba_opt = clf_gbt_opt.predict_proba(X_test)
gbt_proba_opt

In [None]:
# ROC curves for the gradient boosting classifier. plot_roc replaces the
# deprecated plot_roc_curve and takes the same (y_true, y_probas) arguments.
# The title typo ("Booosting") is fixed as well.
skplt.metrics.plot_roc(y_test,gbt_proba_opt)
plt.title("Curva ROC Gradient Boosting Classifier Opt")
plt.show()

In [None]:
# Accuracy of the gradient boosting classifier on the test split.
gbt_accuracy = accuracy_score(y_test, gbt_preds_opt)
print(gbt_accuracy)

In [None]:
# Create the figure and axes together and hand the axes to the display.
# Calling plt.figure() first and then disp.plot() without `ax` left the sized
# figure empty, because the display opened its own default-sized figure.
fig, ax = plt.subplots(figsize=(11,3))
cm = confusion_matrix(y_test, gbt_preds_opt, labels=clf_gbt_opt.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf_gbt_opt.classes_)
disp.plot(cmap="viridis", ax=ax)
ax.set_title("Matriz De Confusión Gradient Boosting Classifier Opt")
plt.show()