In [404]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    Lasso,
    ElasticNet,
    RidgeCV,
    ElasticNetCV,
    LassoCV,
    SGDRegressor,
    LogisticRegression
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
     mean_squared_error, 
     r2_score, 
     mean_absolute_error,
     classification_report, 
     confusion_matrix,
     ConfusionMatrixDisplay,
     balanced_accuracy_score, 
     log_loss,
     roc_curve, 
     roc_auc_score, 
     auc,
     accuracy_score
)
import shap
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np
import tensorflow as tf
from sklearn.metrics import f1_score as sklearn_f1_score
import pandas as pd
import numpy as np
import tensorflow as tf
import optuna
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin
import joblib

---

## NN RainTomorrow

In [None]:
file_path = "df_train.csv"
df = pd.read_csv(file_path, sep=",", engine="python")

X = df.drop(['Date', 'RainTomorrow', 'RainfallTomorrow'], axis=1)
y = df[["RainTomorrow"]]
y["RainTomorrow"] = y["RainTomorrow"].map({"Yes": 1, "No": 0}).astype(float)

In [414]:
X.columns

Index(['MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine',
       'WindGustDir', 'WindGustSpeed', 'WindDir9am', 'WindDir3pm',
       'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am', 'Humidity3pm',
       'Pressure9am', 'Pressure3pm', 'Cloud9am', 'Cloud3pm', 'Temp9am',
       'Temp3pm', 'RainToday'],
      dtype='object')

In [463]:
class CustomScaler(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.scaler = StandardScaler()
        self.columnas_numericas = None
        self.columnas_categoricas = ['WindGustDir', 'WindDir9am', 'WindDir3pm', 'RainToday']

    def fit(self, X, y=None):
        if 'Date' in X.columns:
            self.columnas_categoricas.append('Date')
        self.columnas_numericas = X.columns.difference(self.columnas_categoricas)
        X_numeric = X[self.columnas_numericas]
        self.scaler.fit(X_numeric)
        return self

    def transform(self, X, y=None):
        X_numeric = X[self.columnas_numericas]
        X_escalado_numeric = self.scaler.transform(X_numeric)
        X_esc = pd.DataFrame(X_escalado_numeric, columns=self.columnas_numericas, index=X.index)
        X_scaled = X[self.columnas_categoricas].join(X_esc)
        return X_scaled

class DummiesTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.columns_ = None

    def fit(self, X, y=None):
        X = X.copy()

        # Agrupar direcciones y convertir a dummies
        X["RainToday"] = X["RainToday"].map({"Yes": 1, "No": 0}).astype(float)
        X["WindGustDir_Agrupado"] = X["WindGustDir"].apply(self.agrupar_direcciones)
        X["WindDir9am_Agrupado"] = X["WindDir9am"].apply(self.agrupar_direcciones)
        X["WindDir3pm_Agrupado"] = X["WindDir3pm"].apply(self.agrupar_direcciones)

        # Convertir a dummies
        X = self.convertir_a_dummies(X, "WindGustDir_Agrupado", "WindGustDir")
        X = self.convertir_a_dummies(X, "WindDir9am_Agrupado", "WindDir9am")
        X = self.convertir_a_dummies(X, "WindDir3pm_Agrupado", "WindDir3pm")

        # Eliminar las columnas originales
        X = X.drop(["WindGustDir", "WindDir9am", "WindDir3pm"], axis=1)

        # Guardar las columnas generadas durante el fit
        self.columns_ = X.columns
        
        return self

    def transform(self, X, y=None):
        X = X.copy()

        # Aplicar las mismas transformaciones que en fit
        X["RainToday"] = X["RainToday"].map({"Yes": 1, "No": 0}).astype(float)
        X["WindGustDir_Agrupado"] = X["WindGustDir"].apply(self.agrupar_direcciones)
        X["WindDir9am_Agrupado"] = X["WindDir9am"].apply(self.agrupar_direcciones)
        X["WindDir3pm_Agrupado"] = X["WindDir3pm"].apply(self.agrupar_direcciones)

        # Convertir a dummies
        X = self.convertir_a_dummies(X, "WindGustDir_Agrupado", "WindGustDir")
        X = self.convertir_a_dummies(X, "WindDir9am_Agrupado", "WindDir9am")
        X = self.convertir_a_dummies(X, "WindDir3pm_Agrupado", "WindDir3pm")

        # Eliminar las columnas originales
        X = X.drop(["WindGustDir", "WindDir9am", "WindDir3pm"], axis=1)
        
        # Asegurarse de que todas las columnas de fit están presentes en la transformación
        for col in self.columns_:
            if col not in X.columns:
                X[col] = 0
        
        # Reordenar las columnas como en el fit
        X = X[self.columns_]
        
        return X

    def agrupar_direcciones(self, direccion):
        grupos_principales = {
            "N": ["N", "NNW", "NNE"],
            "S": ["S", "SSW", "SSE"],
            "E": ["E", "ENE", "ESE", "SE", "NE"],
            "W": ["W", "WNW", "WSW", "SW", "NW"],
        }

        for grupo, direcciones in grupos_principales.items():
            if direccion in direcciones:
                return grupo

        return "Otro"

    def convertir_a_dummies(self, X, columna_agrupada, prefijo):
        dummies = pd.get_dummies(X[columna_agrupada], dtype=int, drop_first=True)
        dummies = dummies.rename(columns={
            "N": f"{prefijo}_N", 
            "S": f"{prefijo}_S", 
            "W": f"{prefijo}_W"
        })
        X = X.drop(columna_agrupada, axis=1)
        X = pd.concat([X, dummies], axis=1)
        return X

from sklearn.utils.class_weight import compute_class_weight

from sklearn.utils.class_weight import compute_class_weight

class NeuralNetworkTensorFlowRl(BaseEstimator, ClassifierMixin):
    def __init__(self, batch_size=32, epochs=10, learning_rate=0.001, dropout_rate=0.3, n_units_layer_0=128, n_units_layer_1=64, class_weight_factor=1.7):
        self.batch_size = batch_size
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.dropout_rate = dropout_rate
        self.n_units_layer_0 = n_units_layer_0
        self.n_units_layer_1 = n_units_layer_1
        self.class_weight_factor = class_weight_factor
        self.model = None

    def build_model(self, input_shape):
        model = tf.keras.Sequential()
        model.add(tf.keras.Input(shape=(input_shape,)))
        model.add(tf.keras.layers.Dense(self.n_units_layer_0, activation='relu'))
        model.add(tf.keras.layers.Dropout(self.dropout_rate))
        model.add(tf.keras.layers.Dense(self.n_units_layer_1, activation='relu'))
        model.add(tf.keras.layers.Dropout(self.dropout_rate))
        model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), 
                      loss='binary_crossentropy', 
                      metrics=[tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
        return model

    def fit(self, X, y, X_val=None, y_val=None):
        X = np.array(X)
        y = np.array(y).ravel()  # Asegurarse de que y es un array 1D

        self.model = self.build_model(X.shape[1])

        # Computar pesos de clase leves
        classes = np.unique(y)
        class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y)
        class_weights = {i: w * self.class_weight_factor for i, w in enumerate(class_weights)}

        # Ajustando el modelo con class_weight
        self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=0, class_weight=class_weights)
        return self

    def predict(self, X):
        X = np.array(X)
        predictions = self.model.predict(X)
        return (predictions > 0.5).astype(int)

    def predict_proba(self, X):
        X = np.array(X)
        return self.model.predict(X)

In [464]:
from sklearn.pipeline import Pipeline

# Crear el Pipeline completo con escalado, transformación de dummies y el modelo
pipeline = Pipeline([
    ('scaler', CustomScaler()),
    ('dummies', DummiesTransformer()),
    ('nn_model_lr', NeuralNetworkTensorFlowRl(batch_size=32, epochs=100, learning_rate=0.001, dropout_rate=0.3, n_units_layer_0=64, n_units_layer_1=32))
])

# Entrenar el Pipeline con X e y
pipeline.fit(X, y)


In [468]:
joblib.dump(pipeline, './pipelines/pipelineRL.joblib')

['./pipelines/pipelineRL.joblib']

In [None]:
lista = []
for i in range(500):
    # Extraer la cuarta fila (índice 3) en formato DataFrame
    fila = X.iloc[[i]]  # La fila con index 3, como DataFrame

    # Transformar la cuarta fila usando el pipeline (excepto la última etapa)
    fila_transformed = pipeline.named_steps['scaler'].transform(fila)
    fila_transformed = pipeline.named_steps['dummies'].transform(fila_transformed)

    # Asegurarse de que la forma coincide
    # print(f"Shape de la cuarta fila transformada: {fila_transformed.shape}")  # Debería ser (1, 26) si coincide con el entrenamiento

    # Probar el Pipeline con la cuarta fila transformada
    prediccion_fila = pipeline.named_steps['nn_model_lr'].predict(fila_transformed)
    if prediccion_fila > 0:
        lista.append(i)
    print(i)
    print(prediccion_fila)

    # Mostrar el resultado
    print(f"Predicción para la cuarta fila: {prediccion_fila[0]}")

In [466]:
print(lista)

[20, 21, 22, 43, 44, 45, 46, 47, 68, 70, 71, 99, 100, 101, 102, 113, 114, 138, 139, 145, 146, 150, 151, 152, 156, 157, 158, 159, 165, 175, 176, 177, 178, 180, 182, 184, 192, 193, 194, 195, 196, 202, 206, 207, 232, 233, 235, 245, 262, 263, 264, 283, 296, 297, 298, 324, 325, 326, 328, 350, 356, 357, 358, 359, 363, 364, 368, 395, 396, 397, 398, 399, 400, 401, 402, 406, 407, 408, 423, 424, 426, 427, 429, 451, 452, 459, 460, 462, 477]


In [467]:
print(len(lista))

89
