In [None]:
# Librerias
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    Lasso,
    ElasticNet,
    RidgeCV,
    ElasticNetCV,
    LassoCV,
    SGDRegressor,
    LogisticRegression
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
     mean_squared_error, 
     r2_score, 
     mean_absolute_error,
     classification_report, 
     confusion_matrix,
     ConfusionMatrixDisplay,
     balanced_accuracy_score, 
     log_loss,
     roc_curve, 
     roc_auc_score, 
     auc,
     accuracy_score
)
import shap

In [19]:
# Read the raw observations from the CSV file located next to the notebook.
file_path = "weatherAUS.csv"
df = pd.read_csv(
    file_path,
    sep=",",
    engine="python",
)

In [20]:
# Parse the date column once so the chronological comparisons below operate on
# real timestamps.
df["Date"] = pd.to_datetime(df["Date"])

# Rows dated strictly before this day form the training set; rows on or after
# it form the test set.
fecha_limite = "2016-01-01"

# Take explicit copies: later steps add and overwrite columns on these frames,
# and doing that on a bare boolean-mask slice of `df` triggers pandas'
# SettingWithCopyWarning and leaves it ambiguous which object is modified.
df_train = df[df["Date"] < fecha_limite].copy()

df_test = df[df["Date"] >= fecha_limite].copy()

### Split Data class

In [15]:
class Split_data:
    """Chronological train/test splitter for a frame that has a ``Date`` column."""

    def convert_date(self, df):
        """Parse ``df["Date"]`` with ``pd.to_datetime`` and store it back in ``df``.

        The frame is modified in place; nothing is returned.
        """
        parsed_dates = pd.to_datetime(df["Date"])
        df["Date"] = parsed_dates

    def split_data(self, df):
        """Split ``df`` around the fixed cutoff date ``"2016-01-01"``.

        Rows whose ``Date`` is before the cutoff go to the training frame and
        rows on or after it go to the test frame. Both frames and the cutoff
        are also stored on the instance.

        Returns:
            tuple: ``(df_train, df_test)``.
        """
        self.fecha_limite = "2016-01-01"
        cutoff = self.fecha_limite
        dates = df["Date"]
        self.df_train = df.loc[dates < cutoff]
        self.df_test = df.loc[dates >= cutoff]
        return self.df_train, self.df_test

In [36]:
class DataSplitter:
    """Chronological train/test splitter that owns the frame it splits.

    Attributes are plain instance fields:
      * ``df``            - the frame passed to the constructor.
      * ``random_state``  - stored but not used by the date-based split.
      * ``fecha_limite``  - cutoff date (``"2016-01-01"``).
      * ``df_train`` / ``df_test`` - ``None`` until ``split_data`` runs.
    """

    def __init__(self, df, random_state=42):
        """Store the frame to split and initialise the result slots.

        Args:
            df: DataFrame with a ``Date`` column.
            random_state: Kept for interface compatibility; the split is
                deterministic and does not read it.
        """
        self.df = df
        self.random_state = random_state
        self.df_train = None
        self.df_test = None
        self.fecha_limite = "2016-01-01"

    def split_data(self):
        """Split ``self.df`` into rows before / on-or-after ``self.fecha_limite``.

        The ``Date`` column of ``self.df`` is converted to datetime in place
        before comparing.

        Returns:
            tuple: ``(df_train, df_test)``.
        """
        # Always operate on the frame given to the constructor; the previous
        # code read a notebook-global `df`, which ignored the constructor
        # argument and failed when no such global existed.
        self.df["Date"] = pd.to_datetime(self.df["Date"])
        dates = self.df["Date"]
        self.df_train = self.df[dates < self.fecha_limite]
        self.df_test = self.df[dates >= self.fecha_limite]
        return self.df_train, self.df_test

In [37]:
# Build the chronological train/test partition. `Split_data.split_data` compares
# the "Date" column against its cutoff, so `df["Date"]` must already be parsed.
split_data = Split_data()
df_train, df_test = split_data.split_data(df)

### Transform Data class

In [32]:
class TransformData:
    def __init__(self, df_train, df_test):
        self.df_train = df_train
        self.df_test = df_test
        self.ciudades = [
    " Adelaide",
    "Canberra",
    "Cobar",
    "Dartmoor",
    "Melbourne",
    "MelbourneAirport",
    "MountGambier",
    "Sydney",
    "SydneyAirport",
]
        self.scaler = StandardScaler()

    def delete_unnamed(self):
        self.df_train = self.df_train.drop("Unnamed: 0", axis=1)
        self.df_test = self.df_test.drop("Unnamed: 0", axis=1)
        return self.df_train, self.df_test
    
    def delete_ciudades(self):
        self.df_train = self.df_train[self.df_train["Location"].isin(self.ciudades)]
        self.df_train = self.df_train.drop("Location", axis=1)
        self.df_test = self.df_test[self.df_test["Location"].isin(self.ciudades)]
        self.df_test = self.df_test.drop("Location", axis=1)
        return self.df_train, self.df_test
    
    def determinar_bimestre(self, fecha):
        mes = fecha.month
        if 1 <= mes <= 2:
            return "Bimestre 1"
        elif 3 <= mes <= 4:
            return "Bimestre 2"
        elif 5 <= mes <= 6:
            return "Bimestre 3"
        elif 7 <= mes <= 8:
            return "Bimestre 4"
        elif 9 <= mes <= 10:
            return "Bimestre 5"
        else:
            return "Bimestre 6"
    
    def bimestre(self):
        self.df_train["Bimestre"] = self.df_train["Date"].apply(lambda x: self.determinar_bimestre(x))
        self.df_test["Bimestre"] = self.df_test["Date"].apply(lambda x: self.determinar_bimestre(x))
        return self.df_train, self.df_test
    
    def Rainfall(self):
        mediana_por_dia_train = self.df_train.groupby(self.df_train["Date"].dt.date)["Rainfall"].median()
        self.df_train["Rainfall"] = self.df_train.apply(
            lambda row: (
                mediana_por_dia_train[row["Date"].date()]
                if pd.isnull(row["Rainfall"])
                else row["Rainfall"]
            ),
            axis=1,
        )
        mediana_por_dia_test = df_test.groupby(df["Date"].dt.date)["Rainfall"].median()
        df_test["Rainfall"] = df_test.apply(
            lambda row: (
                mediana_por_dia_test[row["Date"].date()]
                if pd.isnull(row["Rainfall"])
                else row["Rainfall"]
            ),
            axis=1,
        )
        return self.df_train, self.df_test
    
    def Evaporation(self):
        bim = self.df_train.groupby("Bimestre")
        medians = bim["Evaporation"].median()
        for bimestre, median in medians.items():
            self.df_train.loc[
                (self.df_train["Bimestre"] == bimestre) & (self.df_train["Evaporation"].isnull()),
                "Evaporation",
            ] = median
        for bimestre, median in medians.items():
            self.df_test.loc[
                (self.df_test["Bimestre"] == bimestre) & (self.df_test["Evaporation"].isnull()),
                "Evaporation",
            ] = median
        return self.df_train, self.df_test
    
    def Sunshine(self):
        self.df_train["Sunshine"] = self.df_train.groupby(self.df_train["Date"].dt.day)["Sunshine"].transform(
            lambda x: x.fillna(x.mean())
        )
        self.df_test["Sunshine"] = self.df_test.groupby(self.df_test["Date"].dt.day)["Sunshine"].transform(
            lambda x: x.fillna(x.mean())
        )
        return self.df_train, self.df_test
    
    def WindDir(self):
        self.df_train["WindGustDir"] = self.df_train.groupby(self.df_train["Date"].dt.day)[
            "WindGustDir"
        ].transform(lambda x: x.fillna(x.mode().iloc[0]))
        self.df_train["WindDir9am"] = self.df_train.groupby(self.df_train["Date"].dt.day)[
            "WindDir9am"
        ].transform(lambda x: x.fillna(x.mode().iloc[0]))
        self.df_train["WindDir3pm"] = self.df_train.groupby(self.df_train["Date"].dt.day)[
            "WindDir3pm"
        ].transform(lambda x: x.fillna(x.mode().iloc[0]))

        self.df_test["WindGustDir"] = self.df_test.groupby(self.df_test["Date"].dt.day)[
            "WindGustDir"
        ].transform(lambda x: x.fillna(x.mode().iloc[0]))
        self.df_test["WindDir9am"] = self.df_test.groupby(self.df_test["Date"].dt.day)["WindDir9am"].transform(
            lambda x: x.fillna(x.mode().iloc[0])
        )
        self.df_test["WindDir3pm"] = self.df_test.groupby(self.df_test["Date"].dt.day)["WindDir3pm"].transform(
            lambda x: x.fillna(x.mode().iloc[0])
        )
        return self.df_train, self.df_test
    
    def WindSpeed(self):
        self.df_train["WindGustSpeed"] = self.df_train.groupby(self.df_train["Date"].dt.day)[
            "WindGustSpeed"
        ].transform(lambda x: x.fillna(x.median()))
        self.df_train["WindSpeed9am"] = self.df_train.groupby(self.df_train["Date"].dt.day)[
            "WindSpeed9am"
        ].transform(lambda x: x.fillna(x.median()))
        self.df_train["WindSpeed3pm"] = self.df_train.groupby(self.df_train["Date"].dt.day)[
            "WindGustSpeed"
        ].transform(lambda x: x.fillna(x.median()))

        self.df_test["WindGustSpeed"] = self.df_test.groupby(self.df_test["Date"].dt.day)[
            "WindGustSpeed"
        ].transform(lambda x: x.fillna(x.median()))
        self.df_test["WindSpeed9am"] = self.df_test.groupby(self.df_test["Date"].dt.day)[
            "WindSpeed9am"
        ].transform(lambda x: x.fillna(x.median()))
        self.df_test["WindSpeed3pm"] = self.df_test.groupby(self.df_test["Date"].dt.day)[
            "WindGustSpeed"
        ].transform(lambda x: x.fillna(x.median()))

        self.df_train["WindSpeed_Difference"] = self.df_train["WindSpeed9am"] - self.df_train["WindSpeed3pm"]
        self.df_train.drop(["WindSpeed9am", "WindSpeed3pm"], axis=1, inplace=True)
        self.df_test["WindSpeed_Difference"] = self.df_test["WindSpeed9am"] - self.df_test["WindSpeed3pm"]
        self.df_test.drop(["WindSpeed9am", "WindSpeed3pm"], axis=1, inplace=True)
        return self.df_train, self.df_test
    
    def Humidity(self):
        self.df_train["Humidity9am"] = self.df_train.groupby(self.df_train["Date"].dt.day)[
            "Humidity9am"
        ].transform(lambda x: x.fillna(x.median()))
        self.df_train["Humidity3pm"] = self.df_train.groupby(self.df_train["Date"].dt.day)[
            "Humidity3pm"
        ].transform(lambda x: x.fillna(x.median()))

        self.df_test["Humidity9am"] = self.df_test.groupby(self.df_test["Date"].dt.day)[
            "Humidity9am"
        ].transform(lambda x: x.fillna(x.median()))
        self.df_test["Humidity3pm"] = self.df_test.groupby(self.df_test["Date"].dt.day)[
            "Humidity3pm"
        ].transform(lambda x: x.fillna(x.median()))

        self.df_train["Humidity_Difference"] = self.df_train["Humidity9am"] - self.df_train["Humidity3pm"]
        self.df_train.drop(["Humidity9am", "Humidity3pm"], axis=1, inplace=True)

        self.df_test["Humidity_Difference"] = self.df_test["Humidity9am"] - self.df_test["Humidity3pm"]
        self.df_test.drop(["Humidity9am", "Humidity3pm"], axis=1, inplace=True)
        return self.df_train, self.df_test
    
    def Cloud(self):
        self.df_train["Cloud9am"] = self.df_train.groupby(self.df_train["Date"].dt.day)["Cloud9am"].transform(
            lambda x: x.fillna(x.median())
        )
        self.df_train["Cloud3pm"] = self.df_train.groupby(self.df_train["Date"].dt.day)["Cloud3pm"].transform(
            lambda x: x.fillna(x.median())
        )

        self.df_test["Cloud9am"] = self.df_test.groupby(self.df_test["Date"].dt.day)["Cloud9am"].transform(
            lambda x: x.fillna(x.median())
        )
        self.df_test["Cloud3pm"] = self.df_test.groupby(self.df_test["Date"].dt.day)["Cloud3pm"].transform(
            lambda x: x.fillna(x.median())
        )

        self.df_train["Cloud_Difference"] = self.df_train["Cloud9am"] - self.df_train["Cloud3pm"]
        self.df_train.drop(["Cloud9am", "Cloud3pm"], axis=1, inplace=True)

        self.df_test["Cloud_Difference"] = self.df_test["Cloud9am"] - self.df_test["Cloud3pm"]
        self.df_test.drop(["Cloud9am", "Cloud3pm"], axis=1, inplace=True)
        return self.df_train, self.df_test
    
    def Pressure(self):
        self.df_train["Pressure9am"] = self.df_train.groupby(self.df_train["Date"].dt.day)[
            "Pressure9am"
        ].transform(lambda x: x.fillna(x.mean()))
        self.df_train["Pressure3pm"] = self.df_train.groupby(self.df_train["Date"].dt.day)[
            "Pressure3pm"
        ].transform(lambda x: x.fillna(x.mean()))

        self.df_test["Pressure9am"] = self.df_test.groupby(self.df_test["Date"].dt.day)[
            "Pressure9am"
        ].transform(lambda x: x.fillna(x.mean()))
        self.df_test["Pressure3pm"] = self.df_test.groupby(self.df_test["Date"].dt.day)[
            "Pressure3pm"
        ].transform(lambda x: x.fillna(x.mean()))

        self.df_train["Pressure_Difference"] = self.df_train["Pressure9am"] - self.df_train["Pressure3pm"]
        self.df_train.drop(["Pressure9am", "Pressure3pm"], axis=1, inplace=True)

        self.df_test["Pressure_Difference"] = self.df_test["Pressure9am"] - self.df_test["Pressure3pm"]
        self.df_test.drop(["Pressure9am", "Pressure3pm"], axis=1, inplace=True)
        return self.df_train, self.df_test
    
    def Temp(self):
        median_min_temp_by_bimestre_train = self.df_train.groupby("Bimestre")["MinTemp"].median()

        for bimestre, median_temp in median_min_temp_by_bimestre_train.items():
            self.df_train.loc[self.df_train["Bimestre"] == bimestre, "MinTemp"] = self.df_train.loc[
                self.df_train["Bimestre"] == bimestre, "MinTemp"
            ].fillna(median_temp)
        for bimestre, median_temp in median_min_temp_by_bimestre_train.items():
            self.df_test.loc[self.df_test["Bimestre"] == bimestre, "MinTemp"] = self.df_test.loc[
                self.df_test["Bimestre"] == bimestre, "MinTemp"
            ].fillna(median_temp)
        median_max_temp_by_bimestre_train = self.df_train.groupby("Bimestre")["MaxTemp"].median()
        for bimestre, median_temp in median_max_temp_by_bimestre_train.items():
            self.df_train.loc[self.df_train["Bimestre"] == bimestre, "MaxTemp"] = self.df_train.loc[
                self.df_train["Bimestre"] == bimestre, "MaxTemp"
            ].fillna(median_temp)

        for bimestre, median_temp in median_max_temp_by_bimestre_train.items():
            self.df_test.loc[self.df_test["Bimestre"] == bimestre, "MaxTemp"] = self.df_test.loc[
                self.df_test["Bimestre"] == bimestre, "MaxTemp"
            ].fillna(median_temp)
        
        self.df_train["Dif_Temp_Max_Min"] = self.df_train["MaxTemp"] - self.df_train["MinTemp"]
        self.df_train.drop(["MaxTemp", "MinTemp"], axis=1, inplace=True)

        self.df_test["Dif_Temp_Max_Min"] = self.df_test["MaxTemp"] - self.df_test["MinTemp"]
        self.df_test.drop(["MaxTemp", "MinTemp"], axis=1, inplace=True)
        
        ## Temp9am y Temp3pm
        self.df_train["Temp9am"] = self.df_train.groupby(self.df_train["Date"].dt.day)["Temp9am"].transform(
            lambda x: x.fillna(x.median())
        )
        self.df_train["Temp3pm"] = self.df_train.groupby(self.df_train["Date"].dt.day)["Temp3pm"].transform(
            lambda x: x.fillna(x.median())
        )
        self.df_test["Temp9am"] = self.df_test.groupby(self.df_test["Date"].dt.day)["Temp9am"].transform(
            lambda x: x.fillna(x.median())
        )
        self.df_test["Temp3pm"] = self.df_test.groupby(self.df_test["Date"].dt.day)["Temp3pm"].transform(
            lambda x: x.fillna(x.median())
        )

        self.df_train["Temp_Difference"] = self.df_train["Temp3pm"] - self.df_train["Temp9am"]
        self.df_train.drop(["Temp3pm", "Temp9am"], axis=1, inplace=True)
        self.df_test["Temp_Difference"] = self.df_test["Temp3pm"] - self.df_test["Temp9am"]
        self.df_test.drop(["Temp3pm", "Temp9am"], axis=1, inplace=True)

        return self.df_train, self.df_test
    
    def RainToday(self):
        moda_RainToday_train = self.df_train.groupby("Date")["RainToday"].transform(
            lambda x: x.mode().iloc[0] if not x.mode().empty else None
        )
        self.df_train["RainToday"] = self.df_train["RainToday"].fillna(moda_RainToday_train)

        moda_RainToday_test = self.df_test.groupby("Date")["RainToday"].transform(
            lambda x: x.mode().iloc[0] if not x.mode().empty else None
        )
        self.df_test["RainToday"] = self.df_test["RainToday"].fillna(moda_RainToday_test)

        self.df_train["RainToday"] = self.df_train["RainToday"].map({"Yes": 1, "No": 0})
        self.df_test["RainToday"] = self.df_test["RainToday"].map({"Yes": 1, "No": 0})

        return self.df_train, self.df_test
    
    def RainTomorrow(self):
        moda_RainTomorrow_test = self.df_test.groupby("Date")["RainTomorrow"].transform(
            lambda x: x.mode().iloc[0] if not x.mode().empty else None
        )
        self.df_test["RainTomorrow"] = self.df_test["RainTomorrow"].fillna(moda_RainTomorrow_test)

        moda_RainfallTomorrow_test = self.df_test.groupby("Date")["RainfallTomorrow"].transform(
            lambda x: x.mode().iloc[0] if not x.mode().empty else None
        )
        self.df_test["RainfallTomorrow"] = self.df_test["RainfallTomorrow"].fillna(
            moda_RainfallTomorrow_test
        )

        self.df_train["RainTomorrow"] = self.df_train["RainTomorrow"].map({"Yes": 1, "No": 0})
        self.df_test["RainTomorrow"] = self.df_test["RainTomorrow"].map({"Yes": 1, "No": 0})

        return self.df_train, self.df_test
    
    def agrupar_direcciones(self, direccion):
        grupos_principales = {
            "N": ["N", "NNW", "NNE"],
            "S": ["S", "SSW", "SSE"],
            "E": ["E", "ENE", "ESE", "SE", "NE"],
            "W": ["W", "WNW", "WSW", "SW", "NW"],
        }

        for grupo, direcciones in grupos_principales.items():
            if direccion in direcciones:
                return grupo

        return "Otro"

    def Dummies(self):
        orien = [
            "SSW",
            "S",
            "SE",
            "NNE",
            "WNW",
            "N",
            "ENE",
            "NE",
            "E",
            "SW",
            "W",
            "WSW",
            "NNW",
            "ESE",
            "SSE",
            "NW",
        ]
        self.df_train["WindGustDir_Agrupado"] = self.df_train["WindGustDir"].apply(self.agrupar_direcciones)
        self.df_train["WindDir9am_Agrupado"] = self.df_train["WindDir9am"].apply(self.agrupar_direcciones)
        self.df_train["WindDir3pm_Agrupado"] = self.df_train["WindDir3pm"].apply(self.agrupar_direcciones)
        self.df_train = self.df_train.drop("WindGustDir", axis=1)
        self.df_train = self.df_train.drop("WindDir9am", axis=1)
        self.df_train = self.df_train.drop("WindDir3pm", axis=1)

        self.df_test["WindGustDir_Agrupado"] = self.df_test["WindGustDir"].apply(self.agrupar_direcciones)
        self.df_test["WindDir9am_Agrupado"] = self.df_test["WindDir9am"].apply(self.agrupar_direcciones)
        self.df_test["WindDir3pm_Agrupado"] = self.df_test["WindDir3pm"].apply(self.agrupar_direcciones)
        self.df_test = self.df_test.drop("WindGustDir", axis=1)
        self.df_test = self.df_test.drop("WindDir9am", axis=1)
        self.df_test = self.df_test.drop("WindDir3pm", axis=1)

        ## WindGustDir
        d_WindGustDir_train = pd.get_dummies(
        self.df_train["WindGustDir_Agrupado"], dtype=int, drop_first=True
        )

        d_WindGustDir_train = d_WindGustDir_train.rename(
            columns={"N": "WindGustDir_N", "S": "WindGustDir_S", "W": "WindGustDir_W"}
        )
        self.df_train = self.df_train.drop("WindGustDir_Agrupado", axis=1)
        self.df_train = pd.concat([self.df_train, d_WindGustDir_train], axis=1)

        d_WindGustDir_test = pd.get_dummies(
            self.df_test["WindGustDir_Agrupado"], dtype=int, drop_first=True
        )
        d_WindGustDir_test = d_WindGustDir_test.rename(
            columns={"N": "WindGustDir_N", "S": "WindGustDir_S", "W": "WindGustDir_W"}
        )
        self.df_test = self.df_test.drop("WindGustDir_Agrupado", axis=1)
        self.df_test = pd.concat([self.df_test, d_WindGustDir_test], axis=1)

        ## WindDir9am
        d_WindDir9am_train = pd.get_dummies(
        self.df_train["WindDir9am_Agrupado"], dtype=int, drop_first=True
        )
        d_WindDir9am_train = d_WindDir9am_train.rename(
            columns={"N": "WindDir9am_N", "S": "WindDir9am_S", "W": "WindDir9am_W"}
        )
        self.df_train = self.df_train.drop("WindDir9am_Agrupado", axis=1)
        self.df_train = pd.concat([self.df_train, d_WindDir9am_train], axis=1)

        d_WindDir9am_test = pd.get_dummies(
            self.df_test["WindDir9am_Agrupado"], dtype=int, drop_first=True
        )
        d_WindDir9am_test = d_WindDir9am_test.rename(
            columns={"N": "WindDir9am_N", "S": "WindDir9am_S", "W": "WindDir9am_W"}
        )
        self.df_test = self.df_test.drop("WindDir9am_Agrupado", axis=1)
        self.df_test = pd.concat([self.df_test, d_WindDir9am_test], axis=1)

        ## WindDir3pm
        d_WindDir3pm_train = pd.get_dummies(
        self.df_train["WindDir3pm_Agrupado"], dtype=int, drop_first=True
        )
        d_WindDir3pm_train = d_WindDir3pm_train.rename(
            columns={"N": "WindDir3pm_N", "S": "WindDir3pm_S", "W": "WindDir3pm_W"}
        )
        self.df_train = self.df_train.drop("WindDir3pm_Agrupado", axis=1)
        self.df_train = pd.concat([self.df_train, d_WindDir3pm_train], axis=1)
        
        d_WindDir3pm_test = pd.get_dummies(
            self.df_test["WindDir3pm_Agrupado"], dtype=int, drop_first=True
        )
        d_WindDir3pm_test = d_WindDir3pm_test.rename(
            columns={"N": "WindDir3pm_N", "S": "WindDir3pm_S", "W": "WindDir3pm_W"}
        )
        self.df_test = self.df_test.drop("WindDir3pm_Agrupado", axis=1)
        self.df_test = pd.concat([self.df_test, d_WindDir3pm_test], axis=1)

        return self.df_train, self.df_test
    
    def drop(self):
        self.df_train.drop("Bimestre", axis=1, inplace=True)
        self.df_train.drop("Date", axis=1, inplace=True)
        self.df_test.drop("Bimestre", axis=1, inplace=True)
        self.df_test.drop("Date", axis=1, inplace=True)
        return self.df_train, self.df_test
        
    

### Scale Data class

In [None]:
class DataScaler:
    """Standardise a train/test pair of frames with one shared scaler.

    The scaler is fitted on the training frame only; the test frame is
    transformed with those same fitted parameters.
    """

    def __init__(self, df_train, df_test):
        """Store both frames and create the ``StandardScaler`` used by ``Scale``.

        Args:
            df_train: Training DataFrame (all columns are scaled).
            df_test: Test DataFrame with the same columns.
        """
        self.df_train = df_train
        self.df_test = df_test
        self.scaler = StandardScaler()

    def Scale(self):
        """Scale both frames and return them as new DataFrames.

        The returned frames keep the original column names but get a fresh
        default index, because they are rebuilt from the scaler's array output.

        Returns:
            tuple: ``(df_train, df_test)``.
        """
        train_columns = self.df_train.columns
        test_columns = self.df_test.columns

        # Learn the scaling parameters from the training data...
        train_scaled = self.scaler.fit_transform(self.df_train)
        # ...and reuse them for the test data. Re-fitting on the test set (as
        # the previous `fit_transform` call did) puts the two frames on
        # different scales and leaks test statistics into the evaluation.
        test_scaled = self.scaler.transform(self.df_test)

        self.df_train = pd.DataFrame(train_scaled, columns=train_columns)
        self.df_test = pd.DataFrame(test_scaled, columns=test_columns)
        return self.df_train, self.df_test

### Regularize Data Class

In [None]:
class DataRegularizer:
    """Split a train/test pair of frames into predictor matrices and target vectors."""

    # Predictor columns, in the order they appear in the X matrices.
    FEATURE_COLUMNS = (
        "Rainfall",
        "Evaporation",
        "Sunshine",
        "WindGustSpeed",
        "RainToday",
        "WindSpeed_Difference",
        "Humidity_Difference",
        "Cloud_Difference",
        "Pressure_Difference",
        "Temp_Difference",
        "Dif_Temp_Max_Min",
        "WindGustDir_N",
        "WindGustDir_S",
        "WindGustDir_W",
        "WindDir9am_N",
        "WindDir9am_S",
        "WindDir9am_W",
        "WindDir3pm_N",
        "WindDir3pm_S",
        "WindDir3pm_W",
    )
    # Column used as the target.
    TARGET_COLUMN = "RainfallTomorrow"

    def __init__(self, df_train, df_test):
        """Store both frames; the X/y attributes stay ``None`` until ``Regularize`` runs."""
        self.df_train = df_train
        self.df_test = df_test
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None

    def Regularize(self):
        """Select the predictor columns and the target column from each frame.

        Returns:
            tuple: ``(X_train, y_train, X_test, y_test)``.
        """
        predictors = list(self.FEATURE_COLUMNS)

        # X, y TRAIN
        self.X_train = self.df_train[predictors]
        self.y_train = self.df_train[self.TARGET_COLUMN]

        # X, y TEST
        self.X_test = self.df_test[predictors]
        self.y_test = self.df_test[self.TARGET_COLUMN]
        return self.X_train, self.y_train, self.X_test, self.y_test