<a href="https://colab.research.google.com/github/murmurmaomao/-Data-Science-for-Smart-Environments_GRS35306/blob/main/Hyperparameter_Optimization_Catboost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive (Colab only); the data root used by DataStorage below
# lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install catboost
!pip install optuna

Collecting catboost
  Downloading catboost-1.2.2-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2.2
Collecting optuna
  Downloading optuna-3.5.0-py3-none-any.whl (413 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m413.4/413.4 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.0-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.1-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m11.7 MB/s[0

# Import

In [None]:
import warnings

warnings.filterwarnings("ignore")

import os
import gc
import datetime

import numpy as np
import pandas as pd
import polars as pl
import plotly.express as px
import pyarrow as pa

import catboost as cb
import optuna
from sklearn.metrics import mean_absolute_error

import holidays

# Class

## Load Data

In [None]:
class DataStorage:
    """Loads the competition CSV tables and keeps them as polars frames.

    Each table is read from ``root`` restricted to the columns named in the
    matching ``*_cols`` class attribute. The schema of every table is
    remembered so that frames handed over later as pandas objects can be
    converted with the same dtypes (see ``update_with_new_data``).
    """

    # root = "..\data"
    root = "/content/drive/MyDrive/predict-energy-behavior-of-prosumers/Data"
    # root = "/kaggle/input/predict-energy-behavior-of-prosumers"

    # Columns kept from train.csv.
    data_cols = [
        "target", "county", "is_business", "product_type",
        "is_consumption", "datetime", "row_id",
    ]
    # Columns kept from client.csv.
    client_cols = [
        "product_type", "county", "eic_count",
        "installed_capacity", "is_business", "date",
    ]
    # Columns kept from gas_prices.csv / electricity_prices.csv.
    gas_prices_cols = ["forecast_date", "lowest_price_per_mwh", "highest_price_per_mwh"]
    electricity_prices_cols = ["forecast_date", "euros_per_mwh"]
    # Columns kept from forecast_weather.csv.
    forecast_weather_cols = [
        "latitude", "longitude", "origin_datetime", "hours_ahead",
        "temperature", "dewpoint",
        "cloudcover_high", "cloudcover_low", "cloudcover_mid", "cloudcover_total",
        "10_metre_u_wind_component", "10_metre_v_wind_component",
        "forecast_datetime",
        "direct_solar_radiation", "surface_solar_radiation_downwards",
        "snowfall", "total_precipitation",
    ]
    # Columns kept from historical_weather.csv.
    historical_weather_cols = [
        "datetime", "temperature", "dewpoint", "rain", "snowfall",
        "surface_pressure",
        "cloudcover_total", "cloudcover_low", "cloudcover_mid", "cloudcover_high",
        "windspeed_10m", "winddirection_10m",
        "shortwave_radiation", "direct_solar_radiation", "diffuse_radiation",
        "latitude", "longitude",
    ]
    # Columns kept from weather_station_to_county_mapping.csv.
    location_cols = ["longitude", "latitude", "county"]
    # Subset of the training columns that forms the target history table.
    target_cols = [
        "target", "county", "is_business", "product_type",
        "is_consumption", "datetime",
    ]

    def __init__(self):
        """Read every table from ``root`` and record the table schemas."""

        def load(file_name, columns):
            # All tables share the same reader options.
            return pl.read_csv(
                os.path.join(self.root, file_name),
                columns=columns,
                try_parse_dates=True,
            )

        self.df_data = load("train.csv", self.data_cols)
        self.df_client = load("client.csv", self.client_cols)
        self.df_gas_prices = load("gas_prices.csv", self.gas_prices_cols)
        self.df_electricity_prices = load(
            "electricity_prices.csv", self.electricity_prices_cols
        )
        self.df_forecast_weather = load(
            "forecast_weather.csv", self.forecast_weather_cols
        )
        self.df_historical_weather = load(
            "historical_weather.csv", self.historical_weather_cols
        )
        self.df_weather_station_to_county_mapping = load(
            "weather_station_to_county_mapping.csv", self.location_cols
        )

        # Keep only training rows inside the closed date window.
        window_start = pd.to_datetime("2022-01-01")
        window_end = pd.to_datetime("2023-05-17")
        inside_window = (pl.col("datetime") >= window_start) & (
            pl.col("datetime") <= window_end
        )
        self.df_data = self.df_data.filter(inside_window)
        self.df_target = self.df_data.select(self.target_cols)

        # Schemas are reused when converting pandas frames in
        # update_with_new_data.
        self.schema_data = self.df_data.schema
        self.schema_client = self.df_client.schema
        self.schema_gas_prices = self.df_gas_prices.schema
        self.schema_electricity_prices = self.df_electricity_prices.schema
        self.schema_forecast_weather = self.df_forecast_weather.schema
        self.schema_historical_weather = self.df_historical_weather.schema
        self.schema_target = self.df_target.schema

        # Station coordinates are cast to Float32; the feature generator casts
        # the weather coordinates the same way before joining on them.
        station_map = self.df_weather_station_to_county_mapping
        self.df_weather_station_to_county_mapping = station_map.with_columns(
            pl.col("latitude").cast(pl.datatypes.Float32),
            pl.col("longitude").cast(pl.datatypes.Float32),
        )

    def update_with_new_data(
        self,
        df_new_client,
        df_new_gas_prices,
        df_new_electricity_prices,
        df_new_forecast_weather,
        df_new_historical_weather,
        df_new_target,
    ):
        """Append new pandas frames to the stored tables, de-duplicated by key.

        Every argument is a pandas DataFrame containing at least the columns
        of the corresponding ``*_cols`` list. Each one is converted to polars
        using the schema recorded at construction, concatenated to the stored
        table and reduced to unique rows over that table's key columns.
        """

        def as_polars(frame, columns, schema):
            return pl.from_pandas(frame[columns], schema_overrides=schema)

        def merged(current, incoming, key_columns):
            return pl.concat([current, incoming]).unique(key_columns)

        # Convert everything first so no stored table changes if a
        # conversion fails.
        new_client = as_polars(df_new_client, self.client_cols, self.schema_client)
        new_gas = as_polars(
            df_new_gas_prices, self.gas_prices_cols, self.schema_gas_prices
        )
        new_electricity = as_polars(
            df_new_electricity_prices,
            self.electricity_prices_cols,
            self.schema_electricity_prices,
        )
        new_forecast = as_polars(
            df_new_forecast_weather,
            self.forecast_weather_cols,
            self.schema_forecast_weather,
        )
        new_historical = as_polars(
            df_new_historical_weather,
            self.historical_weather_cols,
            self.schema_historical_weather,
        )
        new_target = as_polars(df_new_target, self.target_cols, self.schema_target)

        self.df_client = merged(
            self.df_client,
            new_client,
            ["date", "county", "is_business", "product_type"],
        )
        self.df_gas_prices = merged(self.df_gas_prices, new_gas, ["forecast_date"])
        self.df_electricity_prices = merged(
            self.df_electricity_prices, new_electricity, ["forecast_date"]
        )
        self.df_forecast_weather = merged(
            self.df_forecast_weather,
            new_forecast,
            ["forecast_datetime", "latitude", "longitude", "hours_ahead"],
        )
        self.df_historical_weather = merged(
            self.df_historical_weather,
            new_historical,
            ["datetime", "latitude", "longitude"],
        )
        self.df_target = merged(
            self.df_target,
            new_target,
            ["datetime", "county", "is_business", "product_type", "is_consumption"],
        )



## Feature Engineering

In [None]:
class FeaturesGenerator:
    """Builds the model feature table from the frames held by a data storage.

    ``generate_features`` is the entry point: it takes a polars frame of
    prediction items (with or without a ``target`` column), runs the
    ``_add_*`` steps in a fixed order and returns a pandas DataFrame.
    """

    def __init__(self, data_storage):
        """Keep a reference to ``data_storage`` and pre-compute holiday dates.

        ``estonian_holidays`` is the list of holiday dates returned by the
        ``holidays`` package for country code "EE", years 2021 to 2025.
        """
        self.data_storage = data_storage
        self.estonian_holidays = list(
            holidays.country_holidays("EE", years=range(2021, 2026)).keys()
        )

    def _add_general_features(self, df_features):
        """Add calendar parts, the ``segment`` key and cyclic encodings."""
        df_features = (
            df_features.with_columns(
                pl.col("datetime").dt.ordinal_day().alias("dayofyear"),
                pl.col("datetime").dt.hour().alias("hour"),
                pl.col("datetime").dt.day().alias("day"),
                pl.col("datetime").dt.weekday().alias("weekday"),
                pl.col("datetime").dt.month().alias("month"),
                pl.col("datetime").dt.year().alias("year"),
            )
            .with_columns(
                # One string key per county/business/product/direction combo.
                pl.concat_str(
                    "county",
                    "is_business",
                    "product_type",
                    "is_consumption",
                    separator="_",
                ).alias("segment"),
            )
            .with_columns(
                # Period of 366 days for day-of-year and 24 hours for hour.
                (np.pi * pl.col("dayofyear") / 183).sin().alias("sin(dayofyear)"),
                (np.pi * pl.col("dayofyear") / 183).cos().alias("cos(dayofyear)"),
                (np.pi * pl.col("hour") / 12).sin().alias("sin(hour)"),
                (np.pi * pl.col("hour") / 12).cos().alias("cos(hour)"),
            )
        )
        return df_features

    def _add_client_features(self, df_features):
        """Left-join client attributes, using the client row dated 2 days earlier."""
        df_client = self.data_storage.df_client

        df_features = df_features.join(
            # Shifting the client date forward by two days means a feature row
            # for day D is matched with the client record of day D-2.
            df_client.with_columns(
                (pl.col("date") + pl.duration(days=2)).cast(pl.Date)
            ),
            on=["county", "is_business", "product_type", "date"],
            how="left",
        )
        return df_features

    def is_country_holiday(self, row):
        """Return True when ``row``'s year/month/day is in ``estonian_holidays``.

        ``row`` is a mapping with integer ``"year"``, ``"month"`` and
        ``"day"`` entries.
        """
        return (
            datetime.date(row["year"], row["month"], row["day"])
            in self.estonian_holidays
        )

    def _add_holidays_features(self, df_features):
        """Add the boolean ``is_country_holiday`` column (row-wise lookup)."""
        df_features = df_features.with_columns(
            pl.struct(["year", "month", "day"])
            .apply(self.is_country_holiday)
            .alias("is_country_holiday")
        )
        return df_features

    def _add_forecast_weather_features(self, df_features):
        """Join forecast-weather means (all stations and per county).

        Forecast rows are restricted to horizons of 22 to 45 hours, averaged
        per timestamp (all stations) and per county and timestamp, and joined
        at lags of 0 h and 168 h. Two local 0h/168h ratio features are added.
        """
        df_forecast_weather = self.data_storage.df_forecast_weather
        df_weather_station_to_county_mapping = (
            self.data_storage.df_weather_station_to_county_mapping
        )

        df_forecast_weather = (
            df_forecast_weather.rename({"forecast_datetime": "datetime"})
            # Both comparisons must be parenthesised: `&` binds tighter than
            # `<=`, so the unparenthesised form compared the result of the
            # `&` against 45 instead of bounding hours_ahead from above.
            .filter((pl.col("hours_ahead") >= 22) & (pl.col("hours_ahead") <= 45))
            .drop("hours_ahead")
            .with_columns(
                # Same dtype as the station mapping so the coordinate join matches.
                pl.col("latitude").cast(pl.datatypes.Float32),
                pl.col("longitude").cast(pl.datatypes.Float32),
            )
            .join(
                df_weather_station_to_county_mapping,
                how="left",
                on=["longitude", "latitude"],
            )
            .drop("longitude", "latitude", "origin_datetime")
        )

        # Mean over every station, per timestamp.
        df_forecast_weather_date = (
            df_forecast_weather.group_by("datetime").mean().drop("county")
        )

        # Mean per county, per timestamp (stations without a county are skipped).
        df_forecast_weather_local = (
            df_forecast_weather.filter(pl.col("county").is_not_null())
            .group_by("county", "datetime")
            .mean()
        )

        # NOTE(review): the join suffix appears to be applied only to columns
        # whose name already exists on the left side, so the very first join
        # (0 h, all stations) seems to add its columns unsuffixed, e.g. plain
        # "temperature" -- confirm against the resulting column names.
        for hours_lag in [0, 7 * 24]:
            df_features = df_features.join(
                df_forecast_weather_date.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ),
                on="datetime",
                how="left",
                suffix=f"_forecast_{hours_lag}h",
            )
            df_features = df_features.join(
                df_forecast_weather_local.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ),
                on=["county", "datetime"],
                how="left",
                suffix=f"_forecast_local_{hours_lag}h",
            )

        # Ratio of the current local forecast to the one a week earlier; the
        # 1e-3 offset presumably guards against division by zero.
        df_features = df_features.with_columns(
            (
                pl.col("temperature_forecast_local_0h")
                / (pl.col("temperature_forecast_local_168h") + 1e-3)
            ).alias("temperature_forecast_local_0h/168h"),
            (
                pl.col("surface_solar_radiation_downwards_forecast_local_0h")
                / (pl.col("surface_solar_radiation_downwards_forecast_local_168h") + 1e-3)
            ).alias("surface_solar_radiation_downwards_forecast_local_0h/168h"),
        )

        return df_features

    def _add_historical_weather_features(self, df_features):
        """Join lagged historical-weather means and derived ratio features.

        All-station and per-county means are joined at lags of 48 h and
        168 h. An additional all-station 24 h lag is joined using only source
        observations whose hour is at most 10.
        """
        df_historical_weather = self.data_storage.df_historical_weather
        df_weather_station_to_county_mapping = (
            self.data_storage.df_weather_station_to_county_mapping
        )

        df_historical_weather = (
            df_historical_weather.with_columns(
                pl.col("latitude").cast(pl.datatypes.Float32),
                pl.col("longitude").cast(pl.datatypes.Float32),
            )
            .join(
                df_weather_station_to_county_mapping,
                how="left",
                on=["longitude", "latitude"],
            )
            .drop("longitude", "latitude")
        )

        # Mean over every station, per timestamp.
        df_historical_weather_date = (
            df_historical_weather.group_by("datetime").mean().drop("county")
        )

        # Mean per county, per timestamp (stations without a county are skipped).
        df_historical_weather_local = (
            df_historical_weather.filter(pl.col("county").is_not_null())
            .group_by("county", "datetime")
            .mean()
        )

        for hours_lag in [2 * 24, 7 * 24]:
            df_features = df_features.join(
                df_historical_weather_date.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ),
                on="datetime",
                how="left",
                suffix=f"_historical_{hours_lag}h",
            )
            df_features = df_features.join(
                df_historical_weather_local.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ),
                on=["county", "datetime"],
                how="left",
                suffix=f"_historical_local_{hours_lag}h",
            )

        for hours_lag in [1 * 24]:
            df_features = df_features.join(
                df_historical_weather_date.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag),
                    # "hour" is the hour of the original (unshifted) observation.
                    pl.col("datetime").dt.hour().alias("hour"),
                )
                .filter(pl.col("hour") <= 10)
                .drop("hour"),
                on="datetime",
                how="left",
                suffix=f"_historical_{hours_lag}h",
            )

        # NOTE(review): the two "24h/48h" features divide by the unsuffixed
        # "temperature" / "direct_solar_radiation" columns. Those look like
        # they come from the first forecast-weather join rather than from the
        # 48 h historical lag -- confirm that this is intended.
        df_features = df_features.with_columns(
            (
                pl.col("temperature_historical_local_48h")
                / (pl.col("temperature_historical_local_168h") + 1e-3)
            ).alias("temperature_historical_local_48h/168h"),
            (
                pl.col("direct_solar_radiation_historical_local_48h")
                / (pl.col("direct_solar_radiation_historical_local_168h") + 1e-3)
            ).alias("direct_solar_radiation_historical_local_48h/168h"),
            (
                pl.col("temperature_historical_24h")
                / (pl.col("temperature") + 1e-3)
            ).alias("temperature_historical_24h/48h"),
            (
                pl.col("direct_solar_radiation_historical_24h")
                / (pl.col("direct_solar_radiation") + 1e-3)
            ).alias("direct_solar_radiation_historical_24h/48h"),
        )

        return df_features

    def _add_target_features(self, df_features):
        """Join lagged targets, lagged aggregate targets, stats and ratios."""
        df_target = self.data_storage.df_target

        # Target summed over product types.
        df_target_all_type_sum = (
            df_target.group_by(["datetime", "county", "is_business", "is_consumption"])
            .sum()
            .drop("product_type")
        )

        # Target summed over product types and counties.
        df_target_all_county_type_sum = (
            df_target.group_by(["datetime", "is_business", "is_consumption"])
            .sum()
            .drop("product_type", "county")
        )

        # Per-segment target lags of 2 to 14 days.
        for hours_lag in [days * 24 for days in range(2, 15)]:
            df_features = df_features.join(
                df_target.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ).rename({"target": f"target_{hours_lag}h"}),
                on=[
                    "county",
                    "is_business",
                    "product_type",
                    "is_consumption",
                    "datetime",
                ],
                how="left",
            )

        for hours_lag in [2 * 24, 3 * 24, 7 * 24, 14 * 24]:
            df_features = df_features.join(
                df_target_all_type_sum.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ).rename({"target": f"target_all_type_sum_{hours_lag}h"}),
                on=["county", "is_business", "is_consumption", "datetime"],
                how="left",
            )

            df_features = df_features.join(
                df_target_all_county_type_sum.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ).rename({"target": f"target_all_county_type_sum_{hours_lag}h"}),
                on=["is_business", "is_consumption", "datetime"],
                how="left",
                suffix=f"_all_county_type_sum_{hours_lag}h",
            )

        # Row-wise mean and standard deviation over the 2 to 5 day lags.
        cols_for_stats = [
            f"target_{hours_lag}h" for hours_lag in [2 * 24, 3 * 24, 4 * 24, 5 * 24]
        ]
        df_features = df_features.with_columns(
            df_features.select(cols_for_stats).mean(axis=1).alias("target_mean"),
            # Transposing turns rows into columns so the column-wise std gives
            # one value per original row.
            df_features.select(cols_for_stats)
            .transpose()
            .std()
            .transpose()
            .to_series()
            .alias("target_std"),
        )

        # Ratios between pairs of lags; 1e-3 offsets the denominator.
        for target_prefix, lag_nominator, lag_denomonator in [
            ("target", 24 * 7, 24 * 14),
            ("target", 24 * 2, 24 * 9),
            ("target", 24 * 3, 24 * 10),
            ("target", 24 * 2, 24 * 3),
            ("target_all_type_sum", 24 * 2, 24 * 3),
            ("target_all_type_sum", 24 * 7, 24 * 14),
            ("target_all_county_type_sum", 24 * 2, 24 * 3),
            ("target_all_county_type_sum", 24 * 7, 24 * 14),
        ]:
            df_features = df_features.with_columns(
                (
                    pl.col(f"{target_prefix}_{lag_nominator}h")
                    / (pl.col(f"{target_prefix}_{lag_denomonator}h") + 1e-3)
                ).alias(f"{target_prefix}_ratio_{lag_nominator}_{lag_denomonator}")
            )

        return df_features

    def _reduce_memory_usage(self, df_features):
        """Downcast every Float64 column to Float32."""
        df_features = df_features.with_columns(pl.col(pl.Float64).cast(pl.Float32))
        return df_features

    def _drop_columns(self, df_features):
        """Drop helper columns that are not used as model features."""
        df_features = df_features.drop(
            "date", "datetime", "hour", "dayofyear"
        )
        return df_features

    def _to_pandas(self, df_features, y):
        """Convert to pandas, append ``y`` if given and set categorical dtypes.

        ``df_features`` and ``y`` must expose ``to_pandas()``; ``y`` may be
        ``None``. The ``row_id`` column is removed when present.
        """
        cat_cols = [
            "county",
            "is_business",
            "product_type",
            "is_consumption",
            "segment",
        ]

        if y is not None:
            df_features = pd.concat([df_features.to_pandas(), y.to_pandas()], axis=1)
        else:
            df_features = df_features.to_pandas()

        df_features[cat_cols] = df_features[cat_cols].astype("category")

        if 'row_id' in df_features.columns:
            df_features = df_features.drop("row_id", axis=1)

        return df_features

    def generate_features(self, df_prediction_items):
        """Run the whole feature pipeline and return a pandas DataFrame.

        When ``df_prediction_items`` has a ``target`` column it is split off
        before feature generation and appended again as the last column of
        the result.
        """
        if "target" in df_prediction_items.columns:
            df_prediction_items, y = (
                df_prediction_items.drop("target"),
                df_prediction_items.select("target"),
            )
        else:
            y = None

        # "date" is the join key for the client table; it is dropped again in
        # _drop_columns.
        df_features = df_prediction_items.with_columns(
            pl.col("datetime").cast(pl.Date).alias("date"),
        )

        for add_features in [
            self._add_general_features,
            self._add_client_features,
            self._add_forecast_weather_features,
            self._add_historical_weather_features,
            self._add_target_features,
            self._add_holidays_features,
            self._reduce_memory_usage,
            self._drop_columns,
        ]:
            df_features = add_features(df_features)

        df_features = self._to_pandas(df_features, y)

        return df_features

# Initialisation

In [None]:
# Read all CSV tables once, then build the feature generator on top of them.
data_storage = DataStorage()
features_generator = FeaturesGenerator(data_storage=data_storage)

### Feature Generation

In [None]:
# Build the training feature table, then keep only rows with a known target.
# The index is not reset, so boolean masks derived from this frame stay aligned.
df_train_features = features_generator.generate_features(data_storage.df_data)
df_train_features = df_train_features[df_train_features['target'].notnull()]

In [None]:
# (rows, columns) of the training feature table, target column included.
df_train_features.shape

(1604324, 172)

# Catboost Models Hyperparameter Optimization

In [None]:
def catboost_model(mask,trial,is_diff):
    """Optuna objective: fit one CatBoost regressor and return its hold-out MAE.

    Parameters
    ----------
    mask : boolean indexer
        Selects the rows of the module-level ``df_train_features`` to use.
    trial : optuna.trial.Trial
        Source of the hyperparameter suggestions.
    is_diff : int
        0 to model ``target`` directly, 1 to model ``target`` minus
        ``target_48h`` (missing lag values count as 0).

    Returns
    -------
    float
        Mean absolute error on the last 15% of the selected rows.
    """
    # Apply the mask once; the feature frame is large.
    data = df_train_features[mask]
    X = data.drop(columns=["target"])
    Y = data["target"] - (data["target_48h"].fillna(0) * is_diff)

    # Ordered split on a single index so every row lands in exactly one part
    # (two independently truncated head/tail sizes could leave rows unused).
    train_size = 0.85
    split_idx = int(len(Y) * train_size)
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = Y.iloc[:split_idx], Y.iloc[split_idx:]

    params = {
        "random_seed": 49,
        "loss_function": "RMSE",
        "task_type": "GPU",
        "bootstrap_type": "Bernoulli",

        "iterations": trial.suggest_int("iterations", 2000, 10000, step=500),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.5),
        "depth": trial.suggest_int("depth", 1, 15),
        # suggest_float replaces the deprecated suggest_uniform (same range).
        "random_strength": trial.suggest_float("random_strength", 10, 50),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        # NOTE(review): min_data_in_leaf may have no effect with CatBoost's
        # default tree growing policy -- confirm against the CatBoost docs.
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 10, 1000),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.1, 5),
    }
    cat_features = ["county", "is_business", "product_type", "is_consumption", "segment"]

    model = cb.CatBoostRegressor(**params, silent=True)
    # NOTE(review): the hold-out is used both as eval_set and for the score
    # returned below; if CatBoost selects its best iteration on eval_set, the
    # reported MAE is optimistic -- confirm.
    model.fit(X_train, y_train, cat_features=cat_features, eval_set=(X_test, y_test))
    pred_cat = model.predict(X_test)
    mae = mean_absolute_error(y_test, pred_cat)
    return mae

In [None]:
def Hyperparameter_study(mask,study_name,is_diff,n_trials):
    """Create a minimising Optuna study and run ``n_trials`` CatBoost trials.

    ``mask`` and ``is_diff`` are forwarded unchanged to ``catboost_model``
    for every trial. The finished study is returned.
    """
    def objective(trial):
        # Bind the row mask and target mode; Optuna supplies only the trial.
        return catboost_model(mask, trial, is_diff)

    new_study = optuna.create_study(study_name=study_name, direction='minimize')
    new_study.optimize(objective, n_trials=n_trials)
    return new_study

In [None]:
def Hyperparameter_study_visualization(study,target_name,study_name):
    """Show the standard Optuna diagnostic plots and print the best trial.

    ``target_name`` is used as the objective label in every plot and
    ``study_name`` only in the printed summary.
    """
    plot_names = (
        "plot_optimization_history",  # objective value per trial
        "plot_parallel_coordinate",   # hyperparameters vs. score, interactive
        "plot_slice",                 # which parts of the search space were explored
        "plot_param_importances",     # parameter importances
        "plot_edf",                   # empirical distribution function
    )
    for plot_name in plot_names:
        make_figure = getattr(optuna.visualization, plot_name)
        make_figure(study, target_name=target_name).show()

    print(f"\tBest value (MAE): {study.best_value:.5f}")
    print(f"\tBest params {study_name}:")
    for param_name in study.best_params:
        print(f"\t\t{param_name}: {study.best_params[param_name]}")

In [None]:
# Boolean row masks over df_train_features: one per energy direction.
consumption = df_train_features["is_consumption"] == 1
production = df_train_features["is_consumption"] == 0
# Number of Optuna trials run for each of the four studies below.
n_trials = 30

## Consumption_model_Catboost

In [None]:
# Tune the consumption model on the raw target (is_diff=0).
study_consumption = Hyperparameter_study(mask=consumption,study_name="Consumption_model_Catboost",is_diff=0 , n_trials=n_trials)

[I 2024-01-25 02:30:55,243] A new study created in memory with name: Consumption_model_Catboost
[I 2024-01-25 02:33:55,801] Trial 0 finished with value: 73.47410375895937 and parameters: {'iterations': 8500, 'learning_rate': 0.3926234976584529, 'depth': 7, 'random_strength': 37.06472597365693, 'subsample': 0.9031576424454716, 'min_data_in_leaf': 739, 'reg_lambda': 0.30851441817550884}. Best is trial 0 with value: 73.47410375895937.
[I 2024-01-25 02:39:07,981] Trial 1 finished with value: 73.66797406657264 and parameters: {'iterations': 10000, 'learning_rate': 0.4386559442646001, 'depth': 9, 'random_strength': 20.740944047136537, 'subsample': 0.9316126983301964, 'min_data_in_leaf': 621, 'reg_lambda': 1.4491060338116233}. Best is trial 0 with value: 73.47410375895937.
[I 2024-01-25 02:40:37,678] Trial 2 finished with value: 64.0505171824274 and parameters: {'iterations': 5000, 'learning_rate': 0.22997621133602522, 'depth': 6, 'random_strength': 10.952091998710207, 'subsample': 0.64895547

In [None]:
# Plot the search diagnostics and print the best trial of the consumption study.
Hyperparameter_study_visualization(study = study_consumption,target_name="MAE",study_name="Consumption_model_Catboost")

	Best value (MAE): 59.43302
	Best params Consumption_model_Catboost:
		iterations: 4500
		learning_rate: 0.01240813245424538
		depth: 11
		random_strength: 15.676074935669043
		subsample: 0.7877393817115238
		min_data_in_leaf: 119
		reg_lambda: 3.7806933338325157


## Production_model_Catboost

In [None]:
# Tune the production model on the raw target (is_diff=0).
study_production = Hyperparameter_study(mask=production,study_name="Production_model_Catboost",is_diff=0 , n_trials=n_trials)

[I 2024-01-25 06:01:25,596] A new study created in memory with name: Production_model_Catboost
[I 2024-01-25 06:03:09,475] Trial 0 finished with value: 52.84381830461911 and parameters: {'iterations': 9000, 'learning_rate': 0.13100131645113774, 'depth': 4, 'random_strength': 12.94205724425176, 'subsample': 0.547751447599385, 'min_data_in_leaf': 463, 'reg_lambda': 4.886361816548906}. Best is trial 0 with value: 52.84381830461911.
[I 2024-01-25 06:06:41,360] Trial 1 finished with value: 59.18584277926279 and parameters: {'iterations': 10000, 'learning_rate': 0.13993963009677296, 'depth': 7, 'random_strength': 30.456669595064977, 'subsample': 0.9781468646178463, 'min_data_in_leaf': 259, 'reg_lambda': 4.793955999292637}. Best is trial 0 with value: 52.84381830461911.
[I 2024-01-25 06:07:34,078] Trial 2 finished with value: 56.500136858845124 and parameters: {'iterations': 8500, 'learning_rate': 0.40276536693061205, 'depth': 2, 'random_strength': 15.355500424655816, 'subsample': 0.741421975

In [None]:
# Plot the search diagnostics and print the best trial of the production study.
Hyperparameter_study_visualization(study = study_production,target_name="MAE",study_name="Production_model_Catboost")

	Best value (MAE): 51.42945
	Best params Production_model_Catboost:
		iterations: 8000
		learning_rate: 0.05078515042769798
		depth: 3
		random_strength: 14.176793288653343
		subsample: 0.5249233420252732
		min_data_in_leaf: 754
		reg_lambda: 3.8887691626560406


## Consumption_48h_diif_model_Catboost

In [None]:
# Tune the consumption model on target minus its 48 h lag (is_diff=1).
study_consumption_48h_diif = Hyperparameter_study(mask=consumption, study_name="Consumption_48h_diif_model_Catboost", is_diff=1 , n_trials=n_trials)

[I 2024-01-25 07:28:18,552] A new study created in memory with name: Consumption_48h_diif_model_Catboost
[I 2024-01-25 07:29:14,947] Trial 0 finished with value: 60.85326948930696 and parameters: {'iterations': 2500, 'learning_rate': 0.07025670457566684, 'depth': 7, 'random_strength': 48.60503106383933, 'subsample': 0.5163295285540281, 'min_data_in_leaf': 176, 'reg_lambda': 1.4379244941221576}. Best is trial 0 with value: 60.85326948930696.
[I 2024-01-25 08:12:42,858] Trial 1 finished with value: 68.20807624489647 and parameters: {'iterations': 6500, 'learning_rate': 0.1727322601503304, 'depth': 15, 'random_strength': 33.21424656944389, 'subsample': 0.7550934084773187, 'min_data_in_leaf': 658, 'reg_lambda': 4.9504333536576235}. Best is trial 0 with value: 60.85326948930696.
[I 2024-01-25 08:14:15,637] Trial 2 finished with value: 67.18466645781865 and parameters: {'iterations': 8000, 'learning_rate': 0.4172506439258091, 'depth': 4, 'random_strength': 11.04573017451644, 'subsample': 0.8

In [None]:
# Plot the search diagnostics and print the best trial of the consumption 48 h-difference study.
Hyperparameter_study_visualization(study = study_consumption_48h_diif,target_name="MAE",study_name="Consumption_48h_diif_model_Catboost")

	Best value (MAE): 58.38045
	Best params Consumption_48h_diif_model_Catboost:
		iterations: 9000
		learning_rate: 0.016862985221162422
		depth: 10
		random_strength: 29.84529179985335
		subsample: 0.5799090266204205
		min_data_in_leaf: 790
		reg_lambda: 2.9400782862345847


## Production_48h_diif_model_Catboost

In [None]:
# Tune the production model on target minus its 48 h lag (is_diff=1).
study_production_48h_diif = Hyperparameter_study(mask=production, study_name="Production_48h_diif_model_Catboost", is_diff=1 , n_trials=n_trials)

[I 2024-01-25 11:28:30,038] A new study created in memory with name: Production_48h_diif_model_Catboost
[I 2024-01-25 11:30:10,824] Trial 0 finished with value: 52.695500492837915 and parameters: {'iterations': 4500, 'learning_rate': 0.012268578655076297, 'depth': 7, 'random_strength': 29.835148887994922, 'subsample': 0.752280310808296, 'min_data_in_leaf': 561, 'reg_lambda': 3.329176488799522}. Best is trial 0 with value: 52.695500492837915.
[I 2024-01-25 11:34:52,619] Trial 1 finished with value: 60.59160896308812 and parameters: {'iterations': 7000, 'learning_rate': 0.23419177689344747, 'depth': 10, 'random_strength': 49.94688283912376, 'subsample': 0.7474349687278995, 'min_data_in_leaf': 627, 'reg_lambda': 1.2136376629514343}. Best is trial 0 with value: 52.695500492837915.
[I 2024-01-25 11:43:59,464] Trial 2 finished with value: 62.99101019931487 and parameters: {'iterations': 10000, 'learning_rate': 0.3710815554219042, 'depth': 11, 'random_strength': 43.536341053572045, 'subsample

In [None]:
# Report the production 48 h-difference study. This call previously passed the
# consumption study, so re-run the cell to refresh the recorded output.
Hyperparameter_study_visualization(study = study_production_48h_diif,target_name="MAE",study_name="Production_48h_diif_model_Catboost")

	Best value (MAE): 58.38045
	Best params Production_48h_diif_model_Catboost:
		iterations: 9000
		learning_rate: 0.016862985221162422
		depth: 10
		random_strength: 29.84529179985335
		subsample: 0.5799090266204205
		min_data_in_leaf: 790
		reg_lambda: 2.9400782862345847
