**This notebook is just a slight modification of the below notebook**(Hyperparam tuning)


https://www.kaggle.com/code/albansteff/enefit-estonian-holidays-lb-65-79



In [1]:
import warnings

warnings.filterwarnings("ignore")

import os, joblib, gc, pickle
import yaml
import numpy as np
import pandas as pd
import polars as pl
import plotly.express as px
import seaborn as sns
import catboost as cbt 
import lightgbm as lgb
import matplotlib.pyplot as plt
import ctypes;
libc = ctypes.CDLL('libc.so.6');
from tqdm import tqdm
import holidays
import datetime 
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.tsa.stattools import adfuller, coint
import sklearn

import torch
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts

# Classes

In [2]:
!mkdir models

In [3]:
path_models = '/kaggle/input/models-enefit/'
path_data = '/kaggle/input/dataset-enefit/'

In [4]:
n_models = 1
n_estimators = 2500
device ='cpu'
l_ca = ["county", "is_business", "product_type", "is_consumption", 'segment']

In [5]:
# LSTM
sequence_length = 7
batch_size = 1024 * 10

In [6]:
log_feature = ['cloudcover_high_historical_168h', 'cloudcover_mid_historical_local_168h', 'cloudcover_mid_historical_24h', 'snowfall_historical_48h', 'rain_historical_168h', 'cloudcover_high_historical_local_168h', 'temperature_historical_24h', 'windspeed_10m_historical_local_168h', 'cloudcover_total_forecast_local_168h', 'dewpoint_historical_48h', 'cloudcover_high_historical_48h', 'surface_pressure_historical_24h', 'surface_pressure_historical_local_48h', 'cloudcover_mid_forecast_local_168h', 'winddirection_10m_historical_168h', 'cloudcover_low_historical_168h', '10_metre_v_wind_component_forecast_local_0h', 'cloudcover_total_forecast_168h', 'cloudcover_total_historical_168h', 'target_all_county_type_sum_ratio_168_336', 'winddirection_10m_historical_local_168h', 'direct_solar_radiation_historical_24h']
l1=['installed_capacity', 'target_mean', 'target_std']

### DataStorage

In [7]:
class DataStorage:
    root = "/kaggle/input/predict-energy-behavior-of-prosumers"

    data_cols = ["target","county","is_business","product_type","is_consumption","datetime","row_id",]
    client_cols = ["product_type","county","eic_count","installed_capacity","is_business","date",]
    gas_prices_cols = ["forecast_date", "lowest_price_per_mwh", "highest_price_per_mwh"]
    electricity_prices_cols = ["forecast_date", "euros_per_mwh"]
    forecast_weather_cols = ["latitude","longitude","hours_ahead","temperature","dewpoint","cloudcover_high","cloudcover_low","cloudcover_mid","cloudcover_total","10_metre_u_wind_component","10_metre_v_wind_component","forecast_datetime","direct_solar_radiation","surface_solar_radiation_downwards","snowfall","total_precipitation",]
    historical_weather_cols = ["datetime","temperature","dewpoint","rain","snowfall","surface_pressure","cloudcover_total","cloudcover_low","cloudcover_mid","cloudcover_high","windspeed_10m","winddirection_10m","shortwave_radiation",    "direct_solar_radiation","diffuse_radiation","latitude","longitude",]
    location_cols = ["longitude", "latitude", "county"]
    target_cols = ["target","county","is_business","product_type","is_consumption","datetime",]

    def __init__(self):
        self.df_data = pl.read_csv(os.path.join(self.root, "train.csv"),columns=self.data_cols,try_parse_dates=True,)
        self.df_client = pl.read_csv(os.path.join(self.root, "client.csv"),columns=self.client_cols,try_parse_dates=True,)
        self.df_gas_prices = pl.read_csv(os.path.join(self.root, "gas_prices.csv"),columns=self.gas_prices_cols,try_parse_dates=True,)
        self.df_electricity_prices = pl.read_csv(os.path.join(self.root, "electricity_prices.csv"),columns=self.electricity_prices_cols,try_parse_dates=True,)
        self.df_forecast_weather = pl.read_csv(os.path.join(self.root, "forecast_weather.csv"),columns=self.forecast_weather_cols,try_parse_dates=True,)
        self.df_historical_weather = pl.read_csv(os.path.join(self.root, "historical_weather.csv"),columns=self.historical_weather_cols,try_parse_dates=True,)
        self.df_weather_station_to_county_mapping = pl.read_csv(os.path.join(self.root, "weather_station_to_county_mapping.csv"),columns=self.location_cols,try_parse_dates=True,)
        self.df_data = self.df_data.filter(pl.col("datetime") >= pd.to_datetime("2022-01-01"))
        self.df_target = self.df_data.select(self.target_cols)

        self.schema_data = self.df_data.schema
        self.schema_client = self.df_client.schema
        self.schema_gas_prices = self.df_gas_prices.schema
        self.schema_electricity_prices = self.df_electricity_prices.schema
        self.schema_forecast_weather = self.df_forecast_weather.schema
        self.schema_historical_weather = self.df_historical_weather.schema
        self.schema_target = self.df_target.schema

        self.df_weather_station_to_county_mapping = (self.df_weather_station_to_county_mapping.with_columns(pl.col("latitude").cast(pl.datatypes.Float32),pl.col("longitude").cast(pl.datatypes.Float32),))
        self.max_date_data = self.df_data['datetime'].max()

    def update_with_new_data(self,df_new_client,df_new_gas_prices,df_new_electricity_prices,df_new_forecast_weather,df_new_historical_weather,df_new_target,):
        df_new_client = pl.from_pandas(df_new_client[self.client_cols], schema_overrides=self.schema_client)
        df_new_gas_prices = pl.from_pandas(df_new_gas_prices[self.gas_prices_cols],schema_overrides=self.schema_gas_prices,)
        df_new_electricity_prices = pl.from_pandas(df_new_electricity_prices[self.electricity_prices_cols],schema_overrides=self.schema_electricity_prices,)
        df_new_forecast_weather = pl.from_pandas(df_new_forecast_weather[self.forecast_weather_cols],schema_overrides=self.schema_forecast_weather,)
        df_new_historical_weather = pl.from_pandas(df_new_historical_weather[self.historical_weather_cols],schema_overrides=self.schema_historical_weather,)
        df_new_target = pl.from_pandas(df_new_target[self.target_cols], schema_overrides=self.schema_target)

        self.df_client = pl.concat([self.df_client, df_new_client]).unique(["date", "county", "is_business", "product_type"])
        self.df_gas_prices = pl.concat([self.df_gas_prices, df_new_gas_prices]).unique(["forecast_date"])
        self.df_electricity_prices = pl.concat([self.df_electricity_prices, df_new_electricity_prices]).unique(["forecast_date"])
        self.df_forecast_weather = pl.concat([self.df_forecast_weather, df_new_forecast_weather]).unique(["forecast_datetime", "latitude", "longitude", "hours_ahead"])
        self.df_historical_weather = pl.concat([self.df_historical_weather, df_new_historical_weather]).unique(["datetime", "latitude", "longitude"])
        self.df_target = pl.concat([self.df_target, df_new_target]).unique(["datetime", "county", "is_business", "product_type", "is_consumption"])

    def preprocess_test(self, df_test):
        df_test = df_test.rename(columns={"prediction_datetime": "datetime"})
        df_test = pl.from_pandas(df_test[self.data_cols[1:]], schema_overrides=self.schema_data)
        return df_test
    
    def filter_on_test(self, df_test):
        df_test = df_test.filter(df_test['datetime'] > self.max_date_data)
        return df_test

### FeaturesGenerator

In [8]:
class FeaturesGenerator:
    def __init__(self, data_storage):
        self.data_storage = data_storage
        self.estonian_holidays = holidays.country_holidays('EE', years=range(2021, 2026))
        self.first = True
        self.hours_list= [i * 24 for i in range(2,15)]
        self.all_targets_laged = [ f"target_{hours_lag}h" for hours_lag in self.hours_list]
        self.cols_for_stats = [ f"target_{hours_lag}h" for hours_lag in [2 * 24, 3 * 24, 4 * 24, 5 * 24, 7 * 24]]
        

    def _add_general_features(self, df_features):
        df_features = (
            df_features.with_columns(
                pl.col("datetime").dt.ordinal_day().alias("dayofyear"),
                pl.col("datetime").dt.date().alias("mdate"),
                pl.col("datetime").dt.hour().alias("hour"),
                pl.col("datetime").dt.day().alias("day"),
                pl.col("datetime").dt.weekday().alias("weekday"),
                pl.col("datetime").dt.month().alias("month"),
                pl.col("datetime").dt.year().alias("year"),
            )
            .with_columns(
                pl.concat_str("county","is_business","product_type","is_consumption",separator="_",).alias("segment"),
            )
            .with_columns(
                (np.pi * pl.col("dayofyear") / 183).sin().alias("sin(dayofyear)"),
                (np.pi * pl.col("dayofyear") / 183).cos().alias("cos(dayofyear)"),
                (np.pi * pl.col("hour") / 12).sin().alias("sin(hour)"),
                (np.pi * pl.col("hour") / 12).cos().alias("cos(hour)"),
            )
        )
        return df_features

    def _add_client_features(self, df_features):
        df_client = self.data_storage.df_client

        df_features = df_features.join(
            df_client.with_columns((pl.col("date") + pl.duration(days=2)).cast(pl.Date)),
            on=["county", "is_business", "product_type", "date"], how="left",
        )
        return df_features
    
    def is_country_holiday(self, row):
        return (datetime.date(row["year"], row["month"], row["day"])in self.estonian_holidays)

    def _holidays_features(self, df_features):
        df_features = df_features.with_columns(
            pl.struct(["year", "month", "day"])
            .apply(self.is_country_holiday)
            .alias("is_country_holiday")
        )
        return df_features
    
    def _add_forecast_weather_features(self, df_features):
        df_forecast_weather = self.data_storage.df_forecast_weather
        df_weather_station_to_county_mapping = (
            self.data_storage.df_weather_station_to_county_mapping
        )

        df_forecast_weather = (
            df_forecast_weather.rename({"forecast_datetime": "datetime"})
            .filter((pl.col("hours_ahead") >= 22) & pl.col("hours_ahead") <= 45)
            #.drop("hours_ahead")
            .with_columns(pl.col("latitude").cast(pl.datatypes.Float32),pl.col("longitude").cast(pl.datatypes.Float32),)
            .join(
                df_weather_station_to_county_mapping,
                how="left",
                on=["longitude", "latitude"],
            )
            .drop("longitude", "latitude")
        )

        df_forecast_weather_date = (df_forecast_weather.group_by("datetime").mean().drop("county"))

        df_forecast_weather_local = (df_forecast_weather.filter(pl.col("county").is_not_null()).group_by("county", "datetime").mean())

        for hours_lag in [0,  2 * 24, 7 * 24]:
            df_features = df_features.join(
                df_forecast_weather_date.with_columns(pl.col("datetime") + pl.duration(hours=hours_lag)),
                on="datetime",
                how="left",
                suffix=f"_forecast_{hours_lag}h",
            )
            df_features = df_features.join(
                df_forecast_weather_local.with_columns(pl.col("datetime") + pl.duration(hours=hours_lag)),
                on=["county", "datetime"],
                how="left",
                suffix=f"_forecast_local_{hours_lag}h",
            )

        return df_features

    def _add_historical_weather_features(self, df_features):
        df_historical_weather = self.data_storage.df_historical_weather
        df_weather_station_to_county_mapping = (self.data_storage.df_weather_station_to_county_mapping)

        df_historical_weather = (
            df_historical_weather.with_columns(
                pl.col("latitude").cast(pl.datatypes.Float32),
                pl.col("longitude").cast(pl.datatypes.Float32),
            )
            .join(
                df_weather_station_to_county_mapping,
                how="left",
                on=["longitude", "latitude"],
            )
            .drop("longitude", "latitude")
        )

        df_historical_weather_date = (
            df_historical_weather.group_by("datetime").mean().drop("county")
        )

        df_historical_weather_local = (
            df_historical_weather.filter(pl.col("county").is_not_null())
            .group_by("county", "datetime")
            .mean()
        )

        for hours_lag in [2 * 24, 7 * 24]:
            df_features = df_features.join(
                df_historical_weather_date.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ),
                on="datetime",
                how="left",
                suffix=f"_historical_{hours_lag}h",
            )
            df_features = df_features.join(
                df_historical_weather_local.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ),
                on=["county", "datetime"],
                how="left",
                suffix=f"_historical_local_{hours_lag}h",
            )

        for hours_lag in [1 * 24]:
            df_features = df_features.join(
                df_historical_weather_date.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag),
                    pl.col("datetime").dt.hour().alias("hour"),
                )
                .filter(pl.col("hour") <= 10)
                .drop("hour"),
                on="datetime",
                how="left",
                suffix=f"_historical_{hours_lag}h",
            )

        return df_features

    def _add_target_features(self, df_features):
        df_target = self.data_storage.df_target

        df_target_all_type_sum = (df_target.group_by(["datetime", "county", "is_business", "is_consumption"]).sum().drop("product_type"))
        df_target_all_county_type_sum = (df_target.group_by(["datetime", "is_business", "is_consumption"]).sum().drop("product_type", "county"))

        for hours_lag in self.hours_list:
            df_features = df_features.join(
                df_target.with_columns(pl.col("datetime") + pl.duration(hours=hours_lag)).rename({"target": f"target_{hours_lag}h"}),
                on=["county","is_business","product_type","is_consumption","datetime",], how="left",)

        for hours_lag in [2 * 24, 3 * 24, 7 * 24, 14 * 24]:
            df_features = df_features.join( df_target_all_type_sum.with_columns(pl.col("datetime") + pl.duration(hours=hours_lag)).rename({"target": f"target_all_type_sum_{hours_lag}h"}),
                on=["county", "is_business", "is_consumption", "datetime"], how="left",)

            df_features = df_features.join( df_target_all_county_type_sum.with_columns(pl.col("datetime") + pl.duration(hours=hours_lag)).rename({"target": f"target_all_county_type_sum_{hours_lag}h"}),
                on=["is_business", "is_consumption", "datetime"], how="left", suffix=f"_all_county_type_sum_{hours_lag}h", )

        df_features = df_features.with_columns(
            df_features.select(self.cols_for_stats).mean(axis=1).alias(f"target_mean"),
            df_features.select(self.cols_for_stats).transpose().std().transpose().to_series().alias(f"target_std"),
            df_features.select(self.all_targets_laged).mean(axis=1).alias(f"all_target_mean"),
            df_features.select(self.all_targets_laged).transpose().std().transpose().to_series().alias(f"all_target_std"),
        )

        for target_prefix, lag_nominator, lag_denomonator in [
            ("target", 24 * 7, 24 * 14),
            ("target", 24 * 2, 24 * 9),
            ("target", 24 * 3, 24 * 10),
            ("target", 24 * 2, 24 * 3),
            ("target_all_type_sum", 24 * 2, 24 * 3),
            ("target_all_type_sum", 24 * 7, 24 * 14),
            ("target_all_county_type_sum", 24 * 2, 24 * 3),
            ("target_all_county_type_sum", 24 * 7, 24 * 14),
        ]:
            df_features = df_features.with_columns(
                (pl.col(f"{target_prefix}_{lag_nominator}h")/ (pl.col(f"{target_prefix}_{lag_denomonator}h") + 1e-3)).alias(f"{target_prefix}_ratio_{lag_nominator}_{lag_denomonator}")
            )
        return df_features

    def _reduce_memory_usage(self, df_features):
        df_features = df_features.with_columns(pl.col(pl.Float64).cast(pl.Float32))
        return df_features

    def _drop_columns(self, df_features):
        df_features = df_features.drop(["date", "datetime", "hour", "dayofyear", 'mdate', 'literal',
                                        'year', 'log_target_all_county_type_sum_ratio_168_336',
                                        'windspeed_10m_historical_local_48h', 'cloudcover_low_historical_168h', 'total_precipitation_forecast_168h', 'cloudcover_high_forecast_168h', 'cloudcover_low_historical_local_168h', 'surface_pressure_historical_local_48h', '10_metre_u_wind_component_forecast_48h', 'log_winddirection_10m_historical_168h', 'log_cloudcover_low_historical_168h', 'cloudcover_high_historical_168h', 'surface_pressure_historical_168h', '10_metre_u_wind_component_forecast_local_0h', 'windspeed_10m', 'target_48h_max_day_county', 'cloudcover_high_historical_local_168h', 'cloudcover_low_forecast_168h', 'mean_3_ssrdfl0_mean_day', 'log_cloudcover_total_forecast_168h', 'rain', 'mean_5_target_48h_mean_day', 'log_cloudcover_total_historical_168h', 'target_48h_std_day_county', 'cloudcover_mid_forecast_local_168h', 'rain_historical_168h', 'mean_3_target_48h_mean_day', 'total_precipitation_forecast_48h', 'temperature_historical_24h', '10_metre_u_wind_component_forecast_local_48h', 'log_cloudcover_mid_forecast_local_168h', 'dewpoint_historical_48h', 'ssrdfl0_min_day_county', 'cloudcover_high_historical_24h', 'windspeed_10m_historical_168h', 'snowfall_forecast_48h', 'std_3_target_48h', 'mean_5_target_48h', '10_metre_v_wind_component_diff_1', 'rain_historical_local_168h', 'winddirection_10m', 'temperature_forecast_48h', 'log_cloudcover_high_historical_local_168h', 'z_score_5_ssrdfl0_mean_day', 'z_score_3_ssrdfl0_mean_day', 'dewpoint_historical_local_48h', 'cloudcover_high_forecast_48h', 'snowfall_historical_48h', 'snowfall_forecast_local_48h', 'log_cloudcover_high_historical_168h', 'mean_3_target_48h', 'log_windspeed_10m_historical_local_168h', 'std_5_target_48h_mean_day', 'surface_pressure', 'mean_5_target_48h_sr', 'log_cloudcover_total_forecast_local_168h', 'std_3_target_48h_mean_day', 'std_3_ssrdfl0_mean_day', 'z_score_5_target_48h_mean_day',
                                        'snowfall_forecast_168h', 'mean_5_ssrdfl0_mean_day', 'log_temperature_historical_24h', 'winddirection_10m_historical_local_48h', 'total_precipitation_forecast_local_48h', 'log_surface_pressure_historical_24h', 'cloudcover_high_forecast_local_48h', 'winddirection_10m_historical_24h', 'windspeed_10m_historical_local_168h', 'dewpoint_historical_24h', 'dewpoint_forecast_48h', 'std_5_target_48h', 'snowfall_historical_local_48h',
                                        'cloudcover_mid_historical_168h', 'cloudcover_total_historical_local_168h', 'cloudcover_mid_historical_local_168h', 'winddirection_10m_historical_local_168h', '10_metre_v_wind_component_forecast_local_168h', 'cloudcover_total_forecast_168h', '10_metre_v_wind_component_forecast_48h', 'std_5_ssrdfl0_mean_day', 'log_dewpoint_historical_48h', 'log_winddirection_10m_historical_local_168h', 'z_score_3_target_48h', 'kurt', 'snowfall_forecast_local_168h',
                                        'hours_ahead_forecast_168h', 'hours_ahead_forecast_local_168h',
                                       ], axis = 1 , errors='ignore')
        return df_features

    def _to_pandas(self, df_features, y):
        if y is not None:
            df_features = pd.concat([df_features.to_pandas(), y.to_pandas()], axis=1)
        else:
            df_features = df_features.to_pandas()

        df_features = df_features.set_index("row_id")
        df_features[l_ca] = df_features[l_ca].astype("category")

        #df_features = df_features.drop('segment', axis = 1)
        return df_features
    
    # added some new features here
    def _additional_features(self,df):        
        #####################################################
        df['feature1'] = df['installed_capacity'] * df['surface_solar_radiation_downwards'] / (df['temperature'] + 273.15)
        df['feature2'] = df['installed_capacity'] * df['surface_solar_radiation_downwards'] / df['total_precipitation']
        df['skew'] = df[self.cols_for_stats].skew(axis = 1)
        df['kurt'] = df[self.cols_for_stats].kurt(axis = 1)
        df['skew2'] = df[self.all_targets_laged].skew(axis = 1)
        df['kurt2'] = df[self.all_targets_laged].kurt(axis = 1)
        df['target_mean_r_all_target_mean'] = df['target_mean'] / df['all_target_mean']
        df['target_std_r_all_target_std'] = df['target_std'] / df['all_target_std']
        df['target_mean_sr'] = df['target_mean'] / df['target_std']
        df['all_target_sr'] = df['all_target_mean'] / df['all_target_std']
        
        for col in ['temperature', 'dewpoint', '10_metre_u_wind_component', '10_metre_v_wind_component', 'target_48h', 'feature1',  'target_168h']:
            for window in [1]:
                df[f"{col}_diff_{window}"] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])[col].diff(window)
                
        df['feature1_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["feature1_diff_1"].transform('mean')
        df['feature1_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["feature1_diff_1"].transform('std')
        
        df['target_48h_diff_1_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["target_48h_diff_1"].transform('mean')
        df['target_48h_diff_1_mean_day_county'] = df.groupby(['is_consumption', 'product_type', 'is_business', 'year', 'day', 'month'])["target_48h_diff_1"].transform('mean')
        df['target_48h_diff_1_std_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["target_48h_diff_1"].transform('std')
        df['target_48h_diff_1_std_day_county'] = df.groupby(['is_consumption', 'product_type', 'is_business', 'year', 'day', 'month'])["target_48h_diff_1"].transform('std')

        df['target_48h_m_target_72h'] = df['target_48h'] - df['target_72h']
        df['target_48h_m_target_96h'] = df['target_48h'] - df['target_96h']   
        df['target_48h_m_target_169h'] = df['target_48h'] - df['target_168h']   
        df['target_48h_m_target_336h'] = df['target_48h'] - df['target_336h']    
        df['target_48h_m_target_mean'] = df['target_48h'] - df['target_mean']

        df['accel_3'] = df['target_48h_m_target_96h'] - df['target_48h_m_target_mean']
        df['z_score_target_48h'] = ((df['target_48h'] - df['target_mean']) / df['target_std']).fillna(0)
                
        df['mean_target_48h_72h'] = (df['target_48h'] + df['target_72h']) / 2
        df['mean_target_48h_72h_96h'] = (df['target_48h'] + df['target_72h'] + df['target_96h']) / 3

        df['target_48h_m_mean_target_48h_72h'] = df['target_48h'] - df['mean_target_48h_72h']
        df['z_score_1_target_48h'] = ((df['target_48h'] - df['target_48h_m_mean_target_48h_72h']) / df[['target_48h','target_72h']].std(axis = 1)).fillna(0)

        df['target_48h_m_mean_target_48h_72h_96h'] = df['target_48h'] - df['mean_target_48h_72h_96h']
        df['m_std_target_48h_72h_96h'] = df[['target_48h','target_72h','target_96h']].std(axis = 1)
        df['z_score_2_target_48h'] = ((df['target_48h'] - df['target_48h_m_mean_target_48h_72h_96h']) / df['m_std_target_48h_72h_96h']).fillna(0)

        df['diff_dsrfl'] =  df['direct_solar_radiation_forecast_local_0h'] / df['direct_solar_radiation_forecast_local_168h']
        df['diff_ssrdfl'] =  df['surface_solar_radiation_downwards_forecast_local_0h'] / df['surface_solar_radiation_downwards_forecast_local_168h']
        df['ratio_target48_ic'] =  df['target_48h'] / df['installed_capacity']
        df['ratio_target_168h_ic'] =  df['target_168h'] / df['installed_capacity']
        df['ratio_target_mean_ic'] =  df['target_mean'] / df['installed_capacity']
        
        if self.first :
            self.first = False
            self.dic_min ={}
            for col in log_feature[:]:
                self.dic_min[col] = min(df[col])

        for col in log_feature:
            df[f'log_{col}'] = np.where((df[col] - self.dic_min[col])!= 0, np.log(df[col] - self.dic_min[col]),0)
            
        #####################################################
        df['mean_3_target_48h'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])['target_48h'].rolling(3).mean().values
        df['mean_5_target_48h'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])['target_48h'].rolling(5).mean().values
        df['std_3_target_48h'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])['target_48h'].rolling(3).std().values
        df['std_5_target_48h'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])['target_48h'].rolling(5).std().values
        df['z_score_3_target_48h'] = ((df['target_48h'] - df['mean_3_target_48h']) / df['std_3_target_48h']).fillna(0)
        df['z_score_5_target_48h'] = ((df['target_48h'] - df['mean_5_target_48h']) / df['std_5_target_48h']).fillna(0)
        
        df['mean_3_target_48h_sr'] =  df['mean_3_target_48h'] / df['std_3_target_48h']
        df['mean_5_target_48h_sr'] =  df['mean_5_target_48h'] / df['std_5_target_48h']
        
        #####################################################
        df['target_48h_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["target_48h"].transform('mean')
        df['target_48h_mean_day_county'] = df.groupby(['is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["target_48h"].transform('mean')
        
        df['target_48h_min_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["target_48h"].transform('min')
        df['target_48h_min_day_county'] = df.groupby(['is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["target_48h"].transform('min')
                
        df['target_48h_max_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["target_48h"].transform('max')
        df['target_48h_max_day_county'] = df.groupby(['is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["target_48h"].transform('max')

        df['target_48h_std_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["target_48h"].transform('std')
        df['target_48h_std_day_county'] = df.groupby(['is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["target_48h"].transform('std')

        df['target_48h_sr_day'] =  df['target_48h_mean_day'] / df['target_48h_std_day']
        df['target_48h_sr_day_county'] =  df['target_48h_mean_day_county'] / df['target_48h_std_day_county']
        
        df['mean_3_target_48h_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])['target_48h_mean_day'].rolling(3).mean().values
        df['mean_5_target_48h_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])['target_48h_mean_day'].rolling(5).mean().values
        df['std_3_target_48h_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])['target_48h_mean_day'].rolling(3).std().values
        df['std_5_target_48h_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])['target_48h_mean_day'].rolling(5).std().values
        df['z_score_3_target_48h_mean_day'] = ((df['target_48h_mean_day'] - df['mean_3_target_48h_mean_day']) / df['std_3_target_48h_mean_day']).fillna(0)
        df['z_score_5_target_48h_mean_day'] = ((df['target_48h_mean_day'] - df['mean_5_target_48h_mean_day']) / df['std_5_target_48h_mean_day']).fillna(0)
        
        #####################################################
        df['ssrdfl0_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["surface_solar_radiation_downwards_forecast_local_0h"].transform('mean')
        df['ssrdfl0_mean_day_county'] = df.groupby(['is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["surface_solar_radiation_downwards_forecast_local_0h"].transform('mean')
        
        df['feature3'] = df['installed_capacity'] * df['ssrdfl0_mean_day'] / (df['temperature'] + 273.15)
        df['feature4'] = df['installed_capacity'] * df['ssrdfl0_mean_day_county'] / (df['temperature'] + 273.15)
        
        df['ssrdfl0_min_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["surface_solar_radiation_downwards_forecast_local_0h"].transform('min')
        df['ssrdfl0_min_day_county'] = df.groupby(['is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["surface_solar_radiation_downwards_forecast_local_0h"].transform('min')
                
        df['ssrdfl0_max_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["surface_solar_radiation_downwards_forecast_local_0h"].transform('max')
        df['ssrdfl0_max_day_county'] = df.groupby(['is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["surface_solar_radiation_downwards_forecast_local_0h"].transform('max')

        df['ssrdfl0_std_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["surface_solar_radiation_downwards_forecast_local_0h"].transform('std')
        df['ssrdfl0_std_day_county'] = df.groupby(['is_consumption', 'product_type', 'is_business','year', 'day', 'month'])["surface_solar_radiation_downwards_forecast_local_0h"].transform('std')

        df['ssrdfl0_sr_day'] =  df['ssrdfl0_mean_day'] / df['ssrdfl0_std_day']
        df['ssrdfl0_sr_day_county'] =  df['ssrdfl0_mean_day_county'] / df['ssrdfl0_std_day_county']
        
        df['mean_3_ssrdfl0_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])['ssrdfl0_mean_day'].rolling(3).mean().values
        df['mean_5_ssrdfl0_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])['ssrdfl0_mean_day'].rolling(5).mean().values
        df['std_3_ssrdfl0_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])['ssrdfl0_mean_day'].rolling(3).std().values
        df['std_5_ssrdfl0_mean_day'] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])['ssrdfl0_mean_day'].rolling(5).std().values
        df['z_score_3_ssrdfl0_mean_day'] = ((df['ssrdfl0_mean_day'] - df['mean_3_ssrdfl0_mean_day']) / df['std_3_target_48h_mean_day']).fillna(0)
        df['z_score_5_ssrdfl0_mean_day'] = ((df['ssrdfl0_mean_day'] - df['mean_5_ssrdfl0_mean_day']) / df['std_5_target_48h_mean_day']).fillna(0)

        df.replace([np.inf, -np.inf], np.nan, inplace=True)
        return df
    
    def _log_outliers(self,df):
        for i in l1:
            df = df.with_columns([(f"log_outliers_{i}", pl.when(df[i] != 0).then(np.log(pl.col(i))).otherwise(0))])
        return df
    
    def generate_features(self, df_prediction_items):
        if "target" in df_prediction_items.columns:
            df_prediction_items, y = (df_prediction_items.drop("target"),df_prediction_items.select("target"),)
        else:
            y = None

        df_features = df_prediction_items.with_columns(pl.col("datetime").cast(pl.Date).alias("date"),)

        for add_features in [
            self._add_general_features,
            self._add_client_features,
            self._add_forecast_weather_features,
            self._add_historical_weather_features,
            self._add_target_features,
            self._holidays_features,
            self._log_outliers,
            self._reduce_memory_usage
        ]:
            df_features = add_features(df_features)

        df_features = self._to_pandas(df_features, y)
        df_features = self._additional_features(df_features)
        df_features = self._drop_columns(df_features)
        
        return df_features

### Model

In [9]:
def my_mae(pred, target):
    return np.sum(np.abs((pred - target)))/len(target)

def feval_mae(y_pred, lgb_data2):
    y_true = lgb_data2.get_label()
    return 'mae', my_mae(y_pred, y_true), False
    

# Initialisation & Feature Generation

In [10]:
data_storage = DataStorage()
features_generator = FeaturesGenerator(data_storage=data_storage)

In [11]:
df_train_features = features_generator.generate_features(data_storage.df_data)
df_train_features = df_train_features[df_train_features['target'].notnull()]
df_train_features = df_train_features.drop(["date", "hour"], axis = 1 , errors='ignore')

In [12]:
l_fe = list(df_train_features.drop(columns=["target", "date", "hour"], errors='ignore').columns)
print(len(l_fe), l_fe)
yaml.dump(l_fe, open('models/l_fe', 'w'))

215 ['county', 'is_business', 'product_type', 'is_consumption', 'day', 'weekday', 'month', 'segment', 'sin(dayofyear)', 'cos(dayofyear)', 'sin(hour)', 'cos(hour)', 'eic_count', 'installed_capacity', 'hours_ahead', 'temperature', 'dewpoint', 'cloudcover_high', 'cloudcover_low', 'cloudcover_mid', 'cloudcover_total', '10_metre_u_wind_component', '10_metre_v_wind_component', 'direct_solar_radiation', 'surface_solar_radiation_downwards', 'snowfall', 'total_precipitation', 'hours_ahead_forecast_local_0h', 'temperature_forecast_local_0h', 'dewpoint_forecast_local_0h', 'cloudcover_high_forecast_local_0h', 'cloudcover_low_forecast_local_0h', 'cloudcover_mid_forecast_local_0h', 'cloudcover_total_forecast_local_0h', '10_metre_v_wind_component_forecast_local_0h', 'direct_solar_radiation_forecast_local_0h', 'surface_solar_radiation_downwards_forecast_local_0h', 'snowfall_forecast_local_0h', 'total_precipitation_forecast_local_0h', 'hours_ahead_forecast_48h', 'cloudcover_low_forecast_48h', 'cloudcov

In [13]:
l_fet = list(df_train_features.drop(columns=["county", "is_business", "product_type", 'is_consumption', "target"], errors='ignore').columns)

df_train_features[l_fet] = df_train_features.groupby(["county", "is_business", "product_type", 'is_consumption'])[l_fet].ffill()
df_train_features[l_fet] = df_train_features.groupby(["county", "is_business", "product_type", 'is_consumption'])[l_fet].bfill()

dic_ffill_all_col = df_train_features.drop_duplicates(["county", "is_business", "product_type", 'is_consumption'], keep = 'last').set_index(["county", "is_business", "product_type", 'is_consumption'])[l_fet].to_dict()

In [14]:
l_ca_lstm = ['county','is_business','product_type','is_consumption']
l_fe_lstm = [x for x in l_fe if x not in ['segment']]
print(len(l_fe), len(l_fe_lstm))
yaml.dump(l_fe_lstm, open('models/l_fe_lstm', 'w'))

215 214


In [15]:
df_train_features = df_train_features.reset_index(drop = True)

l_date_id = list(df_train_features.index)
train_index = l_date_id[:int(len(l_date_id)*0.9)]
test_index = l_date_id[int(len(l_date_id)*0.9):]

print((len(train_index), len(test_index)))

tr = df_train_features.iloc[train_index].reset_index(drop=True).copy()
va = df_train_features.iloc[test_index].reset_index(drop=True).copy()   

def duplicateColumns(data):
    dupliCols=[]
    for i in range(0,len(data.columns)):
        col1=data.columns[i]
        for col2 in data.columns[i+1:]:
            if data[col1].equals(data[col2]):
                dupliCols.append(col1+','+col2)
    return dupliCols

duplCols=duplicateColumns(va)
print(duplCols)

(1486711, 165191)
[]


In [16]:
class lstm_Dataset(torch.utils.data.Dataset):
    def __init__(self, X, Y, sequence_length = sequence_length):
        self.sequence_length = sequence_length
        self.X = torch.tensor(X).float()
        self.Y = torch.tensor(Y).float()
        
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, i): 
        if i >= self.sequence_length - 1:
            i_start = i - self.sequence_length + 1
            x = self.X[i_start:(i + 1), :]
        else:
            padding = self.X[0].repeat(self.sequence_length - i - 1, 1)
            x = self.X[0:(i + 1), :]
            x = torch.cat((padding, x), 0)
        return x, self.Y[i]
    
class by_category_lstm_Dataset(torch.utils.data.Dataset):
    def __init__(self, 
                 X : np.ndarray, 
                 Y : np.ndarray,  
                 l_ca : list, 
                 l_fe : list, 
                 sequence_length = sequence_length):
        
        self.sequence_length = sequence_length
        self.l_cat = l_ca
        self.l_fe = l_fe
        self.l_fe_col_idx_minus_cat = [i[0] for i in enumerate(l_fe) if i[1] not in l_ca]
        self.l_cat_col_idx = [i[0] for i in enumerate(l_fe) if i[1] in l_ca]
        
        #### INT for Categorical var (for pytorch)
        self.cat = torch.tensor(X[:, self.l_cat_col_idx].astype(int))
        self.idx = torch.tensor(np.arange(0, len(self.cat)))
        self.fes = torch.tensor(X[:, self.l_fe_col_idx_minus_cat].astype(float)).float()
        self.Y = torch.tensor(Y).float()
        
    def __len__(self):
        return self.cat.shape[0]
    
    def __getitem__(self, i):
        fes_get = self.fes[(self.idx <= i) & (self.cat == self.cat[i]).all(axis=1)]
        cat_get = self.cat[(self.idx <= i) & (self.cat == self.cat[i]).all(axis=1)]
        size =  len(cat_get)
        if size >= sequence_length - 1:
            i_start = size - sequence_length + 1
            fes_get = fes_get[i_start:(i + 1), :]
            cat_get = cat_get[i_start:(i + 1), :]

        else:
            padding = fes_get[0].repeat(self.sequence_length - size - 1, 1)
            fes_get = fes_get[0:(i + 1), :]
            fes_get = torch.cat((padding, fes_get), 0)
            
            padding = cat_get[0].repeat(self.sequence_length - size - 1, 1)
            cat_get = cat_get[0:(i + 1), :]
            cat_get = torch.cat((padding, cat_get), 0)
        return torch.cat((cat_get, fes_get), 1), self.Y[i]
    

In [17]:
class EarlyStopper:
    def __init__(self, patience=10, min_delta=0.00001):
        self.best_model = None
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')
        
    def get_best_model(self):
        return self.best_model

    def early_stop(self, validation_loss, model):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
            self.best_model = model
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

In [18]:
def tr_loop(dataloader, model, loss_fn, optimizer, shortcut=0):
    num_batches = len(dataloader)
    total_loss  = 0
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        pred = model(X)
        loss = loss_fn(pred, y)
        total_loss  += loss.item()
        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        if pred.std() < 0.000001:
            print("WARNING: std() is zero, stopping")
            break
        if shortcut > 0 and batch == shortcut:
            num_batches = shortcut
            break 
    avg_loss = total_loss / num_batches
    return total_loss#avg_loss

def va_loop(dataloader, model, loss_fn):
    num_batches = len(dataloader)
    total_loss = 0
    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            loss = loss_fn(pred, y)
            total_loss += loss.item()
        scheduler.step(total_loss)
    avg_loss = total_loss / num_batches
    return total_loss#avg_loss
        
def predict(data_loader, model):
    output = torch.tensor([])
    model.eval()
    with torch.no_grad():
        for X, _ in data_loader:
            y_star = model(X)
            output = torch.cat((output, y_star), 0)
    return output.detach().cpu().numpy().flatten()

In [19]:
tr_np = tr[l_fe_lstm].replace([np.inf, -np.inf], np.nan).fillna(0)
va_np = va[l_fe_lstm].replace([np.inf, -np.inf], np.nan).fillna(0)
mms = sklearn.preprocessing.MinMaxScaler()
mms.fit(tr_np)
tr_np = mms.transform(tr_np)
va_np = mms.transform(va_np)
joblib.dump(mms, f'models/MinMaxScaler.l_fe')

tr_y_np = tr['target'].values
va_y_np = va['target'].values
mmst = sklearn.preprocessing.MinMaxScaler()
mmst.fit(np.array(tr_y_np).reshape(-1, 1))

tr_y_np = mmst.transform(np.array(tr_y_np).reshape(-1, 1))
va_y_np = mmst.transform(np.array(va_y_np).reshape(-1, 1))
joblib.dump(mmst, f'models/MinMaxScaler.target')


tr_py = by_category_lstm_Dataset(tr_np, tr_y_np, l_ca_lstm, l_fe_lstm)
va_py = by_category_lstm_Dataset(va_np, va_y_np, l_ca_lstm, l_fe_lstm)

tr_dataloader = torch.utils.data.DataLoader(tr_py, batch_size=batch_size, shuffle=False)
va_dataloader = torch.utils.data.DataLoader(va_py, batch_size=batch_size, shuffle=False)

In [20]:
class condLSTM(torch.nn.Module):
    def __init__(self,
                 l_ca : list, 
                 l_fe : list,
                 hidden_size=256, 
                 output_size=1, 
                 num_layers= 3, 
                 bidir = True,
                 Verbose = True
                ):
        super().__init__()
        self.Attention_on_hidden_layer = True
        self.Verbose = Verbose 
        self.l_ca_size = len(l_ca)
        self.l_fe_minus_cat_size = len([i[1] for i in enumerate(l_fe) if i[1] not in l_ca])
        self.hidden_size = hidden_size
        self.num_layers = num_layers 
        self.output_size = output_size 
        
        self.mult_bidir = 2 if bidir else 1
        self.cond_c = torch.nn.Linear(in_features = self.l_ca_size, out_features = self.hidden_size * self.num_layers * self.mult_bidir)
        self.cond_h = torch.nn.Linear(in_features = self.l_ca_size, out_features = self.hidden_size * self.num_layers * self.mult_bidir)
        
        self.conv1d = torch.nn.Conv1d(in_channels=self.l_fe_minus_cat_size, out_channels= self.l_fe_minus_cat_size, padding =2, kernel_size=5)        
        self.norm1 = torch.nn.BatchNorm1d(self.l_fe_minus_cat_size)
        self.lstm = torch.torch.nn.LSTM(input_size = self.l_fe_minus_cat_size, hidden_size = self.hidden_size, 
                                        num_layers = self.num_layers, 
                                        bidirectional  = bidir, dropout=0.05, batch_first=True)

        if self.Attention_on_hidden_layer : 
            self.norm2 = torch.nn.BatchNorm1d(self.hidden_size * self.mult_bidir)
            self.lrelu1 = torch.nn.LeakyReLU()
            self.attn = torch.nn.MultiheadAttention(embed_dim=self.hidden_size * self.mult_bidir, num_heads=16)
        else :
            self.norm2 = torch.nn.BatchNorm1d(self.hidden_size * self.mult_bidir *2)
            self.lrelu1 = torch.nn.LeakyReLU()
            self.attn = torch.nn.MultiheadAttention(embed_dim=self.hidden_size * self.mult_bidir*2, num_heads=16)
        self.norm3 = torch.nn.BatchNorm1d(self.l_fe_minus_cat_size)
        self.lrelu2 = torch.nn.LeakyReLU()
        self.linear1 = torch.nn.Linear(in_features = self.hidden_size * self.mult_bidir, out_features=int(hidden_size/2) * self.mult_bidir)
        self.linear2 = torch.nn.Linear(in_features=int(hidden_size/2) * self.mult_bidir, out_features=1)

    def forward(self, x):        
        if self.Verbose : print(f'x : {x.shape}')
        if self.Verbose : print(f'x[:, 0, :self.cat_size ] : {x[:, 0, :self.l_ca_size ].shape}')
        cat_c = self.cond_c(x[:, 0, :self.l_ca_size])
        cat_h = self.cond_h(x[:, 0, :self.l_ca_size])
        c0 = torch.reshape(cat_c, (self.num_layers * self.mult_bidir, -1, self.hidden_size))
        h0 = torch.reshape(cat_h, (self.num_layers * self.mult_bidir, -1, self.hidden_size))
        #h0 = torch.zeros(self.num_layers * self.mult_bidir, x.shape[0], self.hidden_size).requires_grad_()
        #c0 = torch.zeros(self.num_layers * self.mult_bidir, x.shape[0], self.hidden_size).requires_grad_()
        #torch.nn.init.xavier_normal_(h0)
        #torch.nn.init.xavier_normal_(c0)
        # Xavier/Glorot Initialization: for Activation => None, hyperbolic Tan (tanh), Logistic(sigmoid), softmax.
        # He Initialization: for Activation => Rectified Linear activation unit(ReLU) and Variants.
        # LeCun Initialization: for Activation => Scaled Exponential Linear Unit(SELU)
        if self.Verbose :print(f'c0 : {c0.shape}')
        if self.Verbose :print(f'h0 : {h0.shape}')
        
        
        x1 = x[:, :, self.l_ca_size:].transpose(1, 2) 
        if self.Verbose :print(f'x after transpose 1 : {x1.shape}')
        x1 = self.conv1d(x1)    
        x1 = self.norm1(x1)
        if self.Verbose :print(f'x after conv1d : {x1.shape}')
        #Turn (batch_size x hidden_size x seq_len) back into (seq_len x batch_size x hidden_size) for LSTM
        x1 = x1.transpose(1, 2)
        
        x, (hn, _) = self.lstm(x1, (h0, c0))
        if self.Verbose : print(f'x after lstm : {x.shape}')
        if self.Verbose : print(f'hn after lstm : {hn.shape}')
        if self.mult_bidir == 2:
            hn = torch.cat((hn[-1,:, :], hn[-2,:, :]),1)
            x = torch.cat((x[:,-1, :], x[:,-2, :]),1)
        else :
            hn = hn[-1,:, :]
            x = x[:, -1, :]
        if self.Verbose : print(f'x after filter / before attn: {x.shape}')
        if self.Verbose : print(f'hn after filter / before attn: {hn.shape}')
        if self.Attention_on_hidden_layer : 
            hn = self.norm2(hn)            
            hn = self.lrelu1(hn)
            x, weight = self.attn(hn, hn, hn)
        else:
            x = self.norm2(x)
            x = self.lrelu1(x)
            x, weight = self.attn(x, x, x)
        if self.Verbose : print(f'x after attn : {x.shape}')
        x = self.lrelu2(x)
        x = self.linear1(x)
        if self.Verbose : print(f'x after linear1 : {x.shape}')
        x = self.linear2(x)
        if self.Verbose : print(f'x after linear2 : {x.shape}'); self.Verbose = False
        return x


In [21]:
torch.manual_seed(2023)
model = condLSTM(l_ca_lstm, l_fe_lstm).float()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.00001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=8, factor=0.5, verbose=True)
early_stopper = EarlyStopper(patience=10, min_delta=0.0001)
loss_fn = torch.nn.L1Loss()

libc.malloc_trim(0)
gc.collect()

history = pd.DataFrame([], columns=["epoch", "train_loss", "test_loss", "lr"])
for epoch in range(n_estimators):
    tr_loss = tr_loop(tr_dataloader, model, loss_fn, optimizer, shortcut=1)                 
    va_loss = va_loop(va_dataloader, model, loss_fn)
    print(f"Epoch {epoch+1:>3d}, Train : {tr_loss:>5f}, Valid : {va_loss:>5f}")
    if early_stopper.early_stop(va_loss, model):  
        model = early_stopper.get_best_model()
        break
    history.loc[len(history),:] = [epoch+1, tr_loss, va_loss, optimizer.param_groups[0]['lr']]

joblib.dump(model, f'models/model.lstm.jolib')
torch.save(model.state_dict(), f'models/model.state_dict')
torch.save(model, 'models/model.full')
pred = predict(va_dataloader, model)     
pred = mmst.inverse_transform(pred.reshape(-1, 1)).ravel()
mae_score = np.sum(np.abs((pred - va['target'].values)))/len(va['target'].values)
print(f'val mae {mae_score}')

x : torch.Size([10240, 6, 214])
x[:, 0, :self.cat_size ] : torch.Size([10240, 4])
c0 : torch.Size([6, 10240, 256])
h0 : torch.Size([6, 10240, 256])
x after transpose 1 : torch.Size([10240, 210, 6])
x after conv1d : torch.Size([10240, 210, 6])
x after lstm : torch.Size([10240, 6, 512])
hn after lstm : torch.Size([6, 10240, 256])
x after filter / before attn: torch.Size([10240, 1024])
hn after filter / before attn: torch.Size([10240, 512])
x after attn : torch.Size([10240, 512])
x after linear1 : torch.Size([10240, 256])
x after linear2 : torch.Size([10240, 1])


In [None]:
va['error'] = target - pred
va['pred'] = pred
va['error2'] = va['error'].abs()
print(va['error2'].mean() , va['error2'].max())
    
mae_score = np.sum(np.abs((pred  - target)))/len(pred)
print(f'val mae {mae_score}')

In [None]:
sns.scatterplot(va, x = 'pred', y = 'target')
plt.show()
corr_error = va[l_fe + ['error', 'target']].corr()
display(corr_error['error'].abs().nlargest(15))
display(corr_error['target'].abs().nlargest(15))

In [None]:
limit = 3090 #va['error2'].mean()
display(va[va['error2']> limit])
sns.scatterplot(va[va['error2']> limit], x = 'pred', y = 'target')
plt.show()
corr_error = va[va['error2']> limit][l_fe + ['error', 'target']].corr()
display(corr_error['error'].abs().nlargest(15))
display(corr_error['target'].abs().nlargest(15))

In [None]:
class LSTM(torch.nn.Module):
    def __init__(self, input_size=len(l_fe), hidden_size=hidden_size, output_size=1, num_layers= num_layers, bidir = True):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers 
        self.output_size = output_size 
        self.test = True
        self.mult_bidir = 2 if bidir else 1
        self.lstm = torch.nn.LSTM(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers , dropout=0.05, batch_first=True, bidirectional  = bidir)
        self.linear1 = torch.nn.Linear(in_features =hidden_size * self.mult_bidir, out_features=int(hidden_size/2) * self.mult_bidir)
        self.linear2 = torch.nn.Linear(in_features=int(hidden_size/2) * self.mult_bidir, out_features=1)

    def forward(self, x):
        if self.test : print(f'x : {x.shape}')
        h0 = torch.zeros(self.num_layers * self.mult_bidir, x.shape[0], self.hidden_size).requires_grad_()
        c0 = torch.zeros(self.num_layers * self.mult_bidir, x.shape[0], self.hidden_size).requires_grad_()
        torch.nn.init.xavier_normal_(h0)
        torch.nn.init.xavier_normal_(c0)
        if self.test :print(f'h0 : {h0.shape}')
        if self.test :print(f'c0 : {c0.shape}')
        x, (hn, _) = self.lstm(x, (h0, c0))
        if self.test : print(f'x after lstm : {x.shape}')
        if self.test : print(f'hn after lstm : {hn.shape}')
        x = x[:, -1, :]
        if self.test : print(f'x after filter : {x.shape}')
        x = self.linear1(x)
        if self.test : print(f'x after linear1 : {x.shape}')
        x = self.linear2(x)
        if self.test : print(f'x after linear2 : {x.shape}'); self.test = False
        return x

class LSTMwithAttention(torch.nn.Module):
    def __init__(self, input_size=len(l_fe), hidden_size=hidden_size, output_size=1, num_layers= num_layers, bidir = True):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers 
        self.output_size = output_size 
        self.test = True
        self.mult_bidir = 2 if bidir else 1
        self.lstm = torch.nn.LSTM(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers , dropout=0.05, batch_first=True, bidirectional  = bidir)
        self.attn = torch.nn.MultiheadAttention(embed_dim=hidden_size * self.mult_bidir, num_heads=5)
        self.linear1 = torch.nn.Linear(in_features=hidden_size * self.mult_bidir, out_features=int(hidden_size/2) * self.mult_bidir)
        self.linear2 = torch.nn.Linear(in_features=int(hidden_size/2) * self.mult_bidir, out_features=1)

    def forward(self, x):
        if self.test : print(f'x : {x.shape}')
        h0 = torch.zeros(self.num_layers * self.mult_bidir, x.shape[0], self.hidden_size).requires_grad_()
        c0 = torch.zeros(self.num_layers * self.mult_bidir, x.shape[0], self.hidden_size).requires_grad_()
        torch.nn.init.xavier_normal_(h0)
        torch.nn.init.xavier_normal_(c0)
        if self.test :print(f'h0 : {h0.shape}')
        if self.test :print(f'c0 : {c0.shape}')
        x, (hn, _) = self.lstm(x, (h0, c0))
        if self.test : print(f'x after lstm : {x.shape}')
        if self.test : print(f'hn after lstm : {hn.shape}')
        if self.mult_bidir == 2:
            hn = torch.cat((hn[-1,:, :], hn[-2,:, :]),1)
        else :
            hn = hn[-1,:, :]
        if self.test : print(f'hn after filter : {hn.shape}')
        x, weight = self.attn(hn, hn, hn)
        if self.test : print(f'x after attn : {x.shape}')
        x = self.linear1(x)
        if self.test : print(f'x after linear1 : {x.shape}')
        x = self.linear2(x)
        if self.test : print(f'x after linear2 : {x.shape}'); self.test = False
        return x
    
class convLSTMwithAttention(torch.nn.Module):
    def __init__(self, input_size=len(l_fe), hidden_size=hidden_size, output_size=1, num_layers= num_layers, bidir = True):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers 
        self.output_size = output_size 
        self.test = True
        
        self.conv1d = torch.nn.Conv1d(in_channels=input_size, out_channels=hidden_size, padding =2, kernel_size=5)
        #self.conv1d = torch.nn.Conv1d(in_channels=input_size, out_channels=hidden_size, kernel_size=1)
        self.mult_bidir = 2 if bidir else 1
        self.lstm = torch.nn.LSTM(input_size = hidden_size, hidden_size = hidden_size, num_layers = num_layers , dropout=0.05, batch_first=True, bidirectional  = bidir)
        self.attn = torch.nn.MultiheadAttention(embed_dim=hidden_size * self.mult_bidir, num_heads=5)
        self.linear1 = torch.nn.Linear(in_features=hidden_size * self.mult_bidir, out_features=int(hidden_size/2) * self.mult_bidir)
        self.linear2 = torch.nn.Linear(in_features=int(hidden_size/2) * self.mult_bidir, out_features=1)

    def forward(self, x):
        if self.test : print(f'x : {x.shape}')
        # Turn (seq_len x batch_size x input_size) into (batch_size x input_size x seq_len) for CNN
        x = x.transpose(1, 2) 
        if self.test :print(f'x after transpose 1 : {x.shape}')
        x = self.conv1d(x)    
        if self.test :print(f'x after conv1d : {x.shape}')
        #Turn (batch_size x hidden_size x seq_len) back into (seq_len x batch_size x hidden_size) for LSTM
        x = x.transpose(1, 2)
        if self.test :print(f'x after transpose 2 : {x.shape}')

        h0 = torch.zeros(self.num_layers * self.mult_bidir, x.shape[0], self.hidden_size).requires_grad_()
        c0 = torch.zeros(self.num_layers * self.mult_bidir, x.shape[0], self.hidden_size).requires_grad_()
        torch.nn.init.xavier_normal_(h0)
        torch.nn.init.xavier_normal_(c0)
        if self.test :print(f'h0 : {h0.shape}')
        if self.test :print(f'c0 : {c0.shape}')
        x, (hn, _) = self.lstm(x, (h0, c0))
        if self.test : print(f'x after lstm : {x.shape}')
        if self.test : print(f'hn after lstm : {hn.shape}')
        if self.mult_bidir == 2:
            hn = torch.cat((hn[-1,:, :], hn[-2,:, :]),1)
        else :
            hn = hn[-1,:, :]
        if self.test : print(f'hn after filter : {hn.shape}')
        x, weight = self.attn(hn, hn, hn)
        if self.test : print(f'x after attn : {x.shape}')
        x = self.linear1(x)
        if self.test : print(f'x after linear1 : {x.shape}')
        x = self.linear2(x)
        if self.test : print(f'x after linear2 : {x.shape}'); self.test = False
        return x 