In [1]:
import warnings

warnings.filterwarnings("ignore")

import os
import gc
import datetime
import numpy as np
import pandas as pd
import polars as pl


In [8]:
import plotly.express as px
import joblib
from sklearn.ensemble import VotingRegressor
import lightgbm as lgb
import torch

In [3]:
import holidays

> <h4>The idea of using Class Objects for feature generation as an efficient method has been presented in this really <a href="https://www.kaggle.com/code/vitalykudelya/enefit-object-oriented-gbdt">insightful notebook</a> by <a href="https://www.kaggle.com/vitalykudelya">Vitaly Kudelya</a>. The notebook presents various new feature engineering methods and the author's efforts in putting them together are commendable. Recently, they have also proposed a method to predict <i>the difference</i> between the last available 'target_48h' and the 'target'  rather than the 'target' itself, which I really liked. It can be seen <a href='https://www.kaggle.com/code/vitalykudelya/enefit-target-diff'>here</a></h4> 

> <h4>In this notebook, I have tried to build upon this work by:<ul> <li>adding some more features</li><li> tuning the parameters a bit and</li> <li>performed some data analysis</li></ul></h4>

In [4]:
tmp = pl.read_csv("/Users/0ne/Programming/Kaggle/Enefit/data/predict-energy-behavior-of-prosumers/train.csv")
type(tmp.to_pandas())

pandas.core.frame.DataFrame

### Data Handling

In [5]:
class Warehouse:
    # root = "/kaggle/input/predict-energy-behavior-of-prosumers"

    data_columns = ["target","county","is_business","product_type","is_consumption","datetime","row_id",]
    client_columns = ["product_type","county","eic_count","installed_capacity","is_business","date",]
    gas_prices_columns = ["forecast_date", "lowest_price_per_mwh", "highest_price_per_mwh"]
    electricity_prices_columns = ["forecast_date", "euros_per_mwh"]
    forecast_weather_columns = ["latitude","longitude","hours_ahead","temperature","dewpoint","cloudcover_high","cloudcover_low","cloudcover_mid","cloudcover_total","10_metre_u_wind_component","10_metre_v_wind_component","forecast_datetime","direct_solar_radiation","surface_solar_radiation_downwards","snowfall","total_precipitation",]
    historical_weather_columns = ["datetime","temperature","dewpoint","rain","snowfall","surface_pressure","cloudcover_total","cloudcover_low","cloudcover_mid","cloudcover_high","windspeed_10m","winddirection_10m","shortwave_radiation","direct_solar_radiation","diffuse_radiation","latitude","longitude",]
    location_columns = ["longitude", "latitude", "county"]
    target_columns = ["target","county","is_business","product_type","is_consumption","datetime",]

    def __init__(self, env):
        # Depending on the environment, set the different root path.
        self.env = env
        if self.env == 'in kaggle':
            self.root = "/kaggle/input/predict-energy-behavior-of-prosumers"
        elif self.env == 'in colab':
            self.root = "/content"
        else:
            self.root = "../data/predict-energy-behavior-of-prosumers"

        # Load the data to polars.
        self.df_data = pl.read_csv(
            os.path.join(self.root, "train.csv"),
            columns=self.data_columns,
            try_parse_dates=True,
        )
        self.df_client = pl.read_csv(
            os.path.join(self.root, "client.csv"),
            columns=self.client_columns,
            try_parse_dates=True,
        )
        self.df_gas_prices = pl.read_csv(
            os.path.join(self.root, "gas_prices.csv"),
            columns=self.gas_prices_columns,
            try_parse_dates=True,
        )
        self.df_electricity_prices = pl.read_csv(
            os.path.join(self.root, "electricity_prices.csv"),
            columns=self.electricity_prices_columns,
            try_parse_dates=True,
        )
        self.df_forecast_weather = pl.read_csv(
            os.path.join(self.root, "forecast_weather.csv"),
            columns=self.forecast_weather_columns,
            try_parse_dates=True,
        )
        self.df_historical_weather = pl.read_csv(
            os.path.join(self.root, "historical_weather.csv"),
            columns=self.historical_weather_columns,
            try_parse_dates=True,
        )
        self.df_weather_station_to_county_mapping = pl.read_csv(
            os.path.join(self.root, "weather_station_to_county_mapping.csv"),
            columns=self.location_columns,
            try_parse_dates=True,
        )
        self.df_data = self.df_data.filter(
            pl.col("datetime") >= pd.to_datetime("2022-01-01")
        )
        self.df_target = self.df_data.select(self.target_columns)
        self.schema_data = self.df_data.schema
        self.schema_client = self.df_client.schema
        self.schema_gas_prices = self.df_gas_prices.schema
        self.schema_electricity_prices = self.df_electricity_prices.schema
        self.schema_forecast_weather = self.df_forecast_weather.schema
        self.schema_historical_weather = self.df_historical_weather.schema
        self.schema_target = self.df_target.schema

        self.df_weather_station_to_county_mapping = (
            self.df_weather_station_to_county_mapping.with_columns(
                pl.col("latitude").cast(pl.datatypes.Float32),
                pl.col("longitude").cast(pl.datatypes.Float32),
            )
        )

    def update_data(self,df_client_new,df_gas_price_new,df_elec_price_new,df_forecast_new,df_hist_weather_new,df_target_new,):
        df_client_new = pl.from_pandas(df_client_new[self.client_columns], schema_overrides=self.schema_client)
        
        df_gas_price_new = pl.from_pandas(df_gas_price_new[self.gas_prices_columns],schema_overrides=self.schema_gas_prices,)
        
        df_elec_price_new = pl.from_pandas(df_elec_price_new[self.electricity_prices_columns],schema_overrides=self.schema_electricity_prices,)
        
        df_forecast_new = pl.from_pandas(df_forecast_new[self.forecast_weather_columns],schema_overrides=self.schema_forecast_weather,)
        
        df_hist_weather_new = pl.from_pandas(df_hist_weather_new[self.historical_weather_columns],schema_overrides=self.schema_historical_weather,)
        
        df_target_new = pl.from_pandas(df_target_new[self.target_columns], schema_overrides=self.schema_target)

        self.df_client = pl.concat([self.df_client, df_client_new]).unique(["date", "county", "is_business", "product_type"])
        
        self.df_gas_prices = pl.concat([self.df_gas_prices, df_gas_price_new]).unique(["forecast_date"])
        
        self.df_electricity_prices = pl.concat([self.df_electricity_prices, df_elec_price_new]).unique(["forecast_date"])
        
        self.df_forecast_weather = pl.concat([self.df_forecast_weather, df_forecast_new]).unique(["forecast_datetime", "latitude", "longitude", "hours_ahead"])
        
        self.df_historical_weather = pl.concat([self.df_historical_weather, df_hist_weather_new]).unique(["datetime", "latitude", "longitude"])
        
        self.df_target = pl.concat([self.df_target, df_target_new]).unique(["datetime", "county", "is_business", "product_type", "is_consumption"])

    def preprocess_test(self, df_test):
        df_test = df_test.rename(columns={"prediction_datetime": "datetime"})
        df_test = pl.from_pandas(
            df_test[self.data_columns[1:]], schema_overrides=self.schema_data
        )
        return df_test


### Feature Engineering

In [6]:
class FeatureEngineer:
    def __init__(self, data):
        self.data = data
        self.estonian_holidays = list(
            holidays.country_holidays("EE", years=range(2021, 2026)).keys()
        )

    def _general_features(self, df_features):
        df_features = (
            df_features.with_columns(
                pl.col("datetime").dt.ordinal_day().alias("dayofyear"),pl.col("datetime").dt.hour().alias("hour"),
                pl.col("datetime").dt.day().alias("day"),pl.col("datetime").dt.weekday().alias("weekday"),
                pl.col("datetime").dt.month().alias("month"),pl.col("datetime").dt.year().alias("year"),
            )
            .with_columns(pl.concat_str("county","is_business","product_type","is_consumption",separator="_",).alias("segment"),)
            .with_columns(
                (np.pi * pl.col("dayofyear") / 183).sin().alias("sin(dayofyear)"),(np.pi * pl.col("dayofyear") / 183).cos().alias("cos(dayofyear)"),
                (np.pi * pl.col("hour") / 12).sin().alias("sin(hour)"),(np.pi * pl.col("hour") / 12).cos().alias("cos(hour)"),
            )
        )
        return df_features

    def _client_features(self, df_features):
        df_client = self.data.df_client

        df_features = df_features.join(
            df_client.with_columns((pl.col("date") + pl.duration(days=2)).cast(pl.Date)
            ),on=["county", "is_business", "product_type", "date"],how="left",
        )
        return df_features
    
    def is_country_holiday(self, row):
        return (
            datetime.date(row["year"], row["month"], row["day"])
            in self.estonian_holidays
        )

    def _holidays_features(self, df_features):
        df_features = df_features.with_columns(
            pl.struct(["year", "month", "day"])
            .apply(self.is_country_holiday)
            .alias("is_country_holiday")
        )
        return df_features

    def _forecast_weather_features(self, df_features):
        df_forecast_weather = self.data.df_forecast_weather
        df_weather_station_to_county_mapping = (
            self.data.df_weather_station_to_county_mapping
        )

        df_forecast_weather = (
            df_forecast_weather.rename({"forecast_datetime": "datetime"})
            .filter((pl.col("hours_ahead") >= 22) & pl.col("hours_ahead") <= 45)
            .drop("hours_ahead")
            .with_columns(pl.col("latitude").cast(pl.datatypes.Float32),pl.col("longitude").cast(pl.datatypes.Float32),)
            .join(df_weather_station_to_county_mapping,how="left",on=["longitude", "latitude"],).drop("longitude", "latitude"))

        df_forecast_weather_date = (df_forecast_weather.group_by("datetime").mean().drop("county"))

        df_forecast_weather_local = (df_forecast_weather.filter(pl.col("county").is_not_null()).group_by("county", "datetime").mean())

        for hours_lag in [0, 7 * 24]:
            df_features = df_features.join(
                df_forecast_weather_date.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ),on="datetime",how="left",suffix=f"_forecast_{hours_lag}h",
            )
            df_features = df_features.join(
                df_forecast_weather_local.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ),on=["county", "datetime"],how="left",suffix=f"_forecast_local_{hours_lag}h",
            )

        return df_features

    def _historical_weather_features(self, df_features):
        df_historical_weather = self.data.df_historical_weather
        df_weather_station_to_county_mapping = (self.data.df_weather_station_to_county_mapping)

        df_historical_weather = (
            df_historical_weather.with_columns(pl.col("latitude").cast(pl.datatypes.Float32),pl.col("longitude").cast(pl.datatypes.Float32),
            ).join(df_weather_station_to_county_mapping,how="left",on=["longitude", "latitude"],).drop("longitude", "latitude")
        )

        df_historical_weather_date = (df_historical_weather.group_by("datetime").mean().drop("county"))

        df_historical_weather_local = (
            df_historical_weather.filter(pl.col("county").is_not_null()).group_by("county", "datetime").mean()
        )

        for hours_lag in [2 * 24, 7 * 24]:
            df_features = df_features.join(
                df_historical_weather_date.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ),on="datetime",how="left",suffix=f"_historical_{hours_lag}h",
            )
            df_features = df_features.join(
                df_historical_weather_local.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag)
                ),on=["county", "datetime"],how="left",suffix=f"_historical_local_{hours_lag}h",
            )

        for hours_lag in [1 * 24]:
            df_features = df_features.join(
                df_historical_weather_date.with_columns(
                    pl.col("datetime") + pl.duration(hours=hours_lag),
                    pl.col("datetime").dt.hour().alias("hour"),).filter(pl.col("hour") <= 10).drop("hour"),on="datetime",how="left",
                suffix=f"_historical_{hours_lag}h",
            )

        return df_features

    def _target_features(self, df_features):
        df_target = self.data.df_target

        df_target_all_type_sum = (df_target.group_by(["datetime", "county", "is_business", "is_consumption"]).sum().drop("product_type"))

        df_target_all_county_type_sum = (df_target.group_by(["datetime", "is_business", "is_consumption"]).sum().drop("product_type", "county"))
        
        hours_list=[i*24 for i in range(2,15)]

        for hours_lag in hours_list:
            df_features = df_features.join(
                df_target.with_columns(pl.col("datetime") + pl.duration(hours=hours_lag)
                ).rename({"target": f"target_{hours_lag}h"}),
                on=["county","is_business","product_type","is_consumption","datetime",],
                how="left",
            )

        for hours_lag in [2 * 24, 3 * 24, 7 * 24, 14 * 24]:
            df_features = df_features.join(
                df_target_all_type_sum.with_columns(pl.col("datetime") + pl.duration(hours=hours_lag)
                ).rename({"target": f"target_all_type_sum_{hours_lag}h"}),
                on=["county", "is_business", "is_consumption", "datetime"],how="left",
            )

            df_features = df_features.join(
                df_target_all_county_type_sum.with_columns(pl.col("datetime") + pl.duration(hours=hours_lag)).rename({"target": f"target_all_county_type_sum_{hours_lag}h"}),
                on=["is_business", "is_consumption", "datetime"],how="left",
                suffix=f"_all_county_type_sum_{hours_lag}h",
            )

        cols_for_stats = [f"target_{hours_lag}h" for hours_lag in hours_list[:4]]
        
        df_features = df_features.with_columns(
            df_features.select(cols_for_stats).mean(axis=1).alias(f"target_mean"),
            df_features.select(cols_for_stats).transpose().std().transpose().to_series().alias(f"target_std"),
            )

        for target_prefix, lag_nominator, lag_denomonator in [
            ("target", 24 * 7, 24 * 14),("target", 24 * 2, 24 * 9),("target", 24 * 3, 24 * 10),("target", 24 * 2, 24 * 3),
            ("target_all_type_sum", 24 * 2, 24 * 3),("target_all_type_sum", 24 * 7, 24 * 14),
            ("target_all_county_type_sum", 24 * 2, 24 * 3),("target_all_county_type_sum", 24 * 7, 24 * 14),
        ]:
            df_features = df_features.with_columns(
                (pl.col(f"{target_prefix}_{lag_nominator}h")/ (pl.col(f"{target_prefix}_{lag_denomonator}h") + 1e-3)
                ).alias(f"{target_prefix}_ratio_{lag_nominator}_{lag_denomonator}")
            )

        return df_features

    def _reduce_memory_usage(self, df_features):
        df_features = df_features.with_columns(pl.col(pl.Float64).cast(pl.Float32))
        return df_features

    def _drop_columns(self, df_features):
        df_features = df_features.drop("datetime", "hour", "dayofyear")
        return df_features

    def _to_pandas(self, df_features, y):
        cat_cols = ["county","is_business","product_type","is_consumption","segment",]

        if y is not None:
            df_features = pd.concat([df_features.to_pandas(), y.to_pandas()], axis=1)
        else:
            df_features = df_features.to_pandas()

        df_features = df_features.set_index("row_id")
        df_features[cat_cols] = df_features[cat_cols].astype("category")

        return df_features
    
    # added some new features here
    def _additional_features(self,df):
        for col in [
                    'temperature', 
                    'dewpoint', 
                    '10_metre_u_wind_component', 
                    '10_metre_v_wind_component', 
            ]:
            for window in [1]:
                df[f"{col}_diff_{window}"] = df.groupby(["county", 'is_consumption', 'product_type', 'is_business'])[col].diff(window)
        return df
    
    def _log_outliers(self,df):
        l1=['installed_capacity', 'target_mean', 'target_std']
        for i in l1:
            df = df.with_columns([(f"log_{i}", pl.when(df[i] != 0).then(np.log(pl.col(i))).otherwise(0))])
        return df
        

    def generate_features(self, df_prediction_items,isTrain):
        if "target" in df_prediction_items.columns:
            df_prediction_items, y = (
                df_prediction_items.drop("target"),
                df_prediction_items.select("target"),
            )
        else:
            y = None

        df_features = df_prediction_items.with_columns(
            pl.col("datetime").cast(pl.Date).alias("date"),
        )

        for add_features in [
            self._general_features,self._client_features,self._forecast_weather_features,
            self._historical_weather_features,self._target_features,self._holidays_features,
            self._log_outliers,self._reduce_memory_usage,self._drop_columns,]:
            df_features = add_features(df_features)

        df_features = self._to_pandas(df_features, y)
        df_features = self._additional_features(df_features)
       
        return df_features


# Model Hub

In [9]:
device = "gpu" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [22]:
# TODO: device = "gpu" if torch.cuda.is_available() else "cpu"
# TODO: add more models
model_parameters = {"objective": "regression_l1","device": device,"n_estimators": 3000,"learning_rate": 0.05,"colsample_bytree": 0.8,"colsample_bynode": 0.5,"lambda_l1": 3.4,"lambda_l2": 1.4,"max_depth": 15,"num_leaves": 490,"min_data_in_leaf": 48,}


m1 = VotingRegressor([
                (f"clgb_{i}",lgb.LGBMRegressor(**model_parameters, random_state=i),)
                for i in range(12)
            ])

m2 = VotingRegressor([
                (f"plgb_{i}",lgb.LGBMRegressor(**model_parameters, random_state=i),)
                for i in range(12)
            ])

def fit_model(train_feats,hours_lag,model_consumption=m1,model_production=m2):
    mask = train_feats["is_consumption"] == 1
    model_consumption.fit(
        X=train_feats[mask].drop(columns=["target"]),
        y=train_feats[mask]["target"]- train_feats[mask][f"target_{hours_lag}h"].fillna(0),
        )
    gc.collect()
    

    mask = train_feats["is_consumption"] == 0
    model_production.fit(
            X=train_feats[mask].drop(columns=["target"]),
            y=train_feats[mask]["target"]
            - train_feats[mask][f"target_{hours_lag}h"].fillna(0),
        )
    gc.collect()
    

def predict_model(df_features,hours_lag,model_consumption=m1,model_production=m2):
    predictions = np.zeros(len(df_features))

    mask = df_features["is_consumption"] == 1
    predictions[mask.values] = np.clip(
        df_features[mask][f"target_{hours_lag}h"].fillna(0).values+ 
        model_consumption.predict(df_features[mask]),0,np.inf,
        )

    mask = df_features["is_consumption"] == 0
    predictions[mask.values] = np.clip(
        df_features[mask][f"target_{hours_lag}h"].fillna(0).values+ 
        model_production.predict(df_features[mask]),0,np.inf,
        )

    return predictions


In [8]:
import os
import sys

def _is_in_kaggle() -> bool:
  """Whether the current environment is in Kaggle."""
  return str(_dh[0]) == '/kaggle/working'


def _is_in_colab() -> bool:
  """Whether the current environment is in Colab."""
  return 'google.colab' in str(get_ipython())

if _is_in_kaggle():
  my_env = 'in kaggle'
elif _is_in_colab():
  my_env = 'in colab'
else:
  my_env = 'in local'

print(f'Environment: {my_env}')


Environment: in local


# Generating Features

In [9]:
store = Warehouse(env=my_env)
feat_gen = FeatureEngineer(data=store)

In [10]:
# Need to wait for the data to be loaded 
df_train = feat_gen.generate_features(store.df_data,True)
df_train = df_train[df_train['target'].notnull()]

In [12]:
df_train.shape

(1651902, 172)

# EDA

> We will be using Plotly for Data Analysis. 
Plotly has **hover tool capabilities** that allow us to detect any outliers or anomalies in a large number of data points.
The resultant plots are highly interactive and it allows to zoom in and focus on certain regions of the plot for a deeper analysis.
It allows for endless customization of graphs that makes the plot more meaningful and understandable.

## segment-wise energy consumption

In [13]:
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

segment_list = df_train.segment.unique()[:3]

# Filter the dataset for the specific segment
for seg in segment_list:
    consumption_segment = df_train[df_train.segment == seg]

    # Create a line plot using Plotly Express
    fig = px.line(consumption_segment, x='date', y='target', 
              title=f'Target Over Time for Segment {seg}',
              labels={'date': 'Date', 'target': 'Target'},
              template='plotly_dark',line_shape='linear')
    fig.update_traces(line=dict(color='blue', width=1.5))

    # Customize the x-axis date format and tick interval
    fig.update_xaxes(type='date', tickformat='%Y-%m-%d', tickmode='linear', dtick=15)

    # Show the plot
    fig.show()


## FFT Analysis

> <h4>Initially studied it in this <a href="https://www.kaggle.com/code/chaozhuang/enefit-eda-w-fft-ssa-lgbm-voting-regressor">amazing notebook</a> by <a href="https://www.kaggle.com/chaozhuang">CHAO ZHUANG</a>. He has done a very deep analysis and explained everything on the go</h4>

In [14]:
import plotly.graph_objects as go

segment_list = df_train.segment.unique()[:10]
example_df = df_train[np.isin(df_train.segment, segment_list)]
segments = example_df['segment'].unique()

# Define periods in days and calculate corresponding frequencies
periods = {'Annual': 365,'Semiannual': 365 / 2,'Quarterly': 365 / 4,'Monthly': 30,'Biweekly': 14,'Weekly': 7,'Semiweekly': 3.5}
frequencies_for_periods = {k: 1 / v for k, v in periods.items()}

# Initialize the figure for the spectra using Plotly
fig = go.Figure()

# Convert the x-axis to a log scale
fig.update_xaxes(type='log')

# Plot the spectrum for each segment with offset
for i, segment in enumerate(segments):
    segment_data = example_df[example_df['segment'] == segment]['target']
    fft_values = np.fft.fft(segment_data)
    frequencies = np.fft.fftfreq(len(fft_values), d=1)
    magnitudes = np.abs(fft_values)[frequencies > 0]
    normalized_magnitudes = magnitudes / np.max(magnitudes)
    positive_freqs = frequencies[frequencies > 0]

    # Filter out frequencies corresponding to periods longer than 'Semiannual'
    valid_freqs = positive_freqs[positive_freqs > frequencies_for_periods['Semiannual']]
    valid_magnitudes = normalized_magnitudes[positive_freqs > frequencies_for_periods['Semiannual']]

    # Offset each segment's spectrum for clarity
    offset_magnitudes = valid_magnitudes + i

    fig.add_trace(go.Scatter(x=valid_freqs, y=offset_magnitudes, mode='lines', name=f'Segment {segment}'))

# Customize the plot layout
fig.update_layout(
    title='Frequency Spectra of hourly target for Each Segment',
    xaxis_title='Frequency',
    yaxis_title='Normalized Magnitude + Offset',
    xaxis=dict(tickvals=list(frequencies_for_periods.values()), ticktext=list(frequencies_for_periods.keys())),
    showlegend=True
)

# Show the plot
fig.show()


In [15]:
def fft_plots_enefit(name):
    # Initialize the figure for the spectrum using Plotly
    fig = go.Figure()

    # Convert the x-axis to a log scale
    fig.update_xaxes(type='log')

    # Plot the spectrum for the specified segment
    segment_data = example_df[example_df['segment'] == '0_0_1_1'][name]
    fft_values = np.fft.fft(segment_data)
    frequencies = np.fft.fftfreq(len(fft_values), d=1)
    magnitudes = np.abs(fft_values)[frequencies > 0]
    positive_freqs = frequencies[frequencies > 0]

    # Filter out frequencies corresponding to periods longer than 'Semiannual'
    valid_freqs = positive_freqs[positive_freqs > frequencies_for_periods['Semiannual']]
    valid_magnitudes = magnitudes[positive_freqs > frequencies_for_periods['Semiannual']]

    fig.add_trace(go.Scatter(x=valid_freqs, y=valid_magnitudes, mode='lines', name='0_0_1_1'))

    #  Customize the plot layout
    fig.update_layout(
    title=f'{name} frequency spectrum',
    xaxis_title='Frequency',
    yaxis_title='Magnitude',
    xaxis=dict(tickvals=list(frequencies_for_periods.values()), ticktext=list(frequencies_for_periods.keys())),
    showlegend=True,
    )

    # Show the plot
    fig.show()


In [16]:
plot_list=['temperature','direct_solar_radiation']
for i in plot_list:
    fft_plots_enefit(i)
    

# Model Training

In [17]:
#dropping date column now
df_train.drop(columns=['date'],inplace=True)

In [18]:
if 'literal' in df_train.columns:
    df_train.drop(columns=['literal'],inplace=True)

In [19]:
df_train.shape

(1651902, 170)

In [23]:
fit_model(df_train,48)

# Submission API

In [None]:
import enefit

env = enefit.make_env()
iter_test = env.iter_test()

In [None]:
for (
    df_test, 
    df_new_target, 
    df_new_client, 
    df_new_historical_weather,
    df_new_forecast_weather, 
    df_new_electricity_prices, 
    df_new_gas_prices, 
    df_sample_prediction
) in iter_test:

    store.update_data(
        df_client_new=df_new_client,
        df_gas_price_new=df_new_gas_prices,
        df_elec_price_new=df_new_electricity_prices,
        df_forecast_new=df_new_forecast_weather,
        df_hist_weather_new=df_new_historical_weather,
        df_target_new=df_new_target
    )
    df_test = store.preprocess_test(df_test)
    
    df_test_feats = feat_gen.generate_features(df_test,False)
    
    df_test_feats.drop(columns=['date'],inplace=True)
    if 'literal' in df_test_feats.columns:
        df_test_feats.drop(columns=['literal'],inplace=True)
        
    df_sample_prediction["target"] = predict_model(df_test_feats,48)
    
    env.predict(df_sample_prediction)