<a href="https://www.kaggle.com/code/angelicababei/enefitutils?scriptVersionId=162879956" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

This utility script provides plotting functions for exploring the Enefit dataset.

Citation:
Kristjan Eljand, Martin Laid, Jean-Baptiste Scellier, Sohier Dane, Maggie Demkin, Addison Howard. (2023). 
Enefit - Predict Energy Behavior of Prosumers. Kaggle. https://kaggle.com/competitions/predict-energy-behavior-of-prosumers

In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

from ipywidgets import interact, widgets, Layout
from statsmodels.tsa.seasonal import STL
from prophet import Prophet
from functools import reduce

from datetime import datetime, timedelta

In [6]:
# Lookup used to turn the 0/1 flag values into the booleans used for filtering.
bool_mapping = {0: False, 1: True}


# Simple plotting
def plot_target(county_value, is_business_value, is_consumption_value, product_type_value, df):
    """Plot the ``target`` column for one county/business/consumption/product combination.

    ``is_business_value`` and ``is_consumption_value`` are looked up in
    ``bool_mapping`` (so anything that is not one of its keys raises
    ``KeyError``) before being compared with the ``is_business`` and
    ``is_consumption`` columns of ``df``.  The selected ``target`` values are
    drawn against the DataFrame index.  When no row matches, a message is
    printed and no figure is created.
    """
    is_business_value = bool_mapping[is_business_value]
    is_consumption_value = bool_mapping[is_consumption_value]

    # Build the row selection one condition at a time.
    row_mask = df['county'] == county_value
    row_mask &= df['is_business'] == is_business_value
    row_mask &= df['is_consumption'] == is_consumption_value
    row_mask &= df['product_type'] == product_type_value
    selection = df[row_mask]

    # Guard clause: nothing to draw for this combination.
    if selection.empty:
        print(f"No matching data for county={county_value}, is_business={is_business_value},  is_consumption={ is_consumption_value}, product_type={product_type_value}")
        return

    _, axis = plt.subplots(figsize=(10, 6))
    axis.plot(selection['target'])
    axis.set_title(f'Target Time Series for county={county_value}, business={is_business_value},  consumption={is_consumption_value}, product type={product_type_value}')
    axis.set_xlabel('Time')
    axis.set_ylabel('Target Value')
    plt.show()


# Plotting a Prophet-based decomposition (the "stl" name is historical: no STL is computed here)
def plot_stl(county_value, is_business_value, is_consumption_value, product_type_value, df):
    """Fit Prophet to one log-transformed target series and plot/report its components.

    Despite the function name, the decomposition comes from a ``Prophet``
    model with daily and yearly seasonality enabled and weekly seasonality
    disabled, not from ``STL``.

    Args:
        county_value: Value compared against ``df['county']``.
        is_business_value: Flag looked up in ``bool_mapping`` and compared
            against ``df['is_business']``.
        is_consumption_value: Flag looked up in ``bool_mapping`` and compared
            against ``df['is_consumption']``.
        product_type_value: Value compared against ``df['product_type']``.
        df: DataFrame providing the ``county``, ``is_business``,
            ``is_consumption``, ``product_type``, ``datetime`` and ``target``
            columns.

    Returns:
        None.  The function draws Prophet's component figure and a residual
        plot and prints the trend, yearly and daily strength scores.  When no
        row matches the selection, it only prints a message.

    Raises:
        KeyError: If either flag is not a key of ``bool_mapping``.
    """
    # Map 'is_business' and 'is_consumption' values to True/False
    is_business_value = bool_mapping[is_business_value]
    is_consumption_value = bool_mapping[is_consumption_value]

    selection_mask = (
        (df['county'] == county_value)
        & (df['is_business'] == is_business_value)
        & (df['is_consumption'] == is_consumption_value)
        & (df['product_type'] == product_type_value)
    )
    filtered_df = df[selection_mask]

    if filtered_df.empty:
        print(f"No matching data for county={county_value}, is_business={is_business_value},  is_consumption={ is_consumption_value}, product_type={product_type_value}")
        return

    # A multiplicative decomposition is obtained by modelling log(target + 1):
    # the +1 removes zeros before the log.  np.log1p already adds the 1, so it
    # is applied to the raw target (shifting first would yield log(target + 2)).
    ndf = pd.DataFrame({
        'ds': pd.to_datetime(filtered_df['datetime'].to_list()),
        'y': np.log1p(filtered_df['target']).to_list(),
    })
    # The residuals below subtract forecast values from ndf['y'] by position.
    # NOTE(review): Prophet is understood to return predictions ordered by
    # `ds`; sorting here keeps both sides in the same order regardless of the
    # row order of `df`.
    ndf = ndf.sort_values('ds').reset_index(drop=True)

    # Creating and fitting a Prophet model
    model = Prophet(daily_seasonality=True, weekly_seasonality=False, yearly_seasonality=True)
    model.fit(ndf)

    # periods=0: predict on the historical timestamps only (no extrapolation).
    future = model.make_future_dataframe(periods=0)
    forecast = model.predict(future)

    trend_component = forecast['trend']
    residual_component = ndf['y'] - forecast['yhat']
    model.plot_components(forecast)

    # Strengths: 1 - Var(residual) / Var(residual + component), floored at 0.
    # Trend
    print("The trend strength is", max(0, 1-residual_component.var()/(residual_component+trend_component).var()))
    # Seasonality
    # Yearly
    yearly_residual = ndf['y'] - trend_component - forecast['yearly']
    print("Yearly seasonality strength is", max(0, 1-yearly_residual.var()/(yearly_residual+forecast['yearly']).var()))
    # Daily
    daily_residual = ndf['y'] - trend_component - forecast['daily']
    print("Daily seasonality strength is", max(0, 1-daily_residual.var()/(daily_residual+forecast['daily']).var()))

    # Plotting the residuals
    plt.figure(figsize=(10, 4))
    plt.plot(residual_component)
    plt.title('Residuals')
    plt.xlabel('Time step')
    plt.ylabel('Residuals')
    plt.show()

# Plotting correlations between the generation and consumption time series that share the same county, is_business, and product_type values.
def corr_gen_cons(county_value, is_business_value, product_type_value, df):
    """Draw the correlation heatmap between matching consumption and generation targets.

    Rows of ``df`` are selected by ``county``, ``is_business`` and
    ``product_type``; the ``target`` values with ``is_consumption == 1`` and
    ``is_consumption == 0`` are then joined on their shared ``datetime``
    index values and their correlation matrix is plotted.

    Args:
        county_value: Value compared against ``df['county']``.
        is_business_value: Value compared against ``df['is_business']``.
        product_type_value: Value compared against ``df['product_type']``.
        df: DataFrame providing the ``datetime``, ``county``, ``is_business``,
            ``is_consumption``, ``product_type`` and ``target`` columns.

    Returns:
        None.  A heatmap is shown, or a message is printed when the two
        series have no timestamps in common (including when either is empty).
    """
    indexed_df = df.set_index('datetime')
    shared_mask = (
        (indexed_df['county'] == county_value)
        & (indexed_df['is_business'] == is_business_value)
        & (indexed_df['product_type'] == product_type_value)
    )

    # Distinct names give readable heatmap labels instead of the automatic
    # `target_x` / `target_y` suffixes produced when both columns share a name.
    consumption = indexed_df.loc[shared_mask & (indexed_df['is_consumption'] == 1), 'target'].rename('target_consumption')
    generation = indexed_df.loc[shared_mask & (indexed_df['is_consumption'] == 0), 'target'].rename('target_generation')

    data = pd.merge(consumption, generation, left_index=True, right_index=True)
    if data.empty:
        # Nothing to correlate; report it instead of drawing an empty/NaN heatmap.
        print(f"No matching consumption/generation data for county={county_value}, is_business={is_business_value}, product_type={product_type_value}")
        return

    plt.figure(figsize=(10, 10))
    sns.heatmap(data.corr(), annot=True, cbar=False, cmap="RdBu", vmin=-1, vmax=1)
    plt.title("Correlation Matrix of Features")
    plt.show()
    
# Plotting correlations between time series with varying county values and the same is_business, is_consumption, and product_type values.
def corr_counties( is_business_value, product_type_value, is_consumption_value, df):
    """Draw the correlation heatmap of the target series across counties.

    For every county present in ``df`` the ``target`` values matching the
    given ``is_business``, ``is_consumption`` and ``product_type`` values are
    collected as one column named ``target_county_<county>``.  The columns
    are inner-joined on the ``datetime`` index and their correlation matrix
    is plotted.

    Args:
        is_business_value: Value compared against ``df['is_business']``.
        product_type_value: Value compared against ``df['product_type']``.
        is_consumption_value: Value compared against ``df['is_consumption']``.
        df: DataFrame providing the ``datetime``, ``county``, ``is_business``,
            ``is_consumption``, ``product_type`` and ``target`` columns.

    Returns:
        None.  A heatmap is shown, or a message is printed when no county has
        matching rows or the matching series share no timestamps.
    """
    indexed_df = df.set_index('datetime')
    shared_mask = (
        (indexed_df['is_business'] == is_business_value)
        & (indexed_df['is_consumption'] == is_consumption_value)
        & (indexed_df['product_type'] == product_type_value)
    )

    # Take the county ids from the data itself rather than a fixed range, so
    # no county is silently left out.
    values = []
    for county_value in sorted(indexed_df['county'].dropna().unique()):
        county_series = indexed_df.loc[shared_mask & (indexed_df['county'] == county_value), 'target']
        if not county_series.empty:
            values.append(county_series.rename('target' + '_county_' + str(county_value)))

    if not values:
        print(f"No matching data for is_business={is_business_value}, is_consumption={is_consumption_value}, product_type={product_type_value}")
        return

    # Seeding the fold with a one-column frame keeps the result a DataFrame
    # even when a single county matches (a bare Series has no matrix .corr()).
    data = reduce(
        lambda left, right: pd.merge(left, right, left_index=True, right_index=True),
        values[1:],
        values[0].to_frame(),
    )
    if data.empty:
        print(f"No overlapping timestamps for is_business={is_business_value}, is_consumption={is_consumption_value}, product_type={product_type_value}")
        return

    plt.figure(figsize=(10, 10))
    sns.heatmap(data.corr(), annot=True, cbar=False, cmap="RdBu", vmin=-1, vmax=1)
    plt.title("Correlation Matrix of Features")
    plt.show()
    
    


In [1]:
## Plot class: widgets
# Lookup between the 0/1 flag values and the booleans offered in the dropdowns.
bool_mapping = {0: False, 1: True}


class Plotting:
    """Interactive ipywidgets front end for the plotting functions of this script.

    One dropdown per selector (county, business, consumption, product type)
    is created at construction time and reused by every interactive method.
    The county and product-type choices are the unique values found in the
    ``county`` and ``product_type`` columns of the wrapped DataFrame; the two
    flag dropdowns offer the values of ``bool_mapping``.
    """

    def __init__(self, df):
        """Store ``df`` and build the four selector dropdowns.

        Args:
            df: DataFrame that must provide at least the ``county`` and
                ``product_type`` columns; it is passed unchanged to the
                plotting functions.
        """
        self.dataframe = df
        self.county_dropdown = widgets.Dropdown(options=self.dataframe['county'].unique(), description='county:', layout=Layout( margin='0 0 0 250px'))
        self.business_dropdown = widgets.Dropdown(options=list(bool_mapping.values()), description='business:', layout=Layout( margin='10px 0 0 250px'))
        self.consumption_dropdown = widgets.Dropdown(options=list(bool_mapping.values()), description='consumption:', layout=Layout( margin='10px 0 0 250px'))
        self.product_type_dropdown = widgets.Dropdown(options=self.dataframe['product_type'].unique(), description='product type:  ', layout=Layout( margin='10px 0 0 250px'))
        # Not read anywhere in this class; kept so the attribute stays available.
        self.components = {}

    def tuple_values(self):
        """Return (counties, business keys, consumption keys, product types).

        Counties and product types are the unique column values of the
        wrapped DataFrame; both flag entries are the keys of ``bool_mapping``.
        """
        flag_keys = list(bool_mapping.keys())
        return self.dataframe['county'].unique(), flag_keys, list(flag_keys), self.dataframe['product_type'].unique()

    def _interact(self, plot_function, **dropdowns):
        """Wire ``plot_function`` to the given dropdowns with the DataFrame held fixed."""
        interact(plot_function, **dropdowns, df=widgets.fixed(self.dataframe))

    def series_plot(self):
        """Interactive ``plot_target`` driven by all four dropdowns."""
        self._interact(
            plot_target,
            county_value=self.county_dropdown,
            is_business_value=self.business_dropdown,
            is_consumption_value=self.consumption_dropdown,
            product_type_value=self.product_type_dropdown,
        )

    def stl_decomposition(self):
        """Interactive ``plot_stl`` driven by all four dropdowns."""
        self._interact(
            plot_stl,
            county_value=self.county_dropdown,
            is_business_value=self.business_dropdown,
            is_consumption_value=self.consumption_dropdown,
            product_type_value=self.product_type_dropdown,
        )

    def gen_cons_corr(self):
        """Interactive ``corr_gen_cons`` driven by county, business and product type."""
        self._interact(
            corr_gen_cons,
            county_value=self.county_dropdown,
            is_business_value=self.business_dropdown,
            product_type_value=self.product_type_dropdown,
        )

    def counties_corr(self):
        """Interactive ``corr_counties`` driven by business, consumption and product type."""
        self._interact(
            corr_counties,
            is_business_value=self.business_dropdown,
            is_consumption_value=self.consumption_dropdown,
            product_type_value=self.product_type_dropdown,
        )
   