<span style="color:gray">
Copyright &copy; 2020-2021 by Fraunhofer-Gesellschaft. All rights reserved.<br>
Fraunhofer Institute for Integrated Circuits IIS, Division Engineering of Adaptive Systems EAS<br>
Zeunerstraße 38, 01069 Dresden, Germany
</span>

---

## ESB - Energy Saving by Blockchain

Eurostars – EXP 00119832 / EUS-2019113348

---

## Prediction of Energy Consumption for Variable Customer Portfolios Including Aleatoric Uncertainty Estimation

*Oliver Mey, André Schneider, Olaf Enge-Rosenblatt, Yesnier Bravo, Pit Stenzel*

This notebook is part of a paper submitted to the **10th International Conference on Power Science and Engineering (ICPSE 2021)**, which will be held on Oct. 21-23, 2021 at Yildiz Technical University, Istanbul, Turkey.

---

# B1: Feature Extraction

This notebook loads the available datasets and extracts the features needed as input for the prediction models for a pre-defined date (*2019-02-02*) and a customer (*#20*). The feature extraction uses pre-trained scalers, which are loaded from the `models` folder.

---

<span style="color:gray">
Version 0.3.1 (August 2, 2021)<br>
Authors: Oliver Mey, André Schneider (Fraunhofer IIS)<br>
</span>    

In [1]:
# Suppress every warning so that the rendered cell outputs stay compact.
# NOTE(review): this filter is global, so deprecation warnings raised by the
# libraries imported below are hidden as well.
import warnings
warnings.filterwarnings('ignore')

import os
import joblib
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import holidays as hd
import seaborn as sns
import tensorflow as tf

from sklearn.preprocessing import OneHotEncoder

# Render matplotlib figures inside the notebook and use a 16x6 default figure size.
%matplotlib inline
sns.set(rc={'figure.figsize':(16, 6)})

### Configuration

In [2]:
# Root folder; the loading cell reads from its 'data' and 'models' sub-folders.
path = ".."
# Default time zone of the helper functions defined in the next cell.
timezone = "Europe/Madrid"
# Day for which the feature vectors are extracted.
date = "2019-02-02"
# Column label of the customer (columns are renumbered 0..n-1 when the data is loaded).
customer = 20

### Function Definitions

In [3]:
def to_UTC(data, tz=timezone):
    """Convert the naive local-time index of ``data`` to UTC, in place.

    The index is localized to ``tz`` (ambiguous wall-clock times are resolved
    with ``ambiguous='infer'``) and then converted to UTC.  The conversion is
    best effort: when it cannot be carried out, ``data`` is left unchanged and
    a ``RuntimeWarning`` describing the reason is issued.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Object whose ``index`` attribute is replaced.
    tz : str, optional
        Time zone the naive timestamps are expressed in.

    Returns
    -------
    None
        ``data`` is modified in place.
    """
    index = data.index
    if getattr(index, 'tz', None) is not None:
        # An index that already carries a time zone cannot be localized again;
        # it is deliberately left as it is.
        return
    try:
        data.index = index.tz_localize(tz=tz, ambiguous='infer').tz_convert(tz='UTC')
    except Exception as error:
        # Keep the best-effort contract, but neither swallow KeyboardInterrupt /
        # SystemExit nor hide the reason.  The message is only visible when
        # warnings are not filtered out globally.
        warnings.warn(
            'to_UTC: index left unchanged ({}: {})'.format(type(error).__name__, error),
            RuntimeWarning,
            stacklevel=2,
        )
    return

def crop(data):
    """Trim ``data`` in place to the span between its first hour-0 row and its last hour-23 row.

    Rows with a timestamp earlier than the first row whose index hour is 0, and
    rows later than the last row whose index hour is 23, are dropped.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Object with a datetime index; modified in place.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If the index has no row at hour 0 or no row at hour 23.
    """
    hours = data.index.hour
    first_start = data.index[hours == 0][:1]
    last_end = data.index[hours == 23][-1:]

    # Leading partial day.
    too_early = data.index[data.index < first_start[0]]
    data.drop(too_early, inplace=True)

    # Trailing partial day.
    too_late = data.index[data.index > last_end[0]]
    data.drop(too_late, inplace=True)
    return

def time_from_to(date, t, tz=timezone):
    """Return a slice of tz-aware timestamps for an hourly window around ``date``.

    The window starts at midnight of ``date`` (in ``tz``) shifted by ``t[0]``
    days and ends at midnight of ``date`` shifted by ``t[1]`` days plus 23
    hours.

    Parameters
    ----------
    date : str or datetime-like
        Reference day.
    t : sequence of two numbers
        Start and end offsets in days relative to ``date``.
    tz : str, optional
        Time zone in which ``date`` is interpreted.

    Returns
    -------
    slice
        ``slice(start, stop)`` of ``pandas.Timestamp`` objects, suitable for
        label-based ``.loc`` selection.
    """
    midnight = pd.Timestamp(date, tz=tz)
    start_days, stop_days = t[0], t[1]
    window_start = midnight + pd.Timedelta(days=start_days)
    window_stop = midnight + pd.Timedelta(days=stop_days) + pd.Timedelta(hours=23)
    return slice(window_start, window_stop)

def day_from_to(date, t, tz=timezone):
    """Return a slice of naive timestamps for a window of whole days around ``date``.

    Parameters
    ----------
    date : str or datetime-like
        Reference day.
    t : sequence of two numbers
        Start and end offsets in days relative to ``date``.
    tz : str, optional
        Not used by this function; the returned timestamps carry no time zone.

    Returns
    -------
    slice
        ``slice(date + t[0] days, date + t[1] days)``.
    """
    reference_day = pd.Timestamp(date)
    first_day, last_day = (reference_day + pd.Timedelta(days=t[k]) for k in (0, 1))
    return slice(first_day, last_day)

### Class Definitions

#### Data Loader

In [4]:
class DataLoader:
    """Read the raw spreadsheets and the persisted scalers used by the feature extraction.

    Parameters
    ----------
    data_path : str
        Folder that contains the ``20201015_<category>.xlsx`` spreadsheets.
    model_path : str
        Folder that contains one persisted scaler file per entry of
        ``scaler_names``.
    """

    def __init__(self, data_path, model_path):
        self.data_path = data_path
        self.model_path = model_path
        self.categories = ['consumption', 'weather', 'profiles']
        self.scaler_names = ['scaler_consumptions', 'scaler_consumptions_daily_mean',
                             'scaler_weather_daily_mean', 'scaler_day_of_month', 'scaler_month',
                             'scaler_weather_forecast']
        # One spreadsheet per category, in the same order as ``categories``.
        self.files = [self.data_path + '/' + '20201015_' + name + '.xlsx' for name in self.categories]

    def _file_for(self, category):
        """Return the spreadsheet path that belongs to ``category``."""
        return self.files[self.categories.index(category)]

    def load_metadata(self):
        """Load the customer/profile table and build the holiday calendar.

        Returns
        -------
        tuple
            ``(customers, profiles, holidays)``: the profiles spreadsheet with
            its two columns renamed to ``customer`` and ``profile``, a frame
            with the distinct profile values, and the ``holidays`` calendar
            for Spain (``prov="MD"``) covering the years 2010 to 2020.
        """
        customers = pd.read_excel(self._file_for('profiles'))
        customers.columns = ['customer', 'profile']
        profiles = pd.DataFrame(customers['profile'].unique(), columns=['profile'])
        holidays = hd.ES(years=list(range(2010, 2021)), prov="MD")
        return customers, profiles, holidays

    def load_data(self):
        """Load the consumption and weather time series.

        Both spreadsheets are indexed by their first column (parsed as
        dates), converted to UTC with ``to_UTC`` and trimmed with ``crop``.
        The column labels of both frames are replaced by the positions
        ``0..n-1``, so a customer is addressed by its column position; the
        weather sheet therefore has to provide the same number of columns as
        the consumption sheet.

        Returns
        -------
        tuple of pandas.DataFrame
            ``(consumptions, weather)``.
        """
        consumptions = pd.read_excel(self._file_for('consumption'), parse_dates=[0], index_col=0)
        consumptions.columns = pd.RangeIndex(len(consumptions.columns))
        consumptions.index.name = 'time'
        to_UTC(consumptions)
        crop(consumptions)

        weather = pd.read_excel(self._file_for('weather'), parse_dates=[0], index_col=0)
        weather.columns = consumptions.columns
        weather.index.name = 'time'
        to_UTC(weather)
        crop(weather)
        return consumptions, weather

    def load_scalers(self):
        """Load every persisted scaler listed in ``scaler_names``.

        Returns
        -------
        dict
            Mapping from scaler name to the object restored by ``joblib``.
        """
        # NOTE: joblib.load unpickles the files and can therefore execute
        # arbitrary code; only point ``model_path`` at trusted files.
        return {name: joblib.load(self.model_path + '/' + name) for name in self.scaler_names}

#### Feature Extractor

In [5]:
class FeatureExtractor:
    """Build the two model input vectors ``Xa`` and ``Xb`` for one customer and one day.

    Parameters
    ----------
    properties : dict
        Configuration with the keys

        * ``t_consumption_daily``, ``t_consumption_hourly``,
          ``t_weather_daily``, ``t_weather_hourly``: ``[first, last]`` day
          offsets relative to the requested date (optional, defaults below),
        * ``scalers``: mapping that provides ``scaler_consumptions``,
          ``scaler_weather_forecast``, ``scaler_day_of_month`` and
          ``scaler_month`` (each with a ``transform`` method),
        * ``encoder``: one-hot encoder for the day category.
    """

    def __init__(self, properties):
        self.t_consumption_daily = properties.get('t_consumption_daily', [-13, -1])
        self.t_consumption_hourly = properties.get('t_consumption_hourly', [-2, -1])
        self.t_weather_daily = properties.get('t_weather_daily', [-2, 0])
        self.t_weather_hourly = properties.get('t_weather_hourly', [-2, 0])

        scalers = properties.get('scalers')
        self.scaler_consumption = scalers['scaler_consumptions']
        self.scaler_weather_forecast = scalers['scaler_weather_forecast']
        self.scaler_day_of_month = scalers['scaler_day_of_month']
        self.scaler_month = scalers['scaler_month']
        self.encoder = properties.get('encoder')

    def get_days(self, dates, holidays):
        """Return one row of calendar features per date contained in ``dates``.

        Parameters
        ----------
        dates : pandas.DatetimeIndex
            Timestamps of the time series.
        holidays : container
            Anything that supports ``day in holidays``.

        Returns
        -------
        pandas.DataFrame
            Indexed by ``date`` with the columns ``day_of_week``,
            ``day_of_month``, ``month`` and ``day_category``.  The category is
            0 for Monday, 1 for Tuesday to Thursday, 2 for Friday, 3 for
            Saturday and 4 for Sunday or any day found in ``holidays``.
        """
        days = pd.DataFrame(pd.to_datetime(dates.date), index=dates, columns=['date'])
        days['day_of_week'] = list(days.index.dayofweek)
        days['day_of_month'] = list(days.index.day)
        days['month'] = list(days.index.month)
        days['day_category'] = days['day_of_week'].replace({0:0,1:1,2:1,3:1,4:2,5:3,6:4})
        # Holidays share the category of Sundays.
        days.loc[days['date'].apply(lambda d: d in holidays), 'day_category'] = 4
        days = days.groupby(['date']).first()
        return days

    @staticmethod
    def _daily_mean(frame, day_index):
        """Average ``frame`` per calendar date of its index and align the result with ``day_index``."""
        daily = frame.groupby(frame.index.date).mean()
        # The group keys are datetime.date objects; convert them explicitly so
        # that the alignment with the datetime index does not depend on
        # implicit type promotion.
        daily.index = pd.to_datetime(daily.index)
        return daily.reindex(day_index)

    def extract(self, date, customer, consumptions, weathers, holidays):
        """Assemble the feature vectors for ``customer`` on ``date``.

        Parameters
        ----------
        date : str or datetime-like
            Day for which the features are built.
        customer : hashable
            Column label of the customer in ``consumptions`` and ``weathers``.
        consumptions, weathers : pandas.DataFrame
            Hourly time series with one column per customer.
        holidays : container
            Passed on to ``get_days``.

        Returns
        -------
        list of numpy.ndarray
            ``[Xa, Xb]``, each of shape ``(1, n)``.  ``Xa`` holds the scaled
            hourly consumption, the scaled hourly weather, the scaled day of
            month, the scaled month and the one-hot day category.  ``Xb``
            holds the (unscaled) daily mean consumption, the daily mean
            weather and the one-hot day category.
        """
        day = pd.Timestamp(date)
        days = self.get_days(consumptions.index, holidays)
        consumptions_daily_mean = self._daily_mean(consumptions, days.index)
        # Use the ``weathers`` argument; the method must not depend on a
        # notebook-level variable that happens to be called ``weather``.
        weather_daily_mean = self._daily_mean(weathers, days.index)

        # ---- Xa: hourly windows plus calendar features -----------------------
        X1 = consumptions.loc[time_from_to(date, self.t_consumption_hourly), customer].values
        X1 = self.scaler_consumption.transform(np.array(X1).reshape(-1, 1)).reshape(-1)
        # The weather scaler works on one row of 24 hourly values per day; the
        # number of rows follows from the configured window (3 for [-2, 0]).
        n_weather_days = self.t_weather_hourly[1] - self.t_weather_hourly[0] + 1
        X2 = weathers.loc[time_from_to(date, self.t_weather_hourly), customer].values
        X2 = self.scaler_weather_forecast.transform(np.array(X2).reshape(n_weather_days, 24)).reshape(-1)
        X3 = days.loc[day, 'day_of_month']
        X3 = self.scaler_day_of_month.transform(np.array([X3]).reshape(-1, 1))[0][0]
        X4 = days.loc[day, 'month']
        X4 = self.scaler_month.transform(np.array([X4]).reshape(-1, 1))[0][0]
        X5 = days.loc[day, 'day_category']
        X5 = self.encoder.transform(np.array(X5).reshape(1, -1)).reshape(-1)
        Xa = np.concatenate([X1, X2, [X3], [X4], X5]).reshape(1, -1)

        # ---- Xb: daily means plus day category -------------------------------
        X1 = consumptions_daily_mean.loc[day_from_to(date, self.t_consumption_daily), customer].values
        X2 = weather_daily_mean.loc[day_from_to(date, self.t_weather_daily), customer].values
        X3 = self.encoder.transform(np.array([days.loc[day, 'day_category'].astype(np.int32)]).reshape(1, -1))
        Xb = np.concatenate([X1, X2, X3[0]]).reshape(1, -1)
        return [Xa, Xb]

### Loading Data

In [6]:
# Read the time series, the metadata and the persisted scalers from below `path`.
loader = DataLoader(path + '/data', path + '/models')
consumptions, weather = loader.load_data()
customers, profiles, holidays = loader.load_metadata()
scalers = loader.load_scalers()

# scikit-learn 1.2 renamed the `sparse` argument of OneHotEncoder to
# `sparse_output` and 1.4 removed the old name; support both so the notebook
# runs on old and new installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
# Fit on the five day categories 0..4 produced by FeatureExtractor.get_days.
encoder.fit(np.arange(5).reshape(-1,1))

OneHotEncoder(sparse=False)

In [7]:
# Settings handed to FeatureExtractor.
properties = {}

# Windows as [first, last] day offsets relative to the requested date.
properties['t_consumption_daily'] = [-13, -1]
properties['t_consumption_hourly'] = [-2, -1]
properties['t_weather_daily'] = [-2, 0]
properties['t_weather_hourly'] = [-2, 0]

# Objects created in the loading cell.
properties['scalers'] = scalers
properties['encoder'] = encoder

### Extracting Features

In [8]:
# Create the extractor from the windows, scalers and encoder collected in `properties`.
extractor = FeatureExtractor(properties)

In [9]:
# Build both feature vectors for the configured date and customer.
features = extractor.extract(date, customer, consumptions, weather, holidays)

### Results: 2 Feature Vectors Xa, Xb

In [10]:
# extract() returns the list [Xa, Xb].
Xa, Xb = features[0], features[1]

In [11]:
# With the windows configured above this shows (1, 127) for Xa and (1, 21) for Xb.
Xa.shape, Xb.shape

((1, 127), (1, 21))

In [12]:
# Full dump of Xa (scaled hourly values, scaled calendar features, one-hot day category).
print(Xa)

[[ 8.33132207e+00  2.90308340e+00  2.72049157e+00  2.58586877e+00
   2.47755158e+00  2.53016279e+00  2.37387655e+00  2.09689402e+00
   1.53983418e+00  3.44621675e+00  2.68180685e+00  9.42842048e+00
   1.02005673e+01  8.28180564e+00  9.73016123e+00  1.57680709e+01
   1.31344158e+01  1.15158475e+01  1.15065632e+01  9.89882657e+00
   9.27522902e+00  7.93673802e+00  9.24892342e+00  9.98857510e+00
   8.96110917e+00  5.58470758e+00  4.32203861e+00  3.12281256e+00
   2.42184560e+00  2.27948586e+00  2.70347029e+00  2.74370239e+00
   2.89225168e+00  5.34331498e+00  8.30965863e+00  8.21062577e+00
   1.19382845e+01  9.47948430e+00  1.07808380e+01  1.38493093e+01
   1.26423463e+01  1.15220370e+01  1.05146872e+01  8.03577088e+00
   9.29998724e+00  9.76110900e+00  1.11754220e+01  1.02686524e+01
  -2.70649044e-02  1.56157472e-02  5.37974926e-02  9.20993149e-02
   1.36802432e-01  1.60638568e-01  1.82433798e-01  1.56729281e-01
  -8.99169761e-03 -1.68757540e-01 -3.83298186e-01 -5.68986924e-01
  -7.48309

In [13]:
# Full dump of Xb (daily means followed by the one-hot day category).
print(Xb)

[[ 4.12908333  5.2375      5.686       6.12191667  5.17904167  3.348
   3.69833333  3.639125    4.56720833  5.32829167  5.187625    5.02570833
   5.46291667 14.79333333 11.10458333 12.29333333  0.          0.
   0.          1.          0.        ]]
