# Get Summary Stats on our Load information
These statistics are to be used in the metadata-inclusion parts of our model.

In [2]:
import pandas as pd
from datetime import datetime
from sklearn import preprocessing
import pickle 

In [3]:
# Directory (relative to this notebook) that the processed data files are
# read from and that the node statistics pickle is written to.
processed_dir = "../data/processed/"

In [4]:
# Two datetime boundaries of the validation range. Only rows strictly
# before the first boundary are kept when the demand data is loaded.
validation_range = [
    datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
    for stamp in ("2014-10-01 00:00:00", "2014-12-31 23:00:00")
]

In [14]:
# Read the processed demand table, parse the `time` column into datetimes,
# and keep only the rows that fall strictly before the validation range.
energy_demand = (
    pd.read_parquet(processed_dir + "EnergyDemandData.parquet")
    .assign(time=lambda frame: pd.to_datetime(frame['time'], format='%Y-%m-%d %H:%M:%S'))
    .loc[lambda frame: frame['time'] < validation_range[0]]
    .reset_index(drop=True)
)

# Replace the season labels with integer codes. The fitted encoder is kept
# in `label_encoder` so the label <-> code mapping can be recovered later.
label_encoder = preprocessing.LabelEncoder()
energy_demand['season'] = label_encoder.fit_transform(energy_demand['season'])
energy_demand.head()

Unnamed: 0,time,node,hour,dow,month,year,holiday,season,country,voltage,solar_cosmo,solar_ecmwf,wind_cosmo,wind_ecmwf,load
0,2012-01-01 00:00:00,1,0,6,1,2012,1,3,POR,380,0.0,0.0,0.0142,0.0284,0.279887
1,2012-01-01 01:00:00,1,1,6,1,2012,1,3,POR,380,0.0,0.0,0.0167,0.0336,0.218439
2,2012-01-01 02:00:00,1,2,6,1,2012,1,3,POR,380,0.0,0.0,0.0139,0.0392,0.160968
3,2012-01-01 03:00:00,1,3,6,1,2012,1,3,POR,380,0.0,0.0,0.0195,0.0424,0.112879
4,2012-01-01 04:00:00,1,4,6,1,2012,1,3,POR,380,0.0,0.0,0.0197,0.0475,0.087529


## Summary Stats

In [15]:
# Average load for each day of the week (`dow`).
(
    energy_demand
    .groupby("dow", as_index=False)["load"]
    .agg("mean")
)

Unnamed: 0,dow,load
0,0,0.460693
1,1,0.487079
2,2,0.490401
3,3,0.487294
4,4,0.476223
5,5,0.369969
6,6,0.304989


In [16]:
# Average load for each (label-encoded) season.
(
    energy_demand
    .groupby("season", as_index=False)["load"]
    .agg("mean")
)

Unnamed: 0,season,load
0,0,0.470889
1,1,0.38678
2,2,0.374788
3,3,0.543024


In [17]:
# Per-node load statistics: mean and variance of `load` for every
# (node, season, dow) combination.
node_stats = energy_demand.groupby(["node", "season", "dow"]).agg({'load': ["mean", "var"]})

# Flatten the two-level column index into 'load_mean' / 'load_var'.
# Assigning a plain list builds a fresh, unnamed column index, so there is
# no column-index name left to remove afterwards. The former
# `del node_stats.columns.name` was therefore redundant, and it raises
# AttributeError on pandas versions where `Index.name` is a property
# without a deleter.
node_stats.columns = ['_'.join(col).strip() for col in node_stats.columns.values]

# Convert to a plain dict keyed by (node, season, dow) tuples, each mapping
# to {'load_mean': ..., 'load_var': ...}, so it can be pickled and looked up
# without pandas.
node_stats = node_stats.to_dict(orient="index")

In [18]:
# Spot-check one entry; keys are (node, season, dow) tuples and the node
# part of this key is the string '1'.
node_stats['1', 1, 1]

{'load_mean': 0.4165548411414722, 'load_var': 0.023183522162649076}

In [19]:
# Persist the statistics dictionary next to the other processed data files.
node_stats_path = processed_dir + "node_stats.pkl"
with open(node_stats_path, "wb") as f:
    pickle.dump(node_stats, f)

In [20]:
# Read the pickle back to confirm the round trip works.
# NOTE: only unpickle files you trust; this one was written by this notebook.
with open(processed_dir + "node_stats.pkl", mode="rb") as f:
    check = pickle.load(f)

In [21]:
# The reloaded dictionary should return the same entry as `node_stats` did.
check['1', 1, 1]

{'load_mean': 0.4165548411414722, 'load_var': 0.023183522162649076}