In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

In [None]:
def rename_cols(s):
    """Turn a raw column header into a short snake_case-style identifier.

    The header is lower-cased and then a fixed list of literal substitutions
    is applied in order: spaces and '/' become '_', square brackets are
    dropped, '%' becomes 'perc', the unit fragments '_kw' / '_w' get their
    capital letter back, and a few long words are abbreviated. Finally a
    trailing '_c' is rewritten to '_C'.

    Parameters
    ----------
    s : str
        Original column name, e.g. ``"Solar Generation [W/kW]"``.

    Returns
    -------
    str
        The normalised name, e.g. ``"solar_generation_W_kW"``.
    """
    # Order matters: '_kw' has to be handled before '_w', otherwise the
    # '_w' rule would fire on the first two characters of '_kw'.
    # Note that '_w' is a plain substring rule, so any word starting with
    # 'w' after an underscore is capitalised as well.
    substitutions = (
        (' ', '_'),
        ('[', ''),
        (']', ''),
        ('/', '_'),
        ('%', 'perc'),
        ('_kw', '_kW'),
        ('_w', '_W'),
        ('prediction', 'pred'),
        ('relative', 'rel'),
        ('temperature', 'temp'),
        ('humidity', 'hum'),
    )
    renamed = s.lower()
    for old, new in substitutions:
        renamed = renamed.replace(old, new)
    # Only a '_c' at the very end of the name is treated as the unit.
    return re.sub(r'_c$', '_C', renamed)

In [None]:
# Usage: write
#   span('2008-01-02 00:00', '2011-12-31 23:00')
# or, even shorter,
#   span('2008-01-02', '2011-12-31')
# instead of
#   pd.date_range('2008-01-02 00:00', '2011-12-31 23:00', freq='H')

def span(start, end=None, freq='H'):
    """Build a DatetimeIndex from ``start`` to ``end`` (both inclusive).

    Convenience wrapper around ``pd.date_range`` that lets callers pass bare
    dates: a ``start`` of the form ``YYYY-MM-DD`` is extended to 00:00 and an
    ``end`` of that form is extended to 23:00, so a date-only span covers the
    whole first and last day at the default hourly frequency.

    Parameters
    ----------
    start : str
        First timestamp, either ``'YYYY-MM-DD'`` or a full date-time string.
    end : str, optional
        Last timestamp in the same formats. When omitted (or empty), ``start``
        is used, i.e. a date-only ``start`` yields that single day.
    freq : str, default 'H'
        Frequency passed to ``pd.date_range``. The legacy upper-case hourly
        alias (``'H'``, ``'2H'``, ...) is still accepted and translated to the
        lower-case spelling that current pandas expects.

    Returns
    -------
    pandas.DatetimeIndex
    """
    if not end:
        end = start
    date_only = re.compile("^....-..-..$")  # matches patterns like YYYY-MM-DD
    if date_only.match(start):
        start += " 00:00"
    if date_only.match(end):
        end += " 23:00"
    # pandas 2.2 deprecated the upper-case hourly alias 'H' in favour of 'h';
    # lower-case 'h' is also understood by older releases, so normalise here
    # instead of changing the public default.
    if isinstance(freq, str):
        freq = re.sub(r"^(\d*)H$", r"\1h", freq)
    return pd.date_range(start=start, end=end, freq=freq)

# Sanity checks: a date-only span covers all 24 hours of the day.
assert len(span("2008-01-01", "2008-01-01")) == 24
assert len(span("2008-01-01")) == 24

In [None]:
# Load the weather observations and the Building 5 time series.
weather = pd.read_csv("../data/citylearn_challenge_2021/weather.csv")
b5 = pd.read_csv("../data/citylearn_challenge_2021/Building_5.csv")

# Check row alignment *before* joining: pd.concat(axis=1) aligns on the index
# and pads the shorter frame with NaN, so a check made afterwards cannot
# detect a Building_5.csv that is shorter than weather.csv.
assert len(b5) == len(weather), "Building_5.csv and weather.csv must have the same number of rows"
b5 = pd.concat([b5, weather], axis=1)

b5 = (b5
    .drop(columns=["Heating Load [kWh]"])
    .assign(
        # Timestamps are assigned positionally from a generated hourly range.
        datetime=span('2008-01-02', '2011-12-31'),
        # "Day Type" == 8 is treated as the holiday marker.
        holiday=lambda x: x["Day Type"] == 8)
    # Series.dt.weekday counts Monday=0 ... Sunday=6, so Monday-Friday is
    # `< 5` (the former `>= 1 & <= 5` selected Tuesday-Saturday).
    .assign(workday=lambda x: (x.datetime.dt.weekday < 5) & (x["Day Type"] != 8))
    .drop(columns=["Month", "Hour", "Day Type"])
    .set_index("datetime")
    .rename(columns=rename_cols)
)
b5.columns

In [None]:
# Solar generation next to direct solar radiation for a single day.
first_day = span('2008-01-02')  # build the one-day index once and reuse it

fig, (ax1, ax2) = plt.subplots(1, 2)
fig.suptitle("Solar generation -vs- direct solar radiation [first day]")
b5.loc[first_day, 'solar_generation_W_kW'].plot(ax=ax1) #7 = Solar Generation [W/kW]
ax1.set_title("Solar generation [W/kW]")
b5.loc[first_day, 'direct_solar_radiation_W_m2'].plot(ax=ax2)
# The output-suppressing semicolon only works on the cell's last statement.
ax2.set_title("Direct solar radiation [W/m2]");

In [None]:
# Solar generation and direct solar radiation on one axis for January-February.
# 2008 is a leap year, so the second month runs through 29 February.
first_two_months = b5.loc[span('2008-01-02', '2008-02-29'), :]

fig, ax = plt.subplots()
fig.suptitle("Solar generation -vs- direct solar radiation [first two months]")
ax.plot('solar_generation_W_kW', data=first_two_months, label="Solar generation [W/kW]")
ax.plot('direct_solar_radiation_W_m2', data=first_two_months, label="Direct solar radiation [W/m2]")
ax.legend();  # trailing semicolon hides the Legend repr

# Trends and Seasonalities

In [None]:
import seaborn as sns

# Custom colour palette (hex codes) used by the decomposition plots below.
cmaps_hex = ['#193251', '#FF5A36', '#1E4485', '#99D04A', '#FF5A36', '#DB6668']
# The global seaborn palette is left untouched; the call below stays disabled.
#sns.set_palette(palette=cmaps_hex)
sns_c = sns.color_palette(cmaps_hex)

# Default figure geometry for every plot created after this cell.
plt.rcParams.update({
    'figure.figsize': [15, 5],
    'figure.dpi': 100,
})

In [None]:
def plot_decomposition(decomposition, observed_title='Actual temperature'):
    """Plot the four components of a decomposition result, one per row.

    Draws ``observed``, ``trend``, ``seasonal`` and ``resid`` (in that order)
    on a 4x1 grid of axes, colouring them with the first four entries of the
    module-level palette ``sns_c``.

    Parameters
    ----------
    decomposition : object
        Anything exposing ``observed``, ``trend``, ``seasonal`` and ``resid``
        attributes that each provide a pandas-style ``.plot(c=..., ax=...)``;
        in this notebook it is the result of ``seasonal_decompose``.
    observed_title : str, default 'Actual temperature'
        Title of the top panel showing the observed series.

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    # The figure used to be created at 12x12 in and then immediately resized
    # to 20x10 in; create it at its effective size directly.
    fig, axes = plt.subplots(4, 1, figsize=(20, 10), constrained_layout=True)
    panels = (
        ('observed', observed_title),
        ('trend', 'trend'),
        ('seasonal', 'seasonal'),
        ('resid', 'residual'),
    )
    for row, (component, title) in enumerate(panels):
        getattr(decomposition, component).plot(c=sns_c[row], ax=axes[row])
        axes[row].set(title=title)

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Yearly seasonality of the outdoor dry-bulb temperature.
# One year of hourly samples is 365 days * 24 h; the previous 356*24 was a
# digit transposition that misaligned the seasonal cycle with the calendar year.
decomposition_yearlyseason = seasonal_decompose(x=b5.outdoor_drybulb_temp_C,
                                   model='additive',
                                   period=365*24)
plot_decomposition(decomposition_yearlyseason)

In [None]:
# Daily seasonality: decompose only the first 24*30 rows of the outdoor
# dry-bulb temperature, using a period of 24 samples.
decomposition_dailyseason = seasonal_decompose(
    b5["outdoor_drybulb_temp_C"].iloc[:24 * 30],
    period=24,
    model='additive',
)
plot_decomposition(decomposition_dailyseason)

In [None]:
#decomposing multiple seasonalities
#this requires the development version of statsmodels

#https://www.statsmodels.org/dev/examples/notebooks/generated/mstl_decomposition.html
#https://www.statsmodels.org/dev/_modules/statsmodels/tsa/stl/mstl.html
#https://towardsdatascience.com/multi-seasonal-time-series-decomposition-using-mstl-in-python-136630e67530
#from statsmodels.tsa.seasonal import DecomposeResult
#from statsmodels.tsa.stl.mstl import MSTL
#
#mstl_model = MSTL(b5.outdoor_drybulb_temp_C, periods=[24, 24 * 365])
#res = mstl_model.fit()
#ax = res.plot()