In [7]:
import os
import pathlib
import datetime
import warnings
from tqdm.notebook import tqdm

import pandas as pd
import numpy as np
import pickle as plk

import matplotlib.pyplot as plt
%matplotlib inline  

import pwlf
import sklearn
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.metrics import mean_absolute_error, mean_squared_error
import sklearn.pipeline

In [3]:
# Stand-ins for the values a drop-down selector would supply:
# the building whose data folder is read, and the calendar year to analyse.
building_dropdown: str = 'CD Howe'
year_dropdown: int = 2019

In [4]:
# Import Data
# Path() is ".", and the parent of "." is still ".", so PATH is simply the
# notebook's working directory; DATA_PATH is resolved relative to it.
PATH = pathlib.Path().parent.parent
DATA_PATH = PATH.joinpath("../app/data").resolve()

# Rows stamped on or after this instant are excluded from every frame below.
DATA_CUTOFF = datetime.datetime(2020, 10, 1, 0, 0, 0)


def _load_pickle(relative_path):
    """Return the object unpickled from ``DATA_PATH / relative_path``.

    NOTE(review): unpickling can execute arbitrary code, so this must only
    ever be pointed at files produced by this project.
    """
    with open(DATA_PATH.joinpath(relative_path), 'rb') as f:
        return plk.load(f)


energy_df = _load_pickle(f'{building_dropdown}/Energy/_energyData.pkl')
weather_df = _load_pickle('Supporting/Weather/Ottawa.pkl')
weather_TMY = _load_pickle('Supporting/Weather/Ottawa_TMY.pkl')

# Outer join on the index keeps timestamps that appear in only one source.
merged_df = pd.merge(energy_df, weather_df, how='outer', left_index=True, right_index=True)
df_all = merged_df[merged_df.index < DATA_CUTOFF]

# Working frame: only the rows that fall in the selected year.
df = df_all[df_all.index.year == year_dropdown]


In [5]:
def seperate_data_by_Schedule(Data, workday, weekend_schedule=None):
    """Split a datetime-indexed frame into on-schedule and off-schedule rows.

    Parameters
    ----------
    Data : pandas.DataFrame
        Frame whose index exposes ``.weekday`` and ``.hour``.
    workday : sequence
        ``workday[0]`` and ``workday[1]`` bound the weekday on-schedule window:
        a row is on schedule when ``workday[0] < hour <= workday[1]``.
    weekend_schedule : sequence, optional
        Same convention, applied to rows whose weekday number is above 4.
        When omitted, every such row is off schedule.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(on_schedule, off_schedule)``.
    """
    def _split_by_hours(frame, hours):
        # Returns (rows inside the hour window, rows outside it).
        hour_of_day = frame.index.hour
        inside = (hour_of_day > hours[0]) & (hour_of_day <= hours[1])
        outside = (hour_of_day <= hours[0]) | (hour_of_day > hours[1])
        return frame[inside], frame[outside]

    day_number = Data.index.weekday
    weekend_rows = Data[day_number > 4]
    weekday_rows = Data[day_number < 5]

    weekday_on, weekday_off = _split_by_hours(weekday_rows, workday)

    if weekend_schedule is None:
        # No weekend window: the whole weekend is off schedule and is placed
        # ahead of the weekday off-hours in the returned frame.
        return weekday_on, pd.concat([weekend_rows, weekday_off])

    weekend_on, weekend_off = _split_by_hours(weekend_rows, weekend_schedule)
    on_schedule = pd.concat([weekday_on, weekend_on])
    off_schedule = pd.concat([weekday_off, weekend_off])
    return on_schedule, off_schedule

In [6]:
def LinModel(Data, parameter, sch, weekend_schedule=None):
    """Fit on-schedule and off-schedule change-point models against temperature.

    Parameters
    ----------
    Data : pandas.DataFrame
        Datetime-indexed frame holding a ``'temperature'`` column and the
        ``parameter`` column.
    parameter : str
        Name of the column to model.
    sch : sequence
        Weekday schedule, passed through to ``seperate_data_by_Schedule``.
    weekend_schedule : sequence, optional
        Weekend schedule, passed through to ``seperate_data_by_Schedule``.

    Returns
    -------
    tuple
        ``(pwlf_On, pwlf_Off, RMSE)``: the two fitted ``pwlf.PiecewiseLinFit``
        models and the root-mean-square error of their in-sample predictions,
        pooled over both subsets.

    Raises
    ------
    ValueError
        If either subset has no rows left after cleaning.
    """
    # Drop rows that cannot be regressed (missing temperature or reading) and
    # rows whose reading is exactly zero. dropna already returns a new frame,
    # so the caller's data is never modified.
    linData = Data.dropna(subset=['temperature', parameter])
    linData = linData[linData[parameter] != 0]

    linDataOnSchedule, linDataOffSchedule = seperate_data_by_Schedule(linData, sch, weekend_schedule)
    if linDataOnSchedule.empty or linDataOffSchedule.empty:
        raise ValueError(
            f"No usable '{parameter}' rows left for the on- or off-schedule subset; "
            "cannot fit both change-point models"
        )

    X_On = linDataOnSchedule['temperature'].to_numpy()
    Y_On = linDataOnSchedule[parameter].to_numpy()
    X_Off = linDataOffSchedule['temperature'].to_numpy()
    Y_Off = linDataOffSchedule[parameter].to_numpy()

    ## Find Change Point Models
    # fit(2) asks for two line segments; the breakpoints it returns are also
    # kept on the model objects, so the return value is not stored here.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        pwlf_On = pwlf.PiecewiseLinFit(X_On, Y_On)
        pwlf_On.fit(2)
        pwlf_Off = pwlf.PiecewiseLinFit(X_Off, Y_Off)
        pwlf_Off.fit(2)

    y = np.concatenate([Y_On, Y_Off])
    y_hat = np.concatenate([pwlf_On.predict(X_On), pwlf_Off.predict(X_Off)])

    # The `squared=False` keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6; taking the root explicitly works on every version.
    RMSE = np.sqrt(mean_squared_error(y, y_hat))

    return pwlf_On, pwlf_Off, RMSE


    
# Example run: chilled-water model for the selected year, with a weekday
# schedule of [5, 17]. Left as a bare expression so the cell displays the result.
LinModel(
    df,
    parameter='chilledWater',
    sch=[5, 17],
)

(<pwlf.pwlf.PiecewiseLinFit at 0x1d91fda4430>,
 <pwlf.pwlf.PiecewiseLinFit at 0x1d91fda4790>,
 1110.9095564229992)

In [None]:
class silly_estimator(BaseEstimator, sklearn.base.RegressorMixin):
    """
    scikit-learn style wrapper around the schedule-aware change-point model.

    Rows are split into on-schedule and off-schedule subsets with
    ``seperate_data_by_Schedule`` and a two-segment ``pwlf.PiecewiseLinFit`` of
    the modelled column against ``'temperature'`` is fitted to each subset,
    mirroring ``LinModel``. The schedule arguments are ordinary hyperparameters,
    so a random/grid search can tune them.

    Hyperparameters
    ---------------
    parmeter : str
        Name of the column being modelled. The spelling is kept as-is because
        callers pass it by keyword.
    sch : sequence
        Weekday schedule handed to ``seperate_data_by_Schedule``.
    weekend_sch : sequence, optional
        Weekend schedule handed to ``seperate_data_by_Schedule``.

    Fitted attributes
    -----------------
    pwlf_On_, pwlf_Off_ : pwlf.PiecewiseLinFit
        Models for the on-schedule and off-schedule rows.
    """
    def __init__(self, parmeter=None, sch=None, weekend_sch=None):
        '''
        Store the hyperparameters untouched.
        ***Important note: the attribute names must match the argument names,
            otherwise get_params/set_params (and therefore random search) break.
        '''
        self.parmeter = parmeter
        self.sch = sch
        self.weekend_sch = weekend_sch

    def _fit_change_point(self, data):
        # Fit one two-segment piecewise-linear model on a cleaned subset.
        model = pwlf.PiecewiseLinFit(data['temperature'].to_numpy(),
                                     data[self.parmeter].to_numpy())
        model.fit(2)
        return model

    def fit(self, x, y=None):
        """
        Fit the on- and off-schedule models.

        x : datetime-indexed DataFrame with a 'temperature' column.
        y : optional target values, row-aligned with ``x``. When omitted, the
            target is read from ``x[self.parmeter]``.

        Raises ValueError when a required hyperparameter is unset or when a
        subset has no usable rows. Returns ``self``.
        """
        if self.parmeter is None or self.sch is None:
            raise ValueError("'parmeter' and 'sch' must both be set before calling fit")

        linData = x.copy()  # never write into the caller's frame
        if y is not None:
            # Positional assignment: x and y are row-aligned, whatever their indexes.
            linData[self.parmeter] = np.asarray(y).ravel()

        # Same cleaning as LinModel: unusable rows and zero readings are dropped.
        linData = linData.dropna(subset=['temperature', self.parmeter])
        linData = linData[linData[self.parmeter] != 0]

        on_data, off_data = seperate_data_by_Schedule(linData, self.sch, self.weekend_sch)
        if on_data.empty or off_data.empty:
            raise ValueError(
                f"No usable '{self.parmeter}' rows left for the on- or off-schedule subset"
            )

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            self.pwlf_On_ = self._fit_change_point(on_data)
            self.pwlf_Off_ = self._fit_change_point(off_data)
        return self

    def predict(self, x):
        """
        Predict the modelled column for every row of ``x``.

        Returns a float array of length ``len(x)``. Rows that cannot be
        predicted (missing temperature, or a timestamp that falls in neither
        subset) are NaN.
        """
        check_is_fitted(self, ['pwlf_On_', 'pwlf_Off_'])

        # Carry each row's position through the schedule split so predictions
        # can be written back in the caller's row order.
        tagged = pd.DataFrame(
            {'temperature': x['temperature'].to_numpy(), 'row': np.arange(len(x))},
            index=x.index,
        )
        on_rows, off_rows = seperate_data_by_Schedule(tagged, self.sch, self.weekend_sch)

        y_pred = np.full(len(x), np.nan)
        for model, rows in ((self.pwlf_On_, on_rows), (self.pwlf_Off_, off_rows)):
            rows = rows.dropna(subset=['temperature'])
            if len(rows):
                y_pred[rows['row'].to_numpy()] = model.predict(rows['temperature'].to_numpy())
        return y_pred

    def score(self, x, y=None):
        """
        score is required so the random search knows what is the best estimator.

        Returns the negative RMSE (higher is better) over rows that have both a
        prediction and a non-missing, non-zero target; NaN when no such row
        exists. The target is ``y`` when given, otherwise ``x[self.parmeter]``.
        """
        y_pred = self.predict(x)
        y_true = np.asarray(x[self.parmeter] if y is None else y, dtype=float).ravel()

        # Apply the same exclusions used while fitting so the score is comparable.
        usable = np.isfinite(y_true) & np.isfinite(y_pred) & (y_true != 0)
        if not usable.any():
            return float('nan')
        return -float(np.sqrt(mean_squared_error(y_true[usable], y_pred[usable])))