In [7]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import feature_selection as fs
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
import datetime as dt

# Sub-activity table: the "duration" column is read as the index and then
# discarded again by reset_index(drop=True), leaving a plain RangeIndex.
df = pd.read_csv('S1SubAct_B_m_NoDupes.csv', index_col = "duration").reset_index(drop = True)

# Sensor table, restricted to the rows selected by using its reqEnergy column
# as a boolean mask.
df_sensors = (
    pd.read_csv('S1Sensors_preprocessed.csv')
    .pipe(lambda sensors: sensors[sensors['reqEnergy']])
)

# Tariff table, consumed later through its Hour / WDWE / cost_per_kwh columns.
df_costs = pd.read_csv('power_costs.csv')

def add_DAY_WDWE_phaseII(ds):
    """Return a copy of ``ds`` with calendar features derived from ``timestamp``.

    Three columns are added:

    * ``DAY``  - three-letter weekday name (``'Mon'`` .. ``'Sun'``),
    * ``WDWE`` - ``'WD'`` for Monday-Friday, ``'WE'`` for Saturday/Sunday,
    * ``Hour`` - hour of day taken from ``timestamp``.

    Parameters
    ----------
    ds : pandas.DataFrame
        Frame with a datetime-typed ``timestamp`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh ``0..n-1`` index.  The frame passed in is
        left untouched; earlier versions re-indexed it and added
        half-finished ``DAY``/``WDWE`` columns to it as a side effect.

    Raises
    ------
    AttributeError
        If ``timestamp`` is not datetime-like (raised by the ``.dt`` accessor).
    """
    day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    day_kinds = ['WD', 'WD', 'WD', 'WD', 'WD', 'WE', 'WE']

    # reset_index(drop=True) hands back a new frame, so every assignment below
    # stays local to this function.
    out = ds.reset_index(drop = True)

    # Map the integer weekday (Monday == 0) directly.  Rows with a missing
    # timestamp end up as NaN rather than an unmapped string such as '0.0'.
    weekday = out['timestamp'].dt.dayofweek
    out['DAY'] = weekday.map(dict(enumerate(day_names)))
    out['WDWE'] = weekday.map(dict(enumerate(day_kinds)))
    out['Hour'] = out['timestamp'].dt.hour
    return out

In [9]:
# Load the sub-activity records and parse their start/end columns as datetimes
# using the fixed 'YYYY-MM-DD HH:MM:SS' layout.
ds = pd.read_csv('S1SubActivities_temporalFeaturesCLEANSED.csv').assign(
    start=lambda frame: pd.to_datetime(frame['start'], format='%Y-%m-%d %H:%M:%S'),
    end=lambda frame: pd.to_datetime(frame['end'], format='%Y-%m-%d %H:%M:%S'),
)

In [10]:
# Label every record with a part of day based on HOUR:
# before 12 -> "Morning", 18 or later -> "Evening", anything else -> "Afternoon".
ds_new = ds.assign(
    Phase=lambda frame: (
        pd.Series("Afternoon", index=frame.index)
        .mask(frame['HOUR'] < 12, "Morning")
        .mask(frame['HOUR'] >= 18, "Evening")
    )
)

# Mean durationSec for every (subAct, WDWE, Phase) combination.
benchmark_usage = ds_new.groupby(['subAct', 'WDWE', 'Phase'])['durationSec'].agg('mean')

In [24]:
def calc_subAct(dataframe, subAct, wattage, df_costs):
    """Replay switch-off interventions for one sub-activity and print the savings.

    A decision tree is fitted to predict the ``subAct`` column from every other
    column of ``dataframe`` (70/30 split, fixed random state) and then used to
    predict every row.  A row is flagged for intervention when the prediction
    drops by one relative to the previous row while the recorded value is
    still 1.  A flagged run is then followed row by row:

    * if the recorded value returns to 0 before the run has lasted longer than
      the benchmark mean for that row's WDWE/Phase, the intervention is
      counted as cancelled;
    * otherwise it is counted as completed, and every following row that is
      still 1 counts as one saved minute.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table containing a ``subAct`` column plus the feature columns.  It is
        copied, not modified.
    subAct : str
        Name of the target column; also the first-level key used to look up
        ``benchmark_usage``.
    wattage : number
        Power figure converted to kWh per saved row as ``wattage / 60 / 1000``.
    df_costs : pandas.DataFrame
        Tariff table with ``Hour``, ``WDWE`` and ``cost_per_kwh`` columns; the
        first row matching a saved row's hour and WDWE supplies the rate.

    Returns
    -------
    None
        All results are printed.

    Notes
    -----
    * Reads the module-level ``benchmark_usage`` series, whose values are
      divided by 60 before being compared with the run length counted in rows.
    * Re-reads 'S1SubAct_B_m_NoDupes.csv' on every call to obtain timestamps,
      which are attached by index label.
      NOTE(review): this assumes ``dataframe`` has the same row order and index
      as that file - confirm.
    * A missing benchmark entry raises ``KeyError`` and a missing tariff row
      raises ``IndexError``, in both cases only when that lookup is actually
      needed.
    """
    df = dataframe.copy()

    # Fit on the training split, score on the held-out split, then predict
    # every row so the whole sequence can be replayed below.
    Data = df.drop(columns = subAct).values
    target = df[subAct]
    D_train, D_test, t_train, t_test = train_test_split(Data, target, test_size = 0.3,
                                                        random_state=999)
    dt_classifier = DecisionTreeClassifier(max_depth=10, criterion='entropy',
                                           random_state = 999)
    dt_classifier.fit(D_train, t_train)
    confidence = dt_classifier.score(D_test, t_test)
    df['prediction'] = dt_classifier.predict(Data)
    # Prediction fell from 1 to 0 while the recorded value is still 1.
    df['intervention'] = (df['prediction'].diff() == -1) & (df[subAct] == 1)

    dfIDX = pd.read_csv('S1SubAct_B_m_NoDupes.csv', index_col = None)
    dfIDX.duration = pd.to_datetime(dfIDX.duration, format='%Y-%m-%d %H:%M:%S')
    df['timestamp'] = dfIDX['duration']

    df = add_DAY_WDWE_phaseII(df)
    df['Phase'] = "Afternoon"
    df.loc[df['Hour'] < 12, 'Phase'] = "Morning"
    df.loc[df['Hour'] >= 18, 'Phase'] = "Evening"

    # Calculate approx durations: running count of consecutive rows equal to 1,
    # reset to 0 on every other row.  Each non-1 row opens a new group, so a
    # cumulative sum inside the group yields 0, 1, 2, ... for the run after it.
    is_on = df[subAct] == 1
    df['duration'] = is_on.astype(int).groupby((~is_on).cumsum()).cumsum()

    cancelled_interventions = 0
    completed_interventions = 0
    possible_intervention = False
    intervening = False

    total_minutes_saved = 0
    total_kwh_saved = 0
    total_dollars_saved = 0

    # Energy attributed to one saved row; identical for every row.
    kwh_saved = wattage / 60 / 1000
    # Lookups are memoised but still resolved lazily with the original
    # expressions, so a missing entry fails exactly when it is first needed.
    threshold_by_slot = {}
    rate_by_slot = {}

    replay = zip(df['intervention'], df[subAct], df['duration'],
                 df['WDWE'], df['Phase'], df['Hour'])
    for flagged, state, run_length, wdwe, phase, hour in replay:
        if flagged and not intervening:
            possible_intervention = True
        if possible_intervention:
            if state == 0:
                # Switched off before exceeding the benchmark.
                possible_intervention = False
                cancelled_interventions += 1
            else:
                slot = (wdwe, phase)
                if slot not in threshold_by_slot:
                    threshold_by_slot[slot] = benchmark_usage[subAct][wdwe][phase] / 60
                if run_length > threshold_by_slot[slot]:
                    intervening = True
                    completed_interventions += 1
                    possible_intervention = False
        if intervening:
            if state == 0:
                intervening = False
            else:
                total_minutes_saved += 1
                total_kwh_saved += kwh_saved
                tariff_slot = (hour, wdwe)
                if tariff_slot not in rate_by_slot:
                    rate_by_slot[tariff_slot] = df_costs[
                        (df_costs['Hour'] == hour) & (df_costs['WDWE'] == wdwe)
                    ].iloc[0]['cost_per_kwh']
                dollars_saved = rate_by_slot[tariff_slot] * kwh_saved
                total_dollars_saved += dollars_saved

    # Original note: "this means our 'confidence' value is going to be more
    # conservative than it needs to be".
    # NOTE(review): presumably because the totals are scaled by the held-out
    # accuracy although the replay also covers rows the tree was trained on - confirm.
    print("SubAct:",subAct)
    print("Classifier accuracy metric:", round(confidence,3))
    print("Number of completed interventions:", completed_interventions)
    print("Number of interventions cancelled due to not meeting mean number of minutes usage:", 
          cancelled_interventions)
    print("Total minutes saved:", round(total_minutes_saved, 3), "min")
    print("Total minutes saved, accounting for accuracy metric:", round(total_minutes_saved * confidence,3), "min")
    print("Total electricity saved (kwh):", round(total_kwh_saved,3), "kWh")
    print("Total electricity saved, accounting for accuracy metric (kwh):", round(total_kwh_saved * confidence,3), "kWh")
    print("Total money saved: $", round(total_dollars_saved,3), "AUD")
    print("Total money saved accounting for confidence:  $", round(total_dollars_saved * confidence,3), "AUD")
    print()
    

# Run the savings report once per retained sensor: the sensor's "concat" value
# names the target column and its "wattage" value feeds the energy calculation.
for subAct, wattage in zip(df_sensors['concat'], df_sensors['wattage']):
    calc_subAct(df, subAct, wattage, df_costs)

SubAct: bathroom_lightswitch
Classifier accuracy metric: 0.847
Number of completed interventions: 11
Number of interventions cancelled due to not meeting mean number of minutes usage: 12
Total minutes saved: 420 min
Total minutes saved, accounting for accuracy metric: 355.914 min
Total electricity saved (kwh): 0.168 kWh
Total electricity saved, accounting for accuracy metric (kwh): 0.142 kWh
Total money saved: $ 0.039 AUD
Total money saved accounting for confidence:  $ 0.033 AUD

SubAct: foyer_lightswitch
Classifier accuracy metric: 0.992
Number of completed interventions: 0
Number of interventions cancelled due to not meeting mean number of minutes usage: 1
Total minutes saved: 0 min
Total minutes saved, accounting for accuracy metric: 0.0 min
Total electricity saved (kwh): 0 kWh
Total electricity saved, accounting for accuracy metric (kwh): 0.0 kWh
Total money saved: $ 0 AUD
Total money saved accounting for confidence:  $ 0.0 AUD

SubAct: kitchen_lightswitch
Classifier accuracy metri