### Imports

In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split
from pickle import dump

### Loading Data

In [2]:
house_data = pd.read_csv("../raw_data/3fc1a2969d4f28745af896b02c562bfc22fbda05.csv")

In [3]:
house_data.head()

Unnamed: 0,DateTime,HvacMode,Event,Schedule,T_ctrl,T_stp_cool,T_stp_heat,Humidity,HumidityExpectedLow,HumidityExpectedHigh,...,Remote_Sensor_7_Temperature,Remote_Sensor_7_Motion,Remote_Sensor_8_Temperature,Remote_Sensor_8_Motion,Remote_Sensor_9_Temperature,Remote_Sensor_9_Motion,Remote_Sensor_10_Temperature,Remote_Sensor_10_Motion,T_out,RH_out
0,2019-01-01 00:00:00,heat,Hold,Sleep,70.0,70.0,70.0,37.0,0.0,0.0,...,,,,,,,,,41.0,95.0
1,2019-01-01 00:05:00,heat,Hold,Sleep,70.0,70.0,70.0,37.0,0.0,0.0,...,,,,,,,,,41.0,95.0
2,2019-01-01 00:10:00,heat,Hold,Sleep,70.0,70.0,70.0,37.0,0.0,0.0,...,,,,,,,,,41.0,95.0
3,2019-01-01 00:15:00,heat,Hold,Sleep,70.0,70.0,70.0,37.0,0.0,0.0,...,,,,,,,,,41.0,95.0
4,2019-01-01 00:20:00,heat,Hold,Sleep,70.0,70.0,70.0,37.0,0.0,0.0,...,,,,,,,,,41.0,95.0


In [4]:
house_data.shape

(104832, 42)

In [5]:
house_data.isnull().sum()

DateTime                             0
HvacMode                          3085
Event                            49631
Schedule                          3085
T_ctrl                            3085
T_stp_cool                        3085
T_stp_heat                        3085
Humidity                          3085
HumidityExpectedLow               3085
HumidityExpectedHigh              3085
auxHeat1                          3070
auxHeat2                          3070
auxHeat3                          3070
compCool1                         3070
compCool2                         3070
compHeat1                         3070
compHeat2                         3070
fan                               3070
Thermostat_Temperature            3073
Thermostat_Motion                 3073
Remote_Sensor_1_Temperature       3073
Remote_Sensor_1_Motion            3073
Remote_Sensor_2_Temperature     104832
Remote_Sensor_2_Motion          104832
Remote_Sensor_3_Temperature     104832
Remote_Sensor_3_Motion   

### Drop All Columns with No Data

In [6]:
house_data = house_data.drop(labels = ['Remote_Sensor_2_Temperature', 'Remote_Sensor_2_Motion', 'Remote_Sensor_3_Temperature', 'Remote_Sensor_3_Motion', 'Remote_Sensor_4_Temperature', 'Remote_Sensor_4_Motion', 'Remote_Sensor_5_Temperature', 'Remote_Sensor_5_Motion', 'Remote_Sensor_6_Temperature', 'Remote_Sensor_6_Motion', 'Remote_Sensor_7_Temperature', 'Remote_Sensor_7_Motion', 'Remote_Sensor_8_Temperature', 'Remote_Sensor_8_Motion', 'Remote_Sensor_9_Temperature', 'Remote_Sensor_9_Motion', 'Remote_Sensor_10_Temperature', 'Remote_Sensor_10_Motion'], axis = 1)

In [7]:
house_data = house_data.drop(labels = ['auxHeat2', 'auxHeat3', 'compHeat1', 'compCool2', 'compHeat2'], axis = 1)

In [8]:
house_data = house_data.drop(labels = ['HumidityExpectedLow', 'HumidityExpectedHigh'], axis = 1)

In [9]:
house_data.HvacMode.value_counts()

heat    74045
cool    19941
off      6681
auto     1080
Name: HvacMode, dtype: int64

In [10]:
house_data.shape

(104832, 17)

In [11]:
house_data.head()

Unnamed: 0,DateTime,HvacMode,Event,Schedule,T_ctrl,T_stp_cool,T_stp_heat,Humidity,auxHeat1,compCool1,fan,Thermostat_Temperature,Thermostat_Motion,Remote_Sensor_1_Temperature,Remote_Sensor_1_Motion,T_out,RH_out
0,2019-01-01 00:00:00,heat,Hold,Sleep,70.0,70.0,70.0,37.0,0.0,0.0,165.0,70.0,0.0,67.0,0.0,41.0,95.0
1,2019-01-01 00:05:00,heat,Hold,Sleep,70.0,70.0,70.0,37.0,30.0,0.0,135.0,70.0,0.0,67.0,0.0,41.0,95.0
2,2019-01-01 00:10:00,heat,Hold,Sleep,70.0,70.0,70.0,37.0,300.0,0.0,300.0,70.0,0.0,67.0,0.0,41.0,95.0
3,2019-01-01 00:15:00,heat,Hold,Sleep,70.0,70.0,70.0,37.0,135.0,0.0,180.0,70.0,0.0,67.0,0.0,41.0,95.0
4,2019-01-01 00:20:00,heat,Hold,Sleep,70.0,70.0,70.0,37.0,0.0,0.0,120.0,70.0,0.0,67.0,0.0,41.0,95.0


In [12]:
house_data.isnull().sum()

DateTime                           0
HvacMode                        3085
Event                          49631
Schedule                        3085
T_ctrl                          3085
T_stp_cool                      3085
T_stp_heat                      3085
Humidity                        3085
auxHeat1                        3070
compCool1                       3070
fan                             3070
Thermostat_Temperature          3073
Thermostat_Motion               3073
Remote_Sensor_1_Temperature     3073
Remote_Sensor_1_Motion          3073
T_out                           1152
RH_out                          1152
dtype: int64

### Drop All Rows with NaN Values

In [13]:
null_list = house_data[house_data['HvacMode'].isnull()].index.tolist()

In [14]:
house_data = house_data.drop(null_list)

In [15]:
house_data.isnull().sum()

DateTime                           0
HvacMode                           0
Event                          46546
Schedule                           0
T_ctrl                             0
T_stp_cool                         0
T_stp_heat                         0
Humidity                           0
auxHeat1                           0
compCool1                          0
fan                                0
Thermostat_Temperature             0
Thermostat_Motion                  0
Remote_Sensor_1_Temperature        0
Remote_Sensor_1_Motion             0
T_out                           1114
RH_out                          1114
dtype: int64

### Drop All Rows with Missing T_out Values

In [16]:
null_list = house_data[house_data['T_out'].isnull()].index.tolist()

In [17]:
house_data = house_data.drop(null_list)

In [18]:
house_data.isnull().sum()

DateTime                           0
HvacMode                           0
Event                          46481
Schedule                           0
T_ctrl                             0
T_stp_cool                         0
T_stp_heat                         0
Humidity                           0
auxHeat1                           0
compCool1                          0
fan                                0
Thermostat_Temperature             0
Thermostat_Motion                  0
Remote_Sensor_1_Temperature        0
Remote_Sensor_1_Motion             0
T_out                              0
RH_out                             0
dtype: int64

### Rename Null Values in 'Event' to - None

In [19]:
house_data["Event"].fillna("None", inplace = True)

In [20]:
house_data.isnull().sum()

DateTime                       0
HvacMode                       0
Event                          0
Schedule                       0
T_ctrl                         0
T_stp_cool                     0
T_stp_heat                     0
Humidity                       0
auxHeat1                       0
compCool1                      0
fan                            0
Thermostat_Temperature         0
Thermostat_Motion              0
Remote_Sensor_1_Temperature    0
Remote_Sensor_1_Motion         0
T_out                          0
RH_out                         0
dtype: int64

### Drop All Rows Where HVAC Mode is 'Cool' & 'Auto' (Summer Months)

In [21]:
cool_list = house_data[house_data['HvacMode'] == 'cool'].index.tolist()

In [22]:
house_data.shape

(100633, 17)

In [23]:
len(cool_list)

19913

In [24]:
house_data = house_data.drop(cool_list)

In [25]:
auto_list = house_data[house_data['HvacMode'] == 'auto'].index.tolist()

In [26]:
house_data = house_data.drop(auto_list)

In [27]:
house_data.shape

(79646, 17)

In [28]:
auto_list = house_data[house_data['HvacMode'] == 'off'].index.tolist()

In [29]:
house_data = house_data.drop(auto_list)

In [30]:
house_data.HvacMode.value_counts()

heat    72966
Name: HvacMode, dtype: int64

### Drop Cooling Source Column

In [31]:
house_data = house_data.drop(labels = ['compCool1', 'T_stp_cool'], axis = 1)

### DateTime Manipulation

In [32]:
house_data.DateTime.dtype

dtype('O')

In [33]:
house_data['DateTime'] = pd.to_datetime(house_data['DateTime'])

In [34]:
house_data.DateTime

0        2019-01-01 00:00:00
1        2019-01-01 00:05:00
2        2019-01-01 00:10:00
3        2019-01-01 00:15:00
4        2019-01-01 00:20:00
                 ...        
104827   2019-12-31 23:35:00
104828   2019-12-31 23:40:00
104829   2019-12-31 23:45:00
104830   2019-12-31 23:50:00
104831   2019-12-31 23:55:00
Name: DateTime, Length: 72966, dtype: datetime64[ns]

In [35]:
house_data['Month'] = pd.DatetimeIndex(house_data['DateTime']).month

In [36]:
weekDays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

In [37]:
house_data['DayOfWeek'] = pd.DatetimeIndex(house_data['DateTime']).dayofweek

In [38]:
house_data['HourofDay'] = pd.DatetimeIndex(house_data['DateTime']).hour

In [39]:
house_data['Day'] = pd.DatetimeIndex(house_data['DateTime']).day

In [40]:
house_data.columns

Index(['DateTime', 'HvacMode', 'Event', 'Schedule', 'T_ctrl', 'T_stp_heat',
       'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature',
       'Thermostat_Motion', 'Remote_Sensor_1_Temperature',
       'Remote_Sensor_1_Motion', 'T_out', 'RH_out', 'Month', 'DayOfWeek',
       'HourofDay', 'Day'],
      dtype='object')

In [41]:
cols = house_data.columns.tolist()
print(cols)

['DateTime', 'HvacMode', 'Event', 'Schedule', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'Thermostat_Motion', 'Remote_Sensor_1_Temperature', 'Remote_Sensor_1_Motion', 'T_out', 'RH_out', 'Month', 'DayOfWeek', 'HourofDay', 'Day']


In [42]:
cols = cols[-4:] + cols[:-4]
print(cols)

['Month', 'DayOfWeek', 'HourofDay', 'Day', 'DateTime', 'HvacMode', 'Event', 'Schedule', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'Thermostat_Motion', 'Remote_Sensor_1_Temperature', 'Remote_Sensor_1_Motion', 'T_out', 'RH_out']


In [43]:
house_data = house_data[cols]

In [44]:
house_data.head()

Unnamed: 0,Month,DayOfWeek,HourofDay,Day,DateTime,HvacMode,Event,Schedule,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,Thermostat_Motion,Remote_Sensor_1_Temperature,Remote_Sensor_1_Motion,T_out,RH_out
0,1,1,0,1,2019-01-01 00:00:00,heat,Hold,Sleep,70.0,70.0,37.0,0.0,165.0,70.0,0.0,67.0,0.0,41.0,95.0
1,1,1,0,1,2019-01-01 00:05:00,heat,Hold,Sleep,70.0,70.0,37.0,30.0,135.0,70.0,0.0,67.0,0.0,41.0,95.0
2,1,1,0,1,2019-01-01 00:10:00,heat,Hold,Sleep,70.0,70.0,37.0,300.0,300.0,70.0,0.0,67.0,0.0,41.0,95.0
3,1,1,0,1,2019-01-01 00:15:00,heat,Hold,Sleep,70.0,70.0,37.0,135.0,180.0,70.0,0.0,67.0,0.0,41.0,95.0
4,1,1,0,1,2019-01-01 00:20:00,heat,Hold,Sleep,70.0,70.0,37.0,0.0,120.0,70.0,0.0,67.0,0.0,41.0,95.0


In [45]:
house_data.drop(labels = ['DateTime'], axis = 1, inplace = True)

In [46]:
house_data.head()

Unnamed: 0,Month,DayOfWeek,HourofDay,Day,HvacMode,Event,Schedule,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,Thermostat_Motion,Remote_Sensor_1_Temperature,Remote_Sensor_1_Motion,T_out,RH_out
0,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,0.0,165.0,70.0,0.0,67.0,0.0,41.0,95.0
1,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,30.0,135.0,70.0,0.0,67.0,0.0,41.0,95.0
2,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,300.0,300.0,70.0,0.0,67.0,0.0,41.0,95.0
3,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,135.0,180.0,70.0,0.0,67.0,0.0,41.0,95.0
4,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,0.0,120.0,70.0,0.0,67.0,0.0,41.0,95.0


#### Analyzing the Categorical Variables

In [47]:
house_data.head()

Unnamed: 0,Month,DayOfWeek,HourofDay,Day,HvacMode,Event,Schedule,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,Thermostat_Motion,Remote_Sensor_1_Temperature,Remote_Sensor_1_Motion,T_out,RH_out
0,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,0.0,165.0,70.0,0.0,67.0,0.0,41.0,95.0
1,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,30.0,135.0,70.0,0.0,67.0,0.0,41.0,95.0
2,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,300.0,300.0,70.0,0.0,67.0,0.0,41.0,95.0
3,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,135.0,180.0,70.0,0.0,67.0,0.0,41.0,95.0
4,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,0.0,120.0,70.0,0.0,67.0,0.0,41.0,95.0


#### HVAC Mode

In [48]:
house_data.HvacMode.unique()

array(['heat'], dtype=object)

In [49]:
house_data.HvacMode.value_counts()

heat    72966
Name: HvacMode, dtype: int64

#### Event

In [50]:
house_data.Event.unique()

array(['Hold', 'Smart Home', 'None', 'custom_17123', 'Smart Away'],
      dtype=object)

In [51]:
len(house_data[house_data.Event == 'Demand Response Event'])

0

#### Drop Rows with 'Demand Response Event' Event

In [52]:
custom_list = house_data[house_data['Event'] == 'Demand Response Event'].index.tolist()

In [53]:
house_data = house_data.drop(custom_list)

In [54]:
house_data.Event.unique()

array(['Hold', 'Smart Home', 'None', 'custom_17123', 'Smart Away'],
      dtype=object)

In [55]:
house_data.Event.value_counts()

Hold            42752
None            25451
Smart Home       3058
custom_17123     1469
Smart Away        236
Name: Event, dtype: int64

#### Drop Rows with 'Smart Away', 'Smart Home', & 'custom' Event

In [56]:
custom_list = house_data[house_data['Event'] == 'Smart Away'].index.tolist()

In [57]:
house_data = house_data.drop(custom_list)

In [58]:
house_data.Event.unique()

array(['Hold', 'Smart Home', 'None', 'custom_17123'], dtype=object)

In [59]:
custom_list = house_data[house_data['Event'] == 'Smart Home'].index.tolist()

In [60]:
house_data = house_data.drop(custom_list)

In [61]:
custom_list = house_data[house_data['Event'] == 'custom_17123'].index.tolist()

In [62]:
house_data = house_data.drop(custom_list)

In [63]:
house_data.Event.unique()

array(['Hold', 'None'], dtype=object)

#### Schedule

In [64]:
house_data.Schedule.unique()

array(['Sleep', 'custom_5223', 'Away', 'Home'], dtype=object)

In [65]:
house_data.Schedule.value_counts()

Away           22498
Home           18565
Sleep          17774
custom_5223     9366
Name: Schedule, dtype: int64

In [66]:
custom_list = house_data[house_data['Schedule'] == 'custom_5223'].index.tolist()

In [67]:
house_data = house_data.drop(custom_list)

In [68]:
house_data.Schedule.value_counts()

Away     22498
Home     18565
Sleep    17774
Name: Schedule, dtype: int64

In [69]:
house_data.head()

Unnamed: 0,Month,DayOfWeek,HourofDay,Day,HvacMode,Event,Schedule,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,Thermostat_Motion,Remote_Sensor_1_Temperature,Remote_Sensor_1_Motion,T_out,RH_out
0,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,0.0,165.0,70.0,0.0,67.0,0.0,41.0,95.0
1,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,30.0,135.0,70.0,0.0,67.0,0.0,41.0,95.0
2,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,300.0,300.0,70.0,0.0,67.0,0.0,41.0,95.0
3,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,135.0,180.0,70.0,0.0,67.0,0.0,41.0,95.0
4,1,1,0,1,heat,Hold,Sleep,70.0,70.0,37.0,0.0,120.0,70.0,0.0,67.0,0.0,41.0,95.0


In [70]:
house_data.to_csv("../workingDataHouse1/clean_data.csv", index = False)

#### Feature Engineering for Cyclical Features (HourofDay, Month & DayofWeek)

In [71]:
house_data.Month.unique()

array([ 1,  2,  3,  4,  5,  6,  9, 10, 11, 12], dtype=int64)

In [72]:
house_data.HourofDay.unique()

array([ 0,  1,  2,  3,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
       21, 22, 23,  4,  5,  6,  7], dtype=int64)

In [73]:
house_data.DayOfWeek.unique()

array([1, 2, 3, 4, 5, 6, 0], dtype=int64)

In [74]:
house_data['hour_sin'] = np.sin(house_data.HourofDay * (2. * np.pi / 24))
house_data['hour_cos'] = np.cos(house_data.HourofDay * (2. * np.pi / 24))
house_data['month_sin'] = np.sin((house_data.Month - 1) * (2. * np.pi / 12))
house_data['month_cos'] = np.cos((house_data.Month - 1) * (2. * np.pi / 12))
house_data['day_sin'] = np.sin(house_data.DayOfWeek) * (2. * np.pi / 7)
house_data['day_cos'] = np.cos(house_data.DayOfWeek) * (2. * np.pi / 7)

In [75]:
cols = house_data.columns.tolist()
print(cols)

['Month', 'DayOfWeek', 'HourofDay', 'Day', 'HvacMode', 'Event', 'Schedule', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'Thermostat_Motion', 'Remote_Sensor_1_Temperature', 'Remote_Sensor_1_Motion', 'T_out', 'RH_out', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'day_sin', 'day_cos']


In [76]:
cols = cols[-6:] + cols[:-6]
print(cols)

['hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'day_sin', 'day_cos', 'Month', 'DayOfWeek', 'HourofDay', 'Day', 'HvacMode', 'Event', 'Schedule', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'Thermostat_Motion', 'Remote_Sensor_1_Temperature', 'Remote_Sensor_1_Motion', 'T_out', 'RH_out']


In [77]:
house_data = house_data[cols]

In [78]:
#house_data.drop(labels = ['Month', 'DayOfWeek', 'HourofDay'], axis = 1, inplace = True)

In [79]:
house_data.head()

Unnamed: 0,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos,Month,DayOfWeek,HourofDay,Day,...,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,Thermostat_Motion,Remote_Sensor_1_Temperature,Remote_Sensor_1_Motion,T_out,RH_out
0,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,...,70.0,37.0,0.0,165.0,70.0,0.0,67.0,0.0,41.0,95.0
1,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,...,70.0,37.0,30.0,135.0,70.0,0.0,67.0,0.0,41.0,95.0
2,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,...,70.0,37.0,300.0,300.0,70.0,0.0,67.0,0.0,41.0,95.0
3,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,...,70.0,37.0,135.0,180.0,70.0,0.0,67.0,0.0,41.0,95.0
4,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,...,70.0,37.0,0.0,120.0,70.0,0.0,67.0,0.0,41.0,95.0


In [80]:
house_data.to_csv("../workingDataHouse1/clean_data1.csv", index = False)

### Data Pre-Processing

In [81]:
categorical_vars = ['HvacMode', 'Event', 'Schedule']
numerical_vars = ['T_ctrl', 'T_stp_heat', 'Humidity', 'Thermostat_Temperature', 'T_out', 'RH_out']

#### Standardizing the Numerical Features

In [82]:
sc = StandardScaler()
sc.fit(house_data[numerical_vars])
house_data[numerical_vars] = sc.transform(house_data[numerical_vars])

# save the scaler
dump(sc, open('scaler.pkl', 'wb'))

In [83]:
house_data.head()

Unnamed: 0,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos,Month,DayOfWeek,HourofDay,Day,...,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,Thermostat_Motion,Remote_Sensor_1_Temperature,Remote_Sensor_1_Motion,T_out,RH_out
0,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,...,0.663321,0.177458,0.0,165.0,0.330465,0.0,67.0,0.0,0.009112,1.808865
1,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,...,0.663321,0.177458,30.0,135.0,0.330465,0.0,67.0,0.0,0.009112,1.808865
2,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,...,0.663321,0.177458,300.0,300.0,0.330465,0.0,67.0,0.0,0.009112,1.808865
3,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,...,0.663321,0.177458,135.0,180.0,0.330465,0.0,67.0,0.0,0.009112,1.808865
4,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,...,0.663321,0.177458,0.0,120.0,0.330465,0.0,67.0,0.0,0.009112,1.808865


In [84]:
house_data.reset_index(inplace = True)

#### Ordinal Encoding the 'fan' Feature & 'auxHeat1' Feature

In [85]:
y_auxHeat = house_data['auxHeat1'].to_numpy()
y_fan = house_data['fan'].to_numpy()

In [86]:
oe = OrdinalEncoder()
y_auxHeat = oe.fit_transform(y_auxHeat.reshape(-1, 1))
y_fan = oe.fit_transform(y_fan.reshape(-1, 1))

In [87]:
y_auxHeat = y_auxHeat.reshape(y_auxHeat.shape[0], )
y_fan = y_fan.reshape(y_fan.shape[0], )

In [88]:
y_auxHeat = y_auxHeat.astype(int)
y_auxHeat = y_auxHeat.astype(str)

y_fan = y_fan.astype(int)
y_fan = y_fan.astype(str)

In [89]:
house_data['auxHeat1'] = y_auxHeat
house_data['fan'] = y_fan

In [90]:
house_data.head()

Unnamed: 0,index,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos,Month,DayOfWeek,HourofDay,...,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,Thermostat_Motion,Remote_Sensor_1_Temperature,Remote_Sensor_1_Motion,T_out,RH_out
0,0,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,0,11,0.330465,0.0,67.0,0.0,0.009112,1.808865
1,1,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,2,9,0.330465,0.0,67.0,0.0,0.009112,1.808865
2,2,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,20,20,0.330465,0.0,67.0,0.0,0.009112,1.808865
3,3,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,9,12,0.330465,0.0,67.0,0.0,0.009112,1.808865
4,4,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,0,8,0.330465,0.0,67.0,0.0,0.009112,1.808865


In [91]:
house_data.auxHeat1.unique()

array(['0', '2', '20', '9', '14', '19', '4', '8', '5', '7', '13', '1',
       '12', '6', '15', '17', '11', '10', '18', '3', '16'], dtype=object)

In [92]:
house_data.fan.unique()

array(['11', '9', '20', '12', '8', '2', '18', '17', '0', '19', '13', '16',
       '10', '14', '7', '3', '4', '6', '5', '15', '1'], dtype=object)

In [93]:
house_data.shape

(58837, 25)

In [94]:
house_data.to_csv("../workingDataHouse1/std_data.csv", index = False)

#### One-Hot Encode Categorical Variables

In [95]:
house_data.head()

Unnamed: 0,index,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos,Month,DayOfWeek,HourofDay,...,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,Thermostat_Motion,Remote_Sensor_1_Temperature,Remote_Sensor_1_Motion,T_out,RH_out
0,0,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,0,11,0.330465,0.0,67.0,0.0,0.009112,1.808865
1,1,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,2,9,0.330465,0.0,67.0,0.0,0.009112,1.808865
2,2,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,20,20,0.330465,0.0,67.0,0.0,0.009112,1.808865
3,3,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,9,12,0.330465,0.0,67.0,0.0,0.009112,1.808865
4,4,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,0,8,0.330465,0.0,67.0,0.0,0.009112,1.808865


In [96]:
oh_df = pd.get_dummies(house_data, columns = categorical_vars, drop_first = False)

In [97]:
oh_df.columns

Index(['index', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'day_sin',
       'day_cos', 'Month', 'DayOfWeek', 'HourofDay', 'Day', 'T_ctrl',
       'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature',
       'Thermostat_Motion', 'Remote_Sensor_1_Temperature',
       'Remote_Sensor_1_Motion', 'T_out', 'RH_out', 'HvacMode_heat',
       'Event_Hold', 'Event_None', 'Schedule_Away', 'Schedule_Home',
       'Schedule_Sleep'],
      dtype='object')

In [98]:
house_data.head()

Unnamed: 0,index,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos,Month,DayOfWeek,HourofDay,...,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,Thermostat_Motion,Remote_Sensor_1_Temperature,Remote_Sensor_1_Motion,T_out,RH_out
0,0,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,0,11,0.330465,0.0,67.0,0.0,0.009112,1.808865
1,1,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,2,9,0.330465,0.0,67.0,0.0,0.009112,1.808865
2,2,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,20,20,0.330465,0.0,67.0,0.0,0.009112,1.808865
3,3,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,9,12,0.330465,0.0,67.0,0.0,0.009112,1.808865
4,4,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,...,0.663321,0.177458,0,8,0.330465,0.0,67.0,0.0,0.009112,1.808865


In [99]:
oh_df.drop(labels = ['index'], axis = 1, inplace = True) 

In [100]:
cols = oh_df.columns.tolist()
print(cols)

['hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'day_sin', 'day_cos', 'Month', 'DayOfWeek', 'HourofDay', 'Day', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'Thermostat_Motion', 'Remote_Sensor_1_Temperature', 'Remote_Sensor_1_Motion', 'T_out', 'RH_out', 'HvacMode_heat', 'Event_Hold', 'Event_None', 'Schedule_Away', 'Schedule_Home', 'Schedule_Sleep']


In [101]:
cols = ['hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'day_sin', 'day_cos', 'Month', 'DayOfWeek', 'Day', 'HourofDay', 
        'HvacMode_heat', 'Event_Hold', 'Event_None', 'Schedule_Away', 'Schedule_Home', 'Schedule_Sleep', 
        'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'Thermostat_Motion', 
        'Remote_Sensor_1_Temperature', 'Remote_Sensor_1_Motion', 'T_out', 'RH_out']

In [102]:
oh_df = oh_df[cols]

In [103]:
oh_df.head()

Unnamed: 0,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos,Month,DayOfWeek,Day,HourofDay,...,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,Thermostat_Motion,Remote_Sensor_1_Temperature,Remote_Sensor_1_Motion,T_out,RH_out
0,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,1,0,...,0.663321,0.177458,0,11,0.330465,0.0,67.0,0.0,0.009112,1.808865
1,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,1,0,...,0.663321,0.177458,2,9,0.330465,0.0,67.0,0.0,0.009112,1.808865
2,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,1,0,...,0.663321,0.177458,20,20,0.330465,0.0,67.0,0.0,0.009112,1.808865
3,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,1,0,...,0.663321,0.177458,9,12,0.330465,0.0,67.0,0.0,0.009112,1.808865
4,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,1,0,...,0.663321,0.177458,0,8,0.330465,0.0,67.0,0.0,0.009112,1.808865


In [104]:
oh_df.shape

(58837, 27)

In [105]:
oh_df.to_csv("../preprocessed_data/oneHot_data.csv", index = False)

In [None]:
house_data.HvacMode.value_counts()

In [None]:
house_data.auxHeat1.value_counts()