In [95]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split
from pickle import load

In [96]:
house_data = pd.read_csv("../raw_data/d758c1cddadcdb76de2656eb20fdeab3b5959fba.csv")

In [97]:
house_data.head()

Unnamed: 0,DateTime,HvacMode,Event,Schedule,T_ctrl,T_stp_cool,T_stp_heat,Humidity,HumidityExpectedLow,HumidityExpectedHigh,...,Remote_Sensor_7_Temperature,Remote_Sensor_7_Motion,Remote_Sensor_8_Temperature,Remote_Sensor_8_Motion,Remote_Sensor_9_Temperature,Remote_Sensor_9_Motion,Remote_Sensor_10_Temperature,Remote_Sensor_10_Motion,T_out,RH_out
0,2019-01-01 00:00:00,auxHeatOnly,,Sleep,68.0,77.0,59.0,40.0,0.0,0.0,...,,,,,,,,,35.0,97.0
1,2019-01-01 00:05:00,auxHeatOnly,,Sleep,68.0,77.0,59.0,40.0,0.0,0.0,...,,,,,,,,,35.0,97.0
2,2019-01-01 00:10:00,auxHeatOnly,,Sleep,68.0,77.0,59.0,40.0,0.0,0.0,...,,,,,,,,,35.0,97.0
3,2019-01-01 00:15:00,auxHeatOnly,,Sleep,68.0,77.0,59.0,41.0,0.0,0.0,...,,,,,,,,,35.0,97.0
4,2019-01-01 00:20:00,auxHeatOnly,,Sleep,68.0,77.0,59.0,41.0,0.0,0.0,...,,,,,,,,,35.0,97.0


In [98]:
house_data.dtypes

DateTime                         object
HvacMode                         object
Event                            object
Schedule                         object
T_ctrl                          float64
T_stp_cool                      float64
T_stp_heat                      float64
Humidity                        float64
HumidityExpectedLow             float64
HumidityExpectedHigh            float64
auxHeat1                        float64
auxHeat2                        float64
auxHeat3                        float64
compCool1                       float64
compCool2                       float64
compHeat1                       float64
compHeat2                       float64
fan                             float64
Thermostat_Temperature          float64
Thermostat_Motion               float64
Remote_Sensor_1_Temperature     float64
Remote_Sensor_1_Motion          float64
Remote_Sensor_2_Temperature     float64
Remote_Sensor_2_Motion          float64
Remote_Sensor_3_Temperature     float64


In [99]:
house_data.shape

(104832, 42)

In [100]:
house_data.isnull().sum()

DateTime                             0
HvacMode                          3785
Event                            89115
Schedule                          3785
T_ctrl                            3785
T_stp_cool                        3785
T_stp_heat                        3785
Humidity                          3785
HumidityExpectedLow               3785
HumidityExpectedHigh              3785
auxHeat1                          3771
auxHeat2                          3771
auxHeat3                          3771
compCool1                         3771
compCool2                         3771
compHeat1                         3771
compHeat2                         3771
fan                               3771
Thermostat_Temperature            3780
Thermostat_Motion                 3780
Remote_Sensor_1_Temperature       3791
Remote_Sensor_1_Motion            3780
Remote_Sensor_2_Temperature     104832
Remote_Sensor_2_Motion          104832
Remote_Sensor_3_Temperature     104832
Remote_Sensor_3_Motion   

In [101]:
house_data.auxHeat1.value_counts()

0.0      87811
300.0     8197
285.0      301
270.0      292
75.0       283
150.0      279
255.0      278
165.0      276
45.0       272
105.0      272
195.0      268
90.0       265
15.0       261
225.0      261
180.0      260
135.0      255
210.0      254
240.0      252
120.0      249
30.0       248
60.0       227
Name: auxHeat1, dtype: int64

### Drop All Columns with No Data

In [102]:
house_data = house_data.drop(labels = ['Remote_Sensor_1_Temperature', 'Remote_Sensor_1_Motion', 'Remote_Sensor_2_Temperature', 'Remote_Sensor_2_Motion', 'Remote_Sensor_3_Temperature', 'Remote_Sensor_3_Motion', 'Remote_Sensor_4_Temperature', 'Remote_Sensor_4_Motion', 'Remote_Sensor_5_Temperature', 'Remote_Sensor_5_Motion', 'Remote_Sensor_6_Temperature', 'Remote_Sensor_6_Motion', 'Remote_Sensor_7_Temperature', 'Remote_Sensor_7_Motion', 'Remote_Sensor_8_Temperature', 'Remote_Sensor_8_Motion', 'Remote_Sensor_9_Temperature', 'Remote_Sensor_9_Motion', 'Remote_Sensor_10_Temperature', 'Remote_Sensor_10_Motion'], axis = 1)

In [103]:
house_data = house_data.drop(labels = ['auxHeat2', 'auxHeat3', 'compHeat1', 'compCool2', 'compHeat2'], axis = 1)

In [104]:
house_data = house_data.drop(labels = ['HumidityExpectedLow', 'HumidityExpectedHigh'], axis = 1)

In [105]:
house_data.drop(labels = ['Thermostat_Motion'], axis = 1, inplace = True)

In [106]:
house_data.HvacMode.value_counts()

auxHeatOnly    61076
cool           19254
off            18870
heat            1847
Name: HvacMode, dtype: int64

In [107]:
house_data.shape

(104832, 14)

In [108]:
house_data.head()

Unnamed: 0,DateTime,HvacMode,Event,Schedule,T_ctrl,T_stp_cool,T_stp_heat,Humidity,auxHeat1,compCool1,fan,Thermostat_Temperature,T_out,RH_out
0,2019-01-01 00:00:00,auxHeatOnly,,Sleep,68.0,77.0,59.0,40.0,0.0,0.0,0.0,68.0,35.0,97.0
1,2019-01-01 00:05:00,auxHeatOnly,,Sleep,68.0,77.0,59.0,40.0,0.0,0.0,0.0,68.0,35.0,97.0
2,2019-01-01 00:10:00,auxHeatOnly,,Sleep,68.0,77.0,59.0,40.0,0.0,0.0,0.0,68.0,35.0,97.0
3,2019-01-01 00:15:00,auxHeatOnly,,Sleep,68.0,77.0,59.0,41.0,0.0,0.0,0.0,68.0,35.0,97.0
4,2019-01-01 00:20:00,auxHeatOnly,,Sleep,68.0,77.0,59.0,41.0,0.0,0.0,0.0,68.0,35.0,97.0


In [109]:
house_data.isnull().sum()

DateTime                      0
HvacMode                   3785
Event                     89115
Schedule                   3785
T_ctrl                     3785
T_stp_cool                 3785
T_stp_heat                 3785
Humidity                   3785
auxHeat1                   3771
compCool1                  3771
fan                        3771
Thermostat_Temperature     3780
T_out                      1272
RH_out                     1272
dtype: int64

### Drop All Rows with NaN Values

In [110]:
null_list = house_data[house_data['auxHeat1'].isnull()].index.tolist()

In [111]:
house_data = house_data.drop(null_list)

In [112]:
house_data.isnull().sum()

DateTime                      0
HvacMode                     18
Event                     85345
Schedule                     18
T_ctrl                       18
T_stp_cool                   18
T_stp_heat                   18
Humidity                     18
auxHeat1                      0
compCool1                     0
fan                           0
Thermostat_Temperature       15
T_out                      1205
RH_out                     1205
dtype: int64

In [113]:
null_list = house_data[house_data['HvacMode'].isnull()].index.tolist()

In [114]:
house_data = house_data.drop(null_list)

In [115]:
house_data.isnull().sum()

DateTime                      0
HvacMode                      0
Event                     85327
Schedule                      0
T_ctrl                        0
T_stp_cool                    0
T_stp_heat                    0
Humidity                      0
auxHeat1                      0
compCool1                     0
fan                           0
Thermostat_Temperature        3
T_out                      1205
RH_out                     1205
dtype: int64

### Drop All Rows with Missing T_out Values

In [116]:
null_list = house_data[house_data['T_out'].isnull()].index.tolist()

In [117]:
house_data = house_data.drop(null_list)

In [118]:
house_data.isnull().sum()

DateTime                      0
HvacMode                      0
Event                     84309
Schedule                      0
T_ctrl                        0
T_stp_cool                    0
T_stp_heat                    0
Humidity                      0
auxHeat1                      0
compCool1                     0
fan                           0
Thermostat_Temperature        3
T_out                         0
RH_out                        0
dtype: int64

In [119]:
null_list = house_data[house_data['Thermostat_Temperature'].isnull()].index.tolist()

In [120]:
house_data = house_data.drop(null_list)

In [121]:
house_data.isnull().sum()

DateTime                      0
HvacMode                      0
Event                     84306
Schedule                      0
T_ctrl                        0
T_stp_cool                    0
T_stp_heat                    0
Humidity                      0
auxHeat1                      0
compCool1                     0
fan                           0
Thermostat_Temperature        0
T_out                         0
RH_out                        0
dtype: int64

### Rename Null Values in 'Event' to - None

In [122]:
house_data["Event"].fillna("None", inplace = True)

In [123]:
house_data.isnull().sum()

DateTime                  0
HvacMode                  0
Event                     0
Schedule                  0
T_ctrl                    0
T_stp_cool                0
T_stp_heat                0
Humidity                  0
auxHeat1                  0
compCool1                 0
fan                       0
Thermostat_Temperature    0
T_out                     0
RH_out                    0
dtype: int64

In [124]:
null_list = house_data[house_data['T_stp_heat'].isnull()].index.tolist()

In [125]:
house_data = house_data.drop(null_list)

In [126]:
house_data.isnull().sum()

DateTime                  0
HvacMode                  0
Event                     0
Schedule                  0
T_ctrl                    0
T_stp_cool                0
T_stp_heat                0
Humidity                  0
auxHeat1                  0
compCool1                 0
fan                       0
Thermostat_Temperature    0
T_out                     0
RH_out                    0
dtype: int64

### Drop All Rows Where HVAC Mode is 'Cool' & 'Auto' (Summer Months)

In [127]:
cool_list = house_data[house_data['HvacMode'] == 'cool'].index.tolist()

In [128]:
house_data.shape

(99835, 14)

In [129]:
len(cool_list)

19206

In [130]:
house_data = house_data.drop(cool_list)

In [131]:
auto_list = house_data[house_data['HvacMode'] == 'auto'].index.tolist()

In [132]:
house_data = house_data.drop(auto_list)

In [133]:
house_data.shape

(80629, 14)

In [134]:
off_list = house_data[house_data['HvacMode'] == 'off'].index.tolist()

In [135]:
house_data = house_data.drop(off_list)

In [136]:
house_data.shape

(61861, 14)

In [137]:
off_list = house_data[house_data['HvacMode'] == 'heat'].index.tolist()

In [138]:
house_data = house_data.drop(off_list)

In [139]:
house_data.shape

(60014, 14)

### Drop Cooling Source Column

In [140]:
house_data = house_data.drop(labels = ['compCool1', 'T_stp_cool'], axis = 1)

In [141]:
house_data.isnull().sum()

DateTime                  0
HvacMode                  0
Event                     0
Schedule                  0
T_ctrl                    0
T_stp_heat                0
Humidity                  0
auxHeat1                  0
fan                       0
Thermostat_Temperature    0
T_out                     0
RH_out                    0
dtype: int64

In [142]:
null_list = house_data[house_data['T_stp_heat'].isnull()].index.tolist()

In [143]:
house_data = house_data.drop(null_list)

In [144]:
house_data.isnull().sum()

DateTime                  0
HvacMode                  0
Event                     0
Schedule                  0
T_ctrl                    0
T_stp_heat                0
Humidity                  0
auxHeat1                  0
fan                       0
Thermostat_Temperature    0
T_out                     0
RH_out                    0
dtype: int64

### DateTime Manipulation

In [145]:
house_data.DateTime.dtype

dtype('O')

In [146]:
house_data['DateTime'] = pd.to_datetime(house_data['DateTime'])

In [147]:
house_data.DateTime

0        2019-01-01 00:00:00
1        2019-01-01 00:05:00
2        2019-01-01 00:10:00
3        2019-01-01 00:15:00
4        2019-01-01 00:20:00
                 ...        
104827   2019-12-31 23:35:00
104828   2019-12-31 23:40:00
104829   2019-12-31 23:45:00
104830   2019-12-31 23:50:00
104831   2019-12-31 23:55:00
Name: DateTime, Length: 60014, dtype: datetime64[ns]

In [148]:
house_data['Month'] = pd.DatetimeIndex(house_data['DateTime']).month

In [149]:
weekDays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

In [150]:
house_data['DayOfWeek'] = pd.DatetimeIndex(house_data['DateTime']).dayofweek

In [151]:
house_data['HourofDay'] = pd.DatetimeIndex(house_data['DateTime']).hour

In [152]:
house_data.columns

Index(['DateTime', 'HvacMode', 'Event', 'Schedule', 'T_ctrl', 'T_stp_heat',
       'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'T_out',
       'RH_out', 'Month', 'DayOfWeek', 'HourofDay'],
      dtype='object')

In [153]:
cols = house_data.columns.tolist()
print(cols)

['DateTime', 'HvacMode', 'Event', 'Schedule', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'T_out', 'RH_out', 'Month', 'DayOfWeek', 'HourofDay']


In [154]:
cols = cols[-3:] + cols[:-3]
print(cols)

['Month', 'DayOfWeek', 'HourofDay', 'DateTime', 'HvacMode', 'Event', 'Schedule', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'T_out', 'RH_out']


In [155]:
house_data = house_data[cols]

In [156]:
house_data.head()

Unnamed: 0,Month,DayOfWeek,HourofDay,DateTime,HvacMode,Event,Schedule,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,T_out,RH_out
0,1,1,0,2019-01-01 00:00:00,auxHeatOnly,,Sleep,68.0,59.0,40.0,0.0,0.0,68.0,35.0,97.0
1,1,1,0,2019-01-01 00:05:00,auxHeatOnly,,Sleep,68.0,59.0,40.0,0.0,0.0,68.0,35.0,97.0
2,1,1,0,2019-01-01 00:10:00,auxHeatOnly,,Sleep,68.0,59.0,40.0,0.0,0.0,68.0,35.0,97.0
3,1,1,0,2019-01-01 00:15:00,auxHeatOnly,,Sleep,68.0,59.0,41.0,0.0,0.0,68.0,35.0,97.0
4,1,1,0,2019-01-01 00:20:00,auxHeatOnly,,Sleep,68.0,59.0,41.0,0.0,0.0,68.0,35.0,97.0


In [157]:
house_data.drop(labels = ['DateTime'], axis = 1, inplace = True)

In [158]:
house_data.head()

Unnamed: 0,Month,DayOfWeek,HourofDay,HvacMode,Event,Schedule,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,T_out,RH_out
0,1,1,0,auxHeatOnly,,Sleep,68.0,59.0,40.0,0.0,0.0,68.0,35.0,97.0
1,1,1,0,auxHeatOnly,,Sleep,68.0,59.0,40.0,0.0,0.0,68.0,35.0,97.0
2,1,1,0,auxHeatOnly,,Sleep,68.0,59.0,40.0,0.0,0.0,68.0,35.0,97.0
3,1,1,0,auxHeatOnly,,Sleep,68.0,59.0,41.0,0.0,0.0,68.0,35.0,97.0
4,1,1,0,auxHeatOnly,,Sleep,68.0,59.0,41.0,0.0,0.0,68.0,35.0,97.0


#### Analyzing the Categorical Variables

In [159]:
house_data.head()

Unnamed: 0,Month,DayOfWeek,HourofDay,HvacMode,Event,Schedule,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,T_out,RH_out
0,1,1,0,auxHeatOnly,,Sleep,68.0,59.0,40.0,0.0,0.0,68.0,35.0,97.0
1,1,1,0,auxHeatOnly,,Sleep,68.0,59.0,40.0,0.0,0.0,68.0,35.0,97.0
2,1,1,0,auxHeatOnly,,Sleep,68.0,59.0,40.0,0.0,0.0,68.0,35.0,97.0
3,1,1,0,auxHeatOnly,,Sleep,68.0,59.0,41.0,0.0,0.0,68.0,35.0,97.0
4,1,1,0,auxHeatOnly,,Sleep,68.0,59.0,41.0,0.0,0.0,68.0,35.0,97.0


In [160]:
house_data.columns

Index(['Month', 'DayOfWeek', 'HourofDay', 'HvacMode', 'Event', 'Schedule',
       'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan',
       'Thermostat_Temperature', 'T_out', 'RH_out'],
      dtype='object')

In [161]:
house_data.drop(labels = ['HvacMode', 'Event', 'Schedule'], axis = 1, inplace = True)

#### Feature Engineering for Cyclical Features (HourofDay, Month & DayofWeek)

In [162]:
house_data.Month.unique()

array([ 1,  2,  3,  4,  5,  6,  9, 10, 11, 12], dtype=int64)

In [163]:
house_data.HourofDay.unique()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23], dtype=int64)

In [164]:
house_data.DayOfWeek.unique()

array([1, 2, 3, 4, 5, 6, 0], dtype=int64)

In [165]:
house_data['hour_sin'] = np.sin(house_data.HourofDay * (2. * np.pi / 24))
house_data['hour_cos'] = np.cos(house_data.HourofDay * (2. * np.pi / 24))
house_data['month_sin'] = np.sin((house_data.Month - 1) * (2. * np.pi / 12))
house_data['month_cos'] = np.cos((house_data.Month - 1) * (2. * np.pi / 12))
house_data['day_sin'] = np.sin(house_data.DayOfWeek) * (2. * np.pi / 7)
house_data['day_cos'] = np.cos(house_data.DayOfWeek) * (2. * np.pi / 7)

In [166]:
cols = house_data.columns.tolist()
print(cols)

['Month', 'DayOfWeek', 'HourofDay', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'T_out', 'RH_out', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'day_sin', 'day_cos']


In [167]:
cols = cols[-6:] + cols[:-6]
print(cols)

['hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'day_sin', 'day_cos', 'Month', 'DayOfWeek', 'HourofDay', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'T_out', 'RH_out']


In [168]:
house_data = house_data[cols]

In [169]:
house_data.head()

Unnamed: 0,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos,Month,DayOfWeek,HourofDay,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,T_out,RH_out
0,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,68.0,59.0,40.0,0.0,0.0,68.0,35.0,97.0
1,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,68.0,59.0,40.0,0.0,0.0,68.0,35.0,97.0
2,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,68.0,59.0,40.0,0.0,0.0,68.0,35.0,97.0
3,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,68.0,59.0,41.0,0.0,0.0,68.0,35.0,97.0
4,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,68.0,59.0,41.0,0.0,0.0,68.0,35.0,97.0


### Data Pre-Processing

In [170]:
categorical_vars = ['HvacMode', 'Event', 'Schedule']
numerical_vars = ['T_ctrl', 'T_stp_heat', 'Humidity', 'Thermostat_Temperature', 'T_out', 'RH_out']

#### Standardizing the Numerical Features

In [171]:
sc = StandardScaler()
house_data[numerical_vars] = sc.fit_transform(house_data[numerical_vars])

In [172]:
#sc = load(open('scaler.pkl', 'rb'))
#house_data[numerical_vars] = sc.transform(house_data[numerical_vars])

In [173]:
house_data.head()

Unnamed: 0,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos,Month,DayOfWeek,HourofDay,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,T_out,RH_out
0,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,-0.000647,-1.150791,0.974341,0.0,0.0,-0.3948,0.214569,1.506778
1,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,-0.000647,-1.150791,0.974341,0.0,0.0,-0.3948,0.214569,1.506778
2,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,-0.000647,-1.150791,0.974341,0.0,0.0,-0.3948,0.214569,1.506778
3,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,-0.000647,-1.150791,1.203108,0.0,0.0,-0.3948,0.214569,1.506778
4,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,-0.000647,-1.150791,1.203108,0.0,0.0,-0.3948,0.214569,1.506778


In [174]:
house_data.reset_index(inplace = True)

#### Ordinal Encoding the 'fan' Feature & 'auxHeat1' Feature

In [175]:
y_auxHeat = house_data['auxHeat1'].to_numpy()
y_fan = house_data['fan'].to_numpy()

In [176]:
oe = OrdinalEncoder()
y_auxHeat = oe.fit_transform(y_auxHeat.reshape(-1, 1))
y_fan = oe.fit_transform(y_fan.reshape(-1, 1))

In [177]:
y_auxHeat = y_auxHeat.reshape(y_auxHeat.shape[0], )
y_fan = y_fan.reshape(y_fan.shape[0], )

In [178]:
y_auxHeat = y_auxHeat.astype(int)
y_auxHeat = y_auxHeat.astype(str)

y_fan = y_fan.astype(int)
y_fan = y_fan.astype(str)

In [179]:
house_data['auxHeat1'] = y_auxHeat
house_data['fan'] = y_fan

In [180]:
house_data.head()

Unnamed: 0,index,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos,Month,DayOfWeek,HourofDay,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,T_out,RH_out
0,0,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,-0.000647,-1.150791,0.974341,0,0,-0.3948,0.214569,1.506778
1,1,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,-0.000647,-1.150791,0.974341,0,0,-0.3948,0.214569,1.506778
2,2,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,-0.000647,-1.150791,0.974341,0,0,-0.3948,0.214569,1.506778
3,3,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,-0.000647,-1.150791,1.203108,0,0,-0.3948,0.214569,1.506778
4,4,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,-0.000647,-1.150791,1.203108,0,0,-0.3948,0.214569,1.506778


In [181]:
house_data.auxHeat1.unique()

array(['0', '8', '20', '19', '1', '18', '10', '15', '9', '17', '6', '11',
       '13', '5', '3', '16', '4', '7', '12', '2', '14'], dtype=object)

In [182]:
house_data.fan.unique()

array(['0', '8', '20', '2', '1', '10', '18', '9', '12', '15', '11', '13',
       '5', '6', '16', '7', '17', '4', '3', '19', '14'], dtype=object)

In [183]:
house_data.shape

(60014, 18)

In [184]:
house_data.to_csv("../preprocessed_data/std_test_data_d758c1cddadcdb76de2656eb20fdeab3b5959fba.csv", index = False)