In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split
from pickle import load

In [2]:
house_data = pd.read_csv("../raw_data/6c83e118e0c6dde25b3aba4935f902a0291b95a7.csv")

In [3]:
house_data.head()

Unnamed: 0,DateTime,HvacMode,Event,Schedule,T_ctrl,T_stp_cool,T_stp_heat,Humidity,HumidityExpectedLow,HumidityExpectedHigh,...,Remote_Sensor_7_Temperature,Remote_Sensor_7_Motion,Remote_Sensor_8_Temperature,Remote_Sensor_8_Motion,Remote_Sensor_9_Temperature,Remote_Sensor_9_Motion,Remote_Sensor_10_Temperature,Remote_Sensor_10_Motion,T_out,RH_out
0,2019-01-01 00:00:00,heat,,Sleep,68.0,82.0,68.0,44.0,0.0,0.0,...,,,,,,,,,7.0,83.0
1,2019-01-01 00:05:00,heat,,Sleep,68.0,82.0,68.0,44.0,0.0,0.0,...,,,,,,,,,7.0,83.0
2,2019-01-01 00:10:00,heat,,Sleep,68.0,82.0,68.0,44.0,0.0,0.0,...,,,,,,,,,7.0,83.0
3,2019-01-01 00:15:00,heat,,Sleep,68.0,82.0,68.0,44.0,0.0,0.0,...,,,,,,,,,7.0,83.0
4,2019-01-01 00:20:00,heat,,Sleep,68.0,82.0,68.0,45.0,0.0,0.0,...,,,,,,,,,7.0,83.0


In [4]:
house_data.dtypes

DateTime                         object
HvacMode                         object
Event                            object
Schedule                         object
T_ctrl                          float64
T_stp_cool                      float64
T_stp_heat                      float64
Humidity                        float64
HumidityExpectedLow             float64
HumidityExpectedHigh            float64
auxHeat1                        float64
auxHeat2                        float64
auxHeat3                        float64
compCool1                       float64
compCool2                       float64
compHeat1                       float64
compHeat2                       float64
fan                             float64
Thermostat_Temperature          float64
Thermostat_Motion               float64
Remote_Sensor_1_Temperature     float64
Remote_Sensor_1_Motion          float64
Remote_Sensor_2_Temperature     float64
Remote_Sensor_2_Motion          float64
Remote_Sensor_3_Temperature     float64


In [5]:
house_data.shape

(96768, 42)

In [6]:
house_data.isnull().sum()

DateTime                            0
HvacMode                         4238
Event                           82242
Schedule                         4238
T_ctrl                           4238
T_stp_cool                       4469
T_stp_heat                       4238
Humidity                         4238
HumidityExpectedLow              4238
HumidityExpectedHigh             4238
auxHeat1                         4224
auxHeat2                         4224
auxHeat3                         4224
compCool1                        4224
compCool2                        4224
compHeat1                        4224
compHeat2                        4224
fan                              4224
Thermostat_Temperature           4226
Thermostat_Motion                4226
Remote_Sensor_1_Temperature     86947
Remote_Sensor_1_Motion          86947
Remote_Sensor_2_Temperature     96768
Remote_Sensor_2_Motion          96768
Remote_Sensor_3_Temperature     96768
Remote_Sensor_3_Motion          96768
Remote_Senso

### Drop All Columns with No Data

In [7]:
house_data = house_data.drop(labels = ['Remote_Sensor_1_Temperature', 'Remote_Sensor_1_Motion', 'Remote_Sensor_2_Temperature', 'Remote_Sensor_2_Motion', 'Remote_Sensor_3_Temperature', 'Remote_Sensor_3_Motion', 'Remote_Sensor_4_Temperature', 'Remote_Sensor_4_Motion', 'Remote_Sensor_5_Temperature', 'Remote_Sensor_5_Motion', 'Remote_Sensor_6_Temperature', 'Remote_Sensor_6_Motion', 'Remote_Sensor_7_Temperature', 'Remote_Sensor_7_Motion', 'Remote_Sensor_8_Temperature', 'Remote_Sensor_8_Motion', 'Remote_Sensor_9_Temperature', 'Remote_Sensor_9_Motion', 'Remote_Sensor_10_Temperature', 'Remote_Sensor_10_Motion'], axis = 1)

In [8]:
house_data = house_data.drop(labels = ['auxHeat2', 'auxHeat3', 'compHeat1', 'compCool2', 'compHeat2'], axis = 1)

In [9]:
house_data = house_data.drop(labels = ['HumidityExpectedLow', 'HumidityExpectedHigh'], axis = 1)

In [10]:
house_data.drop(labels = ['Thermostat_Motion'], axis = 1, inplace = True)

In [11]:
house_data.HvacMode.value_counts()

off     52295
heat    40235
Name: HvacMode, dtype: int64

In [12]:
house_data.shape

(96768, 14)

In [13]:
house_data.head()

Unnamed: 0,DateTime,HvacMode,Event,Schedule,T_ctrl,T_stp_cool,T_stp_heat,Humidity,auxHeat1,compCool1,fan,Thermostat_Temperature,T_out,RH_out
0,2019-01-01 00:00:00,heat,,Sleep,68.0,82.0,68.0,44.0,0.0,0.0,0.0,68.0,7.0,83.0
1,2019-01-01 00:05:00,heat,,Sleep,68.0,82.0,68.0,44.0,0.0,0.0,0.0,68.0,7.0,83.0
2,2019-01-01 00:10:00,heat,,Sleep,68.0,82.0,68.0,44.0,0.0,0.0,0.0,68.0,7.0,83.0
3,2019-01-01 00:15:00,heat,,Sleep,68.0,82.0,68.0,44.0,0.0,0.0,0.0,68.0,7.0,83.0
4,2019-01-01 00:20:00,heat,,Sleep,68.0,82.0,68.0,45.0,0.0,0.0,0.0,68.0,7.0,83.0


In [14]:
house_data.isnull().sum()

DateTime                      0
HvacMode                   4238
Event                     82242
Schedule                   4238
T_ctrl                     4238
T_stp_cool                 4469
T_stp_heat                 4238
Humidity                   4238
auxHeat1                   4224
compCool1                  4224
fan                        4224
Thermostat_Temperature     4226
T_out                      1230
RH_out                     1230
dtype: int64

### Drop All Rows with NaN Values

In [15]:
null_list = house_data[house_data['auxHeat1'].isnull()].index.tolist()

In [16]:
house_data = house_data.drop(null_list)

In [17]:
house_data.isnull().sum()

DateTime                      0
HvacMode                     23
Event                     78018
Schedule                     23
T_ctrl                       23
T_stp_cool                  254
T_stp_heat                   23
Humidity                     23
auxHeat1                      0
compCool1                     0
fan                           0
Thermostat_Temperature       11
T_out                      1195
RH_out                     1195
dtype: int64

In [18]:
null_list = house_data[house_data['HvacMode'].isnull()].index.tolist()

In [19]:
house_data = house_data.drop(null_list)

In [20]:
house_data.isnull().sum()

DateTime                      0
HvacMode                      0
Event                     77995
Schedule                      0
T_ctrl                        0
T_stp_cool                  231
T_stp_heat                    0
Humidity                      0
auxHeat1                      0
compCool1                     0
fan                           0
Thermostat_Temperature        0
T_out                      1195
RH_out                     1195
dtype: int64

### Drop All Rows with Missing T_out Values

In [21]:
null_list = house_data[house_data['T_out'].isnull()].index.tolist()

In [22]:
house_data = house_data.drop(null_list)

In [23]:
house_data.isnull().sum()

DateTime                      0
HvacMode                      0
Event                     77395
Schedule                      0
T_ctrl                        0
T_stp_cool                  231
T_stp_heat                    0
Humidity                      0
auxHeat1                      0
compCool1                     0
fan                           0
Thermostat_Temperature        0
T_out                         0
RH_out                        0
dtype: int64

In [24]:
null_list = house_data[house_data['Thermostat_Temperature'].isnull()].index.tolist()

In [25]:
house_data = house_data.drop(null_list)

In [26]:
house_data.isnull().sum()

DateTime                      0
HvacMode                      0
Event                     77395
Schedule                      0
T_ctrl                        0
T_stp_cool                  231
T_stp_heat                    0
Humidity                      0
auxHeat1                      0
compCool1                     0
fan                           0
Thermostat_Temperature        0
T_out                         0
RH_out                        0
dtype: int64

### Rename Null Values in 'Event' to - None

In [27]:
house_data["Event"].fillna("None", inplace = True)

In [28]:
house_data.isnull().sum()

DateTime                    0
HvacMode                    0
Event                       0
Schedule                    0
T_ctrl                      0
T_stp_cool                231
T_stp_heat                  0
Humidity                    0
auxHeat1                    0
compCool1                   0
fan                         0
Thermostat_Temperature      0
T_out                       0
RH_out                      0
dtype: int64

In [29]:
null_list = house_data[house_data['T_stp_heat'].isnull()].index.tolist()

In [30]:
house_data = house_data.drop(null_list)

In [31]:
house_data.isnull().sum()

DateTime                    0
HvacMode                    0
Event                       0
Schedule                    0
T_ctrl                      0
T_stp_cool                231
T_stp_heat                  0
Humidity                    0
auxHeat1                    0
compCool1                   0
fan                         0
Thermostat_Temperature      0
T_out                       0
RH_out                      0
dtype: int64

In [32]:
null_list = house_data[house_data['T_stp_cool'].isnull()].index.tolist()

In [33]:
house_data = house_data.drop(null_list)

In [34]:
house_data.isnull().sum()

DateTime                  0
HvacMode                  0
Event                     0
Schedule                  0
T_ctrl                    0
T_stp_cool                0
T_stp_heat                0
Humidity                  0
auxHeat1                  0
compCool1                 0
fan                       0
Thermostat_Temperature    0
T_out                     0
RH_out                    0
dtype: int64

### Drop All Rows Where HVAC Mode is 'Cool' & 'Auto' (Summer Months)

In [35]:
cool_list = house_data[house_data['HvacMode'] == 'cool'].index.tolist()

In [36]:
house_data.shape

(91095, 14)

In [37]:
len(cool_list)

0

In [38]:
house_data = house_data.drop(cool_list)

In [39]:
auto_list = house_data[house_data['HvacMode'] == 'auto'].index.tolist()

In [40]:
house_data = house_data.drop(auto_list)

In [41]:
house_data.shape

(91095, 14)

In [42]:
off_list = house_data[house_data['HvacMode'] == 'off'].index.tolist()

In [43]:
house_data = house_data.drop(off_list)

In [44]:
house_data.shape

(39122, 14)

### Drop Cooling Source Column

In [45]:
house_data = house_data.drop(labels = ['compCool1', 'T_stp_cool'], axis = 1)

In [46]:
house_data.isnull().sum()

DateTime                  0
HvacMode                  0
Event                     0
Schedule                  0
T_ctrl                    0
T_stp_heat                0
Humidity                  0
auxHeat1                  0
fan                       0
Thermostat_Temperature    0
T_out                     0
RH_out                    0
dtype: int64

In [47]:
null_list = house_data[house_data['T_stp_heat'].isnull()].index.tolist()

In [48]:
house_data = house_data.drop(null_list)

In [49]:
house_data.isnull().sum()

DateTime                  0
HvacMode                  0
Event                     0
Schedule                  0
T_ctrl                    0
T_stp_heat                0
Humidity                  0
auxHeat1                  0
fan                       0
Thermostat_Temperature    0
T_out                     0
RH_out                    0
dtype: int64

### DateTime Manipulation

In [50]:
house_data.DateTime.dtype

dtype('O')

In [51]:
house_data['DateTime'] = pd.to_datetime(house_data['DateTime'])

In [52]:
house_data.DateTime

0       2019-01-01 00:00:00
1       2019-01-01 00:05:00
2       2019-01-01 00:10:00
3       2019-01-01 00:15:00
4       2019-01-01 00:20:00
                ...        
96763   2019-12-31 23:35:00
96764   2019-12-31 23:40:00
96765   2019-12-31 23:45:00
96766   2019-12-31 23:50:00
96767   2019-12-31 23:55:00
Name: DateTime, Length: 39122, dtype: datetime64[ns]

In [53]:
house_data['Month'] = pd.DatetimeIndex(house_data['DateTime']).month

In [54]:
weekDays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

In [55]:
house_data['DayOfWeek'] = pd.DatetimeIndex(house_data['DateTime']).dayofweek

In [56]:
house_data['HourofDay'] = pd.DatetimeIndex(house_data['DateTime']).hour

In [57]:
house_data['Day'] = pd.DatetimeIndex(house_data['DateTime']).day

In [58]:
house_data.columns

Index(['DateTime', 'HvacMode', 'Event', 'Schedule', 'T_ctrl', 'T_stp_heat',
       'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'T_out',
       'RH_out', 'Month', 'DayOfWeek', 'HourofDay', 'Day'],
      dtype='object')

In [59]:
cols = house_data.columns.tolist()
print(cols)

['DateTime', 'HvacMode', 'Event', 'Schedule', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'T_out', 'RH_out', 'Month', 'DayOfWeek', 'HourofDay', 'Day']


In [60]:
cols = cols[-4:] + cols[:-4]
print(cols)

['Month', 'DayOfWeek', 'HourofDay', 'Day', 'DateTime', 'HvacMode', 'Event', 'Schedule', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'T_out', 'RH_out']


In [61]:
house_data = house_data[cols]

In [62]:
house_data.head()

Unnamed: 0,Month,DayOfWeek,HourofDay,Day,DateTime,HvacMode,Event,Schedule,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,T_out,RH_out
0,1,1,0,1,2019-01-01 00:00:00,heat,,Sleep,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
1,1,1,0,1,2019-01-01 00:05:00,heat,,Sleep,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
2,1,1,0,1,2019-01-01 00:10:00,heat,,Sleep,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
3,1,1,0,1,2019-01-01 00:15:00,heat,,Sleep,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
4,1,1,0,1,2019-01-01 00:20:00,heat,,Sleep,68.0,68.0,45.0,0.0,0.0,68.0,7.0,83.0


In [63]:
house_data.drop(labels = ['DateTime'], axis = 1, inplace = True)

In [64]:
house_data.head()

Unnamed: 0,Month,DayOfWeek,HourofDay,Day,HvacMode,Event,Schedule,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,T_out,RH_out
0,1,1,0,1,heat,,Sleep,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
1,1,1,0,1,heat,,Sleep,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
2,1,1,0,1,heat,,Sleep,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
3,1,1,0,1,heat,,Sleep,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
4,1,1,0,1,heat,,Sleep,68.0,68.0,45.0,0.0,0.0,68.0,7.0,83.0


#### Analyzing the Categorical Variables

In [65]:
house_data.head()

Unnamed: 0,Month,DayOfWeek,HourofDay,Day,HvacMode,Event,Schedule,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,T_out,RH_out
0,1,1,0,1,heat,,Sleep,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
1,1,1,0,1,heat,,Sleep,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
2,1,1,0,1,heat,,Sleep,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
3,1,1,0,1,heat,,Sleep,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
4,1,1,0,1,heat,,Sleep,68.0,68.0,45.0,0.0,0.0,68.0,7.0,83.0


In [66]:
house_data.columns

Index(['Month', 'DayOfWeek', 'HourofDay', 'Day', 'HvacMode', 'Event',
       'Schedule', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan',
       'Thermostat_Temperature', 'T_out', 'RH_out'],
      dtype='object')

In [67]:
house_data.drop(labels = ['HvacMode', 'Event', 'Schedule'], axis = 1, inplace = True)

#### Feature Engineering for Cyclical Features (HourofDay, Month & DayofWeek)

In [68]:
house_data.Month.unique()

array([ 1,  2,  3,  4,  5,  9, 10, 11, 12], dtype=int64)

In [69]:
house_data.HourofDay.unique()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23], dtype=int64)

In [70]:
house_data.DayOfWeek.unique()

array([1, 2, 3, 4, 6, 0, 5], dtype=int64)

In [71]:
house_data['hour_sin'] = np.sin(house_data.HourofDay * (2. * np.pi / 24))
house_data['hour_cos'] = np.cos(house_data.HourofDay * (2. * np.pi / 24))
house_data['month_sin'] = np.sin((house_data.Month - 1) * (2. * np.pi / 12))
house_data['month_cos'] = np.cos((house_data.Month - 1) * (2. * np.pi / 12))
house_data['day_sin'] = np.sin(house_data.DayOfWeek) * (2. * np.pi / 7)
house_data['day_cos'] = np.cos(house_data.DayOfWeek) * (2. * np.pi / 7)

In [72]:
cols = house_data.columns.tolist()
print(cols)

['Month', 'DayOfWeek', 'HourofDay', 'Day', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'T_out', 'RH_out', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'day_sin', 'day_cos']


In [73]:
cols = cols[-6:] + cols[:-6]
print(cols)

['hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'day_sin', 'day_cos', 'Month', 'DayOfWeek', 'HourofDay', 'Day', 'T_ctrl', 'T_stp_heat', 'Humidity', 'auxHeat1', 'fan', 'Thermostat_Temperature', 'T_out', 'RH_out']


In [74]:
house_data = house_data[cols]

In [75]:
house_data.head()

Unnamed: 0,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos,Month,DayOfWeek,HourofDay,Day,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,T_out,RH_out
0,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
1,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
2,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
3,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,68.0,68.0,44.0,0.0,0.0,68.0,7.0,83.0
4,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,68.0,68.0,45.0,0.0,0.0,68.0,7.0,83.0


### Data Pre-Processing

In [76]:
categorical_vars = ['HvacMode', 'Event', 'Schedule']
numerical_vars = ['T_ctrl', 'T_stp_heat', 'Humidity', 'Thermostat_Temperature', 'T_out', 'RH_out']

#### Standardizing the Numerical Features

In [77]:
sc = StandardScaler()
house_data[numerical_vars] = sc.fit_transform(house_data[numerical_vars])

In [78]:
#sc = load(open('scaler.pkl', 'rb'))
#house_data[numerical_vars] = sc.transform(house_data[numerical_vars])

In [79]:
import sklearn

In [80]:
sklearn.__version__

'0.22.2.post1'

In [81]:
house_data.head()

Unnamed: 0,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos,Month,DayOfWeek,HourofDay,Day,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,T_out,RH_out
0,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,0.358703,0.710844,0.068059,0.0,0.0,0.327427,-1.046529,0.605452
1,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,0.358703,0.710844,0.068059,0.0,0.0,0.327427,-1.046529,0.605452
2,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,0.358703,0.710844,0.068059,0.0,0.0,0.327427,-1.046529,0.605452
3,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,0.358703,0.710844,0.068059,0.0,0.0,0.327427,-1.046529,0.605452
4,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,0.358703,0.710844,0.238499,0.0,0.0,0.327427,-1.046529,0.605452


In [82]:
house_data.reset_index(inplace = True)

#### Ordinal Encoding the 'fan' Feature & 'auxHeat1' Feature

In [83]:
y_auxHeat = house_data['auxHeat1'].to_numpy()
y_fan = house_data['fan'].to_numpy()

In [84]:
oe = OrdinalEncoder()
y_auxHeat = oe.fit_transform(y_auxHeat.reshape(-1, 1))
y_fan = oe.fit_transform(y_fan.reshape(-1, 1))

In [85]:
y_auxHeat = y_auxHeat.reshape(y_auxHeat.shape[0], )
y_fan = y_fan.reshape(y_fan.shape[0], )

In [86]:
y_auxHeat = y_auxHeat.astype(int)
y_auxHeat = y_auxHeat.astype(str)

y_fan = y_fan.astype(int)
y_fan = y_fan.astype(str)

In [87]:
house_data['auxHeat1'] = y_auxHeat
house_data['fan'] = y_fan

In [88]:
house_data.head()

Unnamed: 0,index,hour_sin,hour_cos,month_sin,month_cos,day_sin,day_cos,Month,DayOfWeek,HourofDay,Day,T_ctrl,T_stp_heat,Humidity,auxHeat1,fan,Thermostat_Temperature,T_out,RH_out
0,0,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,0.358703,0.710844,0.068059,0,0,0.327427,-1.046529,0.605452
1,1,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,0.358703,0.710844,0.068059,0,0,0.327427,-1.046529,0.605452
2,2,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,0.358703,0.710844,0.068059,0,0,0.327427,-1.046529,0.605452
3,3,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,0.358703,0.710844,0.068059,0,0,0.327427,-1.046529,0.605452
4,4,0.0,1.0,0.0,1.0,0.755303,0.484974,1,1,0,1,0.358703,0.710844,0.238499,0,0,0.327427,-1.046529,0.605452


In [89]:
house_data.auxHeat1.unique()

array(['0', '2', '20', '7', '17', '1', '5', '6', '9', '10', '19', '15',
       '11', '18', '8', '3', '4', '12', '16', '14', '13'], dtype=object)

In [90]:
house_data.fan.unique()

array(['0', '2', '20', '3', '7', '1', '8', '6', '12', '5', '10', '17',
       '4', '18', '11', '9', '14', '16', '13', '19', '15'], dtype=object)

In [91]:
house_data.shape

(39122, 19)

In [92]:
house_data.to_csv("../preprocessed_data/std_test_data_6c83e118e0c6dde25b3aba4935f902a0291b95a7.csv", index = False)