# Dealing with the low frequency data only

In [97]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display
import datetime
import time
import math
import warnings
warnings.filterwarnings("ignore")
import glob

In [98]:
def read_label(houses=(1, 2, 3), base_dir='low_freq'):
    """Read the channel labels of each house from its ``labels.dat`` file.

    Every non-blank line of ``<base_dir>/house_<n>/labels.dat`` is expected to
    hold a channel number followed by an appliance name, separated by
    whitespace. The channel number is appended to the name so that several
    channels carrying the same appliance name stay distinguishable.

    Parameters
    ----------
    houses : iterable of int, optional
        House numbers to load. Defaults to houses 1, 2 and 3.
    base_dir : str, optional
        Directory that contains the ``house_<n>`` folders.

    Returns
    -------
    dict
        ``{house: {channel: '<name>_<channel>'}}`` with integer keys.
    """
    label = {}
    for house in houses:
        label_path = '{}/house_{}/labels.dat'.format(base_dir, house)
        label[house] = {}
        with open(label_path) as f:
            for line in f:
                # split() with no argument tolerates repeated spaces, tabs and
                # the trailing newline, unlike split(' ').
                fields = line.split()
                if not fields:
                    # Blank line (e.g. an extra newline at the end of the file).
                    continue
                channel, name = fields[0], fields[1]
                label[house][int(channel)] = name + '_' + channel
    return label
# Load the channel labels once, then show the mapping of every house.
labels = read_label()
for house_id, house_labels in labels.items():
    print('House {}: '.format(house_id), house_labels, '\n')

House 1:  {1: 'mains_1', 2: 'mains_2', 3: 'refrigerator_3', 4: 'dishwaser_4', 5: 'washer_dryer_5', 6: 'microwave_6', 7: 'washer_dryer_7', 8: 'washer_dryer_8'} 

House 2:  {1: 'mains_1', 2: 'mains_2', 3: 'kitchen_outlets_3', 4: 'lighting_4', 5: 'stove_5', 6: 'microwave_6', 7: 'washer_dryer_7', 8: 'kitchen_outlets_8', 9: 'refrigerator_9', 10: 'dishwaser_10', 11: 'disposal_11'} 

House 3:  {1: 'mains_1', 2: 'mains_2', 3: 'refrigerator_3', 4: 'dishwaser_4', 5: 'washer_dryer_5', 6: 'washer_dryer_6', 7: 'microwave_7'} 



In [99]:
def read_merge_data(house, house_labels=None, base_dir='low_freq'):
    """Load every channel file of one house and merge them on the timestamp.

    Each ``<base_dir>/house_<house>/channel_<n>.dat`` file is read as two
    space-separated columns: the Unix time in seconds and one reading. The
    channels are inner-joined on the Unix time, so only timestamps present in
    every channel are kept.

    Parameters
    ----------
    house : int
        House number to load.
    house_labels : dict, optional
        ``{house: {channel: column_name}}`` mapping. When omitted, the
        notebook-level ``labels`` mapping is used.
    base_dir : str, optional
        Directory that contains the ``house_<n>`` folders.

    Returns
    -------
    pandas.DataFrame
        One float column per channel, indexed by an unnamed datetime index
        derived from the Unix time.
    """
    if house_labels is None:
        # Fall back to the notebook-level mapping for existing callers.
        house_labels = labels
    names = house_labels[house]
    path = '{}/house_{}/'.format(base_dir, house)

    def read_channel(channel):
        # Read one channel file into a (unix_time, <channel label>) frame.
        column = names[channel]
        return pd.read_table(path + 'channel_{}.dat'.format(channel), sep=' ',
                             names=['unix_time', column],
                             dtype={'unix_time': 'int64', column: 'float64'})

    merged = read_channel(1)

    # Channels are assumed to be numbered contiguously from 1, so the number
    # of channel files gives the highest channel number.
    num_apps = len(glob.glob(path + 'channel_*.dat'))
    for channel in range(2, num_apps + 1):
        merged = pd.merge(merged, read_channel(channel), how='inner', on='unix_time')

    # Assign raw values so the index stays unnamed: later cells call
    # reset_index() and expect the resulting column to be called 'index'.
    merged.index = pd.to_datetime(merged['unix_time'], unit='s').values
    return merged.drop(columns=['unix_time'])
# One merged, time-indexed frame per house, keyed by house number.
df = {house_id: read_merge_data(house_id) for house_id in range(1, 4)}

In [100]:
# Report the size of each house's frame and preview its first rows.
for house_id in (1, 2, 3):
    house_df = df[house_id]
    print('House {} data has shape: '.format(house_id), house_df.shape)
    display(house_df.head())

House 1 data has shape:  (406748, 8)


Unnamed: 0,mains_1,mains_2,refrigerator_3,dishwaser_4,washer_dryer_5,microwave_6,washer_dryer_7,washer_dryer_8
2011-04-18 13:22:13,222.2,118.83,6.0,0.0,0.0,5.0,0.0,0.0
2011-04-18 13:22:16,223.17,119.19,6.0,0.0,0.0,5.0,0.0,0.0
2011-04-18 13:22:20,223.6,118.92,6.0,0.0,0.0,5.0,0.0,0.0
2011-04-18 13:22:23,222.91,119.16,6.0,1.0,0.0,5.0,0.0,0.0
2011-04-18 13:22:26,222.94,118.83,6.0,0.0,0.0,5.0,0.0,0.0


House 2 data has shape:  (316840, 11)


Unnamed: 0,mains_1,mains_2,kitchen_outlets_3,lighting_4,stove_5,microwave_6,washer_dryer_7,kitchen_outlets_8,refrigerator_9,dishwaser_10,disposal_11
2011-04-18 05:31:40,15.71,22.61,1.0,8.0,1.0,4.0,4.0,4.0,6.0,1.0,0.0
2011-04-18 05:31:44,15.71,22.61,0.0,8.0,1.0,5.0,5.0,4.0,6.0,0.0,0.0
2011-04-18 05:31:47,15.72,22.61,1.0,8.0,1.0,4.0,4.0,4.0,6.0,1.0,0.0
2011-04-18 05:31:50,15.7,22.57,0.0,8.0,1.0,4.0,4.0,3.0,7.0,0.0,0.0
2011-04-18 05:32:05,15.64,22.61,1.0,8.0,1.0,4.0,4.0,3.0,7.0,1.0,0.0


House 3 data has shape:  (376150, 7)


Unnamed: 0,mains_1,mains_2,refrigerator_3,dishwaser_4,washer_dryer_5,washer_dryer_6,microwave_7
2011-04-16 05:11:30,181.34,577.3,119.0,0.0,0.0,0.0,2.0
2011-04-16 05:11:33,180.74,577.39,118.0,1.0,0.0,0.0,2.0
2011-04-16 05:11:36,182.0,577.88,119.0,0.0,0.0,0.0,2.0
2011-04-16 05:11:40,181.44,578.44,119.0,1.0,0.0,0.0,2.0
2011-04-16 05:11:43,179.95,575.92,117.0,0.0,0.0,0.0,2.0


### House 1 (dat to csv)

In [101]:
# Turn the timestamp index into a regular 'Time' column so it is written to the CSV.
ddf = df[1].reset_index().rename(columns={'index': 'Time'})
ddf.to_csv('House1.csv', index=False)  # house 1 of the low frequency REDD dataset
house1 = ddf
house1

Unnamed: 0,Time,mains_1,mains_2,refrigerator_3,dishwaser_4,washer_dryer_5,microwave_6,washer_dryer_7,washer_dryer_8
0,2011-04-18 13:22:13,222.20,118.83,6.0,0.0,0.0,5.0,0.0,0.0
1,2011-04-18 13:22:16,223.17,119.19,6.0,0.0,0.0,5.0,0.0,0.0
2,2011-04-18 13:22:20,223.60,118.92,6.0,0.0,0.0,5.0,0.0,0.0
3,2011-04-18 13:22:23,222.91,119.16,6.0,1.0,0.0,5.0,0.0,0.0
4,2011-04-18 13:22:26,222.94,118.83,6.0,0.0,0.0,5.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
406743,2011-05-24 19:56:20,235.73,38.65,186.0,1.0,0.0,4.0,0.0,0.0
406744,2011-05-24 19:56:23,235.03,38.66,187.0,0.0,0.0,4.0,0.0,0.0
406745,2011-05-24 19:56:27,235.46,38.61,190.0,0.0,0.0,4.0,0.0,0.0
406746,2011-05-24 19:56:30,235.98,38.77,189.0,0.0,0.0,4.0,0.0,0.0


In [102]:
# Distinct calendar days (as 'YYYY-MM-DD' strings) covered by the house 1 readings.
# Reads house1 rather than the reused ddf name so the result does not depend on
# which export cell ran last.
dates = sorted(house1['Time'].dt.strftime('%Y-%m-%d').unique())
print('House {0} data contain {1} days from {2} to {3}.'.format(1,len(dates),dates[0], dates[-1]))

House 1 data contain 23 days from 2011-04-18 to 2011-05-24.


### House 3 (dat to csv)

In [103]:
# Turn the timestamp index into a regular 'Time' column so it is written to the CSV.
ddf = df[3].reset_index().rename(columns={'index': 'Time'})
ddf.to_csv('House3.csv', index=False)  # house 3 of the low frequency REDD dataset
house3 = ddf
house3

Unnamed: 0,Time,mains_1,mains_2,refrigerator_3,dishwaser_4,washer_dryer_5,washer_dryer_6,microwave_7
0,2011-04-16 05:11:30,181.34,577.30,119.0,0.0,0.0,0.0,2.0
1,2011-04-16 05:11:33,180.74,577.39,118.0,1.0,0.0,0.0,2.0
2,2011-04-16 05:11:36,182.00,577.88,119.0,0.0,0.0,0.0,2.0
3,2011-04-16 05:11:40,181.44,578.44,119.0,1.0,0.0,0.0,2.0
4,2011-04-16 05:11:43,179.95,575.92,117.0,0.0,0.0,0.0,2.0
...,...,...,...,...,...,...,...,...
376145,2011-05-31 00:19:23,15.47,2.23,8.0,3.0,0.0,0.0,1.0
376146,2011-05-31 00:19:26,15.48,2.23,8.0,3.0,0.0,0.0,2.0
376147,2011-05-31 00:19:30,15.45,2.23,8.0,3.0,0.0,0.0,2.0
376148,2011-05-31 00:19:33,15.42,2.23,8.0,3.0,0.0,0.0,2.0


In [104]:
# Distinct calendar days (as 'YYYY-MM-DD' strings) covered by the house 3 readings.
# Reads house3 rather than the reused ddf name so the result does not depend on
# which export cell ran last.
dates = sorted(house3['Time'].dt.strftime('%Y-%m-%d').unique())
print('House {0} data contain {1} days from {2} to {3}.'.format(3,len(dates),dates[0], dates[-1]))

House 3 data contain 26 days from 2011-04-16 to 2011-05-31.
