In [4]:
import pandas as pd
import os

"""
Data is organized into a dictionary where key=house number and value= pandas dataframe for that house
Each house dataframe is composed of columns=appliance and rows=timestamp
For our 2 appliances I chose lighting and washer/dryer. Once the dataframes are built, I exclude all other appliances from the dataframe
My thought is to train on houses 1-4, test on houses 5-6
I use abbreviation l_wd to stand for lighting/washer_dryer
"""

cwd = os.getcwd()
path = f'{cwd}/'


def gather_all_files(path):

    all_files = []

    for house_num in range(1, 7):
        house_file = f'{path}low_freq/house_{house_num}'
        all_files.append(house_file)

    return all_files


def create_dataframes(all_files):

    house_data_dict = {}
    house_num = 1
    for file in all_files:

        with open(f'{file}/labels.dat') as f:
            labels = f.read().splitlines()
            for i in range(len(labels)):
                labels[i] = labels[i].replace(' ', '_')

        df = pd.read_table(f'{file}/channel_1.dat', sep=' ', names=['timestamp', labels[0]])
        df['timestamp'] = df['timestamp'].astype("datetime64[s]")

        for i in range(1, len(labels)):

            data = pd.read_table(f'{file}/channel_{i+1}.dat', sep=' ', names=['timestamp', labels[i]])
            data['timestamp'] = data['timestamp'].astype("datetime64[s]")
            df = pd.merge(df, data, how='inner', on='timestamp')

        df = df.set_index(df['timestamp'].values)
        df.drop(['timestamp'], axis=1, inplace=True)
        df.index.name = 'timestamp'

        house_data_dict[house_num] = df
        house_num += 1

    return house_data_dict


def select_appliances(house_data_dict):

    reduced_house_data_dict = {}
    for i in range(1, 7):
        df = house_data_dict[i]
        l_wd_cols = [col for col in df.columns if 'lighting' in col]
        l_wd_cols += [col for col in df.columns if 'washer_dryer' in col]
        df = df[l_wd_cols]
        reduced_house_data_dict[i] = df

    return reduced_house_data_dict


all_files = gather_all_files(path)
house_data_dict = create_dataframes(all_files)
reduced_house_data_dict = select_appliances(house_data_dict)

for i in range(1, 7):

    print(f'House {i} Shape: {reduced_house_data_dict[i].shape}')
    print(f'First 3 Rows House {i}: {reduced_house_data_dict[i].head(3)}')

House 1 Shape: (406748, 6)
First 3 Rows House 1:                      9_lighting  17_lighting  18_lighting  10_washer_dryer  \
timestamp                                                                    
2011-04-18 13:22:13        81.0         65.0         46.0              0.0   
2011-04-18 13:22:16        81.0         65.0         46.0              0.0   
2011-04-18 13:22:20        81.0         65.0         46.0              0.0   

                     19_washer_dryer  20_washer_dryer  
timestamp                                              
2011-04-18 13:22:13              0.0              0.0  
2011-04-18 13:22:16              0.0              0.0  
2011-04-18 13:22:20              0.0              0.0  
House 2 Shape: (316840, 2)
First 3 Rows House 2:                      4_lighting  7_washer_dryer
timestamp                                      
2011-04-18 05:31:40         8.0             4.0
2011-04-18 05:31:44         8.0             5.0
2011-04-18 05:31:47         8.0        

In [5]:
df = reduced_house_data_dict[1]
df['lighting_total'] = df['9_lighting'] + df['17_lighting'] + df['18_lighting']
df['washer_dryer_total'] = df['10_washer_dryer'] + df['19_washer_dryer'] + df['20_washer_dryer']
df['energy_total'] = df['lighting_total'] + df['washer_dryer_total']
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0_level_0,9_lighting,17_lighting,18_lighting,10_washer_dryer,19_washer_dryer,20_washer_dryer,lighting_total,washer_dryer_total,energy_total
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2011-04-18 13:22:13,81.0,65.0,46.0,0.0,0.0,0.0,192.0,0.0,192.0
2011-04-18 13:22:16,81.0,65.0,46.0,0.0,0.0,0.0,192.0,0.0,192.0
2011-04-18 13:22:20,81.0,65.0,46.0,0.0,0.0,0.0,192.0,0.0,192.0
2011-04-18 13:22:23,81.0,65.0,46.0,0.0,0.0,0.0,192.0,0.0,192.0
2011-04-18 13:22:26,81.0,65.0,46.0,0.0,0.0,0.0,192.0,0.0,192.0
...,...,...,...,...,...,...,...,...,...
2011-05-24 19:56:20,2.0,0.0,1.0,0.0,0.0,0.0,3.0,0.0,3.0
2011-05-24 19:56:23,2.0,0.0,1.0,0.0,0.0,0.0,3.0,0.0,3.0
2011-05-24 19:56:27,2.0,0.0,1.0,0.0,0.0,0.0,3.0,0.0,3.0
2011-05-24 19:56:30,2.0,0.0,1.0,0.0,0.0,0.0,3.0,0.0,3.0


In [25]:
X = list(df['energy_total'])
X = [[i] for i in X]
y = [df['lighting_total'], df['washer_dryer_total']]
y = list(zip(*y))
X

[[192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [193.0],
 [192.0],
 [193.0],
 [193.0],
 [192.0],
 [192.0],
 [193.0],
 [192.0],
 [192.0],
 [194.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [193.0],
 [193.0],
 [193.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [193.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [193.0],
 [192.0],
 [192.0],
 [191.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [190.0],
 [194.5],
 [192.0],
 [192.0],
 [191.0],
 [192.0],
 [192.0],
 [192.0],
 [193.0],
 [193.0],
 [193.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [191.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [191.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [192.0],
 [191.0],
 [191.0],
 [191.0],
 [191.0],
 [191.0],
 [192.0],
 [191.0],
 [191.0],
 [191.0],
 [191.0],
 [190.0],
 [191.0],


In [28]:
# multivariate cnn example
from numpy import array
from numpy import hstack
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Input
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import GlobalMaxPooling1D

# define model
model = Sequential()

model.add(Input(shape=(1,1)))
model.add(Conv1D(filters=64, kernel_size=2, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=2, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=2, activation='relu'))
model.add(MaxPooling1D(pool_size=2))

model.add(Conv1D(filters=128, kernel_size=2, activation='relu'))
model.add(Conv1D(filters=128, kernel_size=2, activation='relu'))
model.add(Conv1D(filters=128, kernel_size=2, activation='relu'))

model.add(GlobalMaxPooling1D())

model.add(Dense(512))
model.add(Dense(2, activation='relu'))
model.compile(optimizer='adam', loss='mse')
# fit model

model.fit(X,y, epochs=1000, verbose=0)



ValueError: Negative dimension size caused by subtracting 2 from 1 for '{{node conv1d_77/conv1d}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](conv1d_77/conv1d/ExpandDims, conv1d_77/conv1d/ExpandDims_1)' with input shapes: [?,1,1,1], [1,2,1,64].