In [2]:
import os
from __future__ import print_function, division
import pandas as pd
from datetime import timedelta
from time import strftime
import numpy as np

# Root directory that contains the 'multi_group' folder.
# The location can be overridden with the NILM_DATA_DIR environment variable so
# the notebook is not tied to one machine; the original location stays the default.
path = os.environ.get('NILM_DATA_DIR', '/home/nilm/Desktop/III_NILM/data') # multi_group
# Relative file names used later (e.g. when saving the model) resolve against this directory.
os.chdir(path)

In [3]:
"""
    Prepare training data from 'multi_group', which can be downloaded from Google Drive.
    collection : data_in_dictionary, the training data
    train_builing : IdList, the buildings used for training
    bprob : IdProbility, the probabilities for the training buildings.
    activation_prob : ActivationProbility, the probabilities for the activations
    
""" 

"\n    Prepare training data from 'multi_group', which can be downloaded from Google Drive.\n    collection : data_in_dictionary, the training data\n    train_builing : IdList, the buildings used for training\n    bprob : IdProbility, the probabilities for the training buildings.\n    activation_prob : ActivationProbility, the probabilities for the activations\n    \n"

In [33]:
# Numeric ids of the buildings whose activations are used for training.
train_builing = [14, 19, 28, 39]

# Empty containers that the loading cell fills in:
collection = dict()       # training data, keyed by 'id_<building>'
activation_prob = dict()  # per-building activation probabilities (ActivationProbility)
bprob = list()            # one entry per building; normalised into building_prob later

In [34]:
# prepare the related config
def _activation_sort_key(filename):
    """Sort key that orders activation files by the numeric value of their key.

    The key is the file name without its last 15 characters (presumably the
    suffix '_activation.csv' -- TODO confirm against the data set). Numeric keys
    sort by value ('2' before '10'); anything else sorts after them, by name.
    """
    stem = filename[:-15]
    if stem.isdigit():
        return (0, int(stem), stem)
    return (1, 0, stem)


# Start from an empty list so that re-running this cell does not keep appending.
bprob = []

# Iterate in the order of train_builing itself: building_prob is later passed to
# the sampler next to train_builing (IdList), so both must share the same order.
for item in train_builing:
    pathfile = os.path.join(path, 'multi_group', str(item))
    aprob = []
    # os.listdir returns names in arbitrary order. The sampler looks activations
    # up by position (str(0), str(1), ...), so the weights must be stored in the
    # numeric order of the activation keys.
    ActivationsList = sorted(os.listdir(pathfile), key=_activation_sort_key)
    ActivationCollection = {}

    for activation in ActivationsList:
        activation_data = pd.read_csv(os.path.join(pathfile, activation), index_col=0)
        # An activation is weighted by its number of rows.
        aprob.append(len(activation_data))
        ActivationCollection[str(activation[:-15])] = activation_data

    total_rows = sum(aprob)
    if total_rows == 0:
        raise ValueError('no activation rows found in {}'.format(pathfile))
    bprob.append(total_rows)
    collection['id_'+str(item)] = ActivationCollection
    # float() keeps this a true division even without `from __future__ import division`.
    activation_prob['id_'+ str(item)] = [n / float(total_rows) for n in aprob]

# A building is weighted by the total number of rows over all of its activations.
total_rows_all_buildings = float(sum(bprob))
building_prob = [n / total_rows_all_buildings for n in bprob]

In [35]:
class Sequence(object):
    """
    Holder for one input sequence and its per-channel target sequences.

    Attributes
    ----------
    input : np.ndarray
        float32 array of zeros with the requested shape.
    target : dict
        Maps str(channel) to a float32 array of zeros with the requested shape,
        one entry per name in target_channels_in_list.
    """
    def __init__(self, shape, target_channels_in_list):
        def _blank():
            # Every buffer starts out as an independent all-zero float32 array.
            return np.zeros(shape, dtype=np.float32)

        self.input = _blank()
        self.target = {str(name): _blank() for name in target_channels_in_list}
       


In [36]:
# the RealSource, which will replace the original one
class RealSource(object):
    """
    Draws random fixed-length training sequences from pre-loaded activations.

    Parameters
    ----------
    data_in_dictionary : dict
        data_in_dictionary['id_<building>'][str(activation_index)] is a
        DataFrame whose index can be parsed by pd.to_datetime and whose
        columns include every name in target_channels_in_list.
    target_channels_in_list : list
        Column names; the first one is used as the network input, the
        remaining ones as targets, e.g. ['main', 'fridge', 'air conditioner'].
    seq_length : int
        Number of rows in every returned sequence.
    IdList : list
        Building ids to sample from.
    IdProbility : list
        Sampling probability of each entry of IdList (same order).
    ActivationProbility : dict
        Maps 'id_<building>' to the list of sampling probabilities of that
        building's activations; position i belongs to activation key str(i).
    num_seq_per_batch : int
        Number of sequences per batch.
    max_activation_tries : int
        How many (building, activation) draws are made while looking for an
        activation with at least seq_length rows.
    max_window_tries : int
        How many start positions are tried inside the chosen activation.
    """

    def __init__(self, data_in_dictionary, target_channels_in_list, seq_length, IdList, IdProbility, ActivationProbility,
                num_seq_per_batch=32, max_activation_tries=32, max_window_tries=120):
        self.data_in_dictionary = data_in_dictionary
        self.target_channels_in_list = target_channels_in_list
        self.seq_length = seq_length
        self.IdList = IdList
        self.IdProbility = IdProbility
        self.ActivationProbility = ActivationProbility
        self.num_seq_per_batch = num_seq_per_batch
        self.max_activation_tries = max_activation_tries
        self.max_window_tries = max_window_tries

    def _select_building(self, IdList, IdProbility):
        """
        Draw one building id; the result is a length-1 array.

        For Example:

        _select_building(train_builing, building_prob), where
        train_builing = [1, 14, 19]
        building_prob = [0.5, 0.2, 0.3]
        """
        return np.random.choice(IdList, 1, p=IdProbility)

    def _select_activation(self, ActivationList, ActivationProbility):
        """
        Draw one activation index; the result is a length-1 array.

        For Example:

        _select_activation(range(len(activation_prob['id_1'])), activation_prob['id_1']), where
        range(len(activation_prob['id_1'])) = [0,1,2,3,4]
        activation_prob['id_1'] = [0.1, 0.1, 0.3, 0.2, 0.3]
        """
        return np.random.choice(ActivationList, 1, p=ActivationProbility)

    def _pick_activation(self, data, IdList, IdProbility, ActivationProbility):
        """Return a randomly drawn activation with >= seq_length rows, or None if none was found."""
        for _ in range(self.max_activation_tries):
            building_key = 'id_' + str(self._select_building(IdList, IdProbility)[0])
            weights = ActivationProbility[building_key]
            activation_index = self._select_activation(range(len(weights)), weights)[0]
            candidate = data[building_key][str(activation_index)]
            if len(candidate) >= self.seq_length:
                return candidate
        return None

    def _build_sequence(self, window):
        """Copy the first seq_length rows of `window` into a Sequence (first channel = input, rest = targets)."""
        seq = Sequence(self.seq_length, self.target_channels_in_list)
        input_channel = self.target_channels_in_list[0]
        seq.input = np.array(window[input_channel].values[:self.seq_length])
        for target in self.target_channels_in_list[1:]:
            seq.target[str(target)] = np.array(window[target].values[:self.seq_length])
        return seq

    def get_seq_and_check(self, data, IdList, IdProbility, ActivationProbility):
        """
        Draw one sequence of seq_length rows.

        For Example:
        get_seq_and_check(collection, train_builing, building_prob, activation_prob)
        collection = data
        train_builing = [1, 14, 19]
        building_prob = [0.5, 0.2, 0.3]
        activation_prob = {'id_1':[0.1, 0.1, 0.3, 0.2, 0.3],
                            'id_14':[0.1, 0.1, 0.3, 0.2, 0.3],
                            'id_19':[0.1, 0.1, 0.3, 0.2, 0.3]}

        Returns
        -------
        A Sequence, or None when no activation with enough rows was drawn
        within max_activation_tries, or no start position with enough rows
        after it was found within max_window_tries.

        Notes
        -----
        A window covers 2 * seq_length minutes after the randomly chosen start
        row and only its first seq_length rows are kept.
        NOTE(review): this presumably assumes roughly one row per minute -- confirm.
        The index of the chosen DataFrame is converted to a DatetimeIndex in
        place (once) so that it can be sliced by time.
        """
        frame = self._pick_activation(data, IdList, IdProbility, ActivationProbility)
        if frame is None:
            # Every draw was shorter than seq_length; no window can be long enough.
            return None

        # Parse the index only when needed instead of on every single draw.
        if not isinstance(frame.index, pd.DatetimeIndex):
            frame.index = pd.to_datetime(frame.index)

        window_span = timedelta(seconds=60 * self.seq_length * 2)
        for _ in range(self.max_window_tries):
            select_start = frame.index[np.random.randint(len(frame))]
            window = frame.loc[select_start:select_start + window_span]
            if len(window) >= self.seq_length:
                # A valid window is used even when it is found on the last attempt.
                return self._build_sequence(window)
        return None

    def _get_sequence(self):
        """Draw one sequence using the data and probabilities stored on this instance."""
        seq=self.get_seq_and_check(data = self.data_in_dictionary,
                                   IdList = self.IdList,
                                   IdProbility = self.IdProbility,
                                   ActivationProbility = self.ActivationProbility)
        return seq

    def get_batch(self):
        """
        Draw num_seq_per_batch sequences and stack them.

        Returns
        -------
        (input_sequences, target_sequences)
            input_sequences : np.ndarray of shape (num_seq_per_batch, seq_length, 1)
            target_sequences : dict mapping str(target channel) to an array of the same shape
            Both are None when any single sequence could not be drawn; callers
            are expected to simply call get_batch() again.
        """
        target_names = [str(target) for target in self.target_channels_in_list[1:]]
        input_sequences = []
        target_sequences = dict((name, []) for name in target_names)

        for _ in range(self.num_seq_per_batch):
            seq = self._get_sequence()
            if seq is None:
                # The batch would be discarded anyway, so stop sampling right away.
                return None, None
            input_sequences.append(seq.input.reshape(self.seq_length, 1))
            for name in target_names:
                target_sequences[name].append(seq.target[name].reshape(self.seq_length, 1))

        batch_shape = (self.num_seq_per_batch, self.seq_length, 1)
        input_sequences = np.asarray(input_sequences).reshape(batch_shape)
        for name in target_names:
            target_sequences[name] = np.asarray(target_sequences[name]).reshape(batch_shape)

        return input_sequences, target_sequences

    

In [37]:
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Activation, Reshape, Flatten, Conv1D, MaxPooling1D, Dropout, LSTM, TimeDistributed, Bidirectional
from keras.optimizers import RMSprop


# Hyper-parameters of the first convolution layer.
MODEL_CONV_FILTERS = 32
MODEL_CONV_KERNEL_SIZE = 18
MODEL_CONV_STRIDES = 1
MODEL_CONV_PADDING = 'same'

# Number of time steps per input sequence; the input layer and the reshape
# before the recurrent heads are both derived from it so they cannot drift apart.
seq_length = 60
# Number of features per time step handed to the recurrent heads.
features_per_step = 12

# conv
x = Input(shape=(seq_length, 1))
conv_1 = Conv1D(filters=MODEL_CONV_FILTERS, kernel_size=MODEL_CONV_KERNEL_SIZE, strides=MODEL_CONV_STRIDES,
                padding=MODEL_CONV_PADDING, activation='relu')(x)
drop_1 = Dropout(0.12)(conv_1)

conv_2 = Conv1D(filters=64, kernel_size=12, padding=MODEL_CONV_PADDING, activation='relu')(drop_1)
drop_2 = Dropout(0.14)(conv_2)

conv_3 = Conv1D(filters=128, kernel_size=7, padding=MODEL_CONV_PADDING, activation='relu')(drop_2)
pool_3 = MaxPooling1D(pool_size=2)(conv_3)
drop_3 = Dropout(0.18)(pool_3)

conv_4 = Conv1D(filters=128, kernel_size=3, padding=MODEL_CONV_PADDING, activation='relu')(drop_3)
pool_4 = MaxPooling1D(pool_size=2)(conv_4)
drop_4 = Dropout(0.2)(pool_4)

# reshape
flat_4 = Flatten()(drop_4)

dense_5 = Dense(1280, activation='relu')(flat_4)
drop_5 = Dropout(0.16)(dense_5)

dense_6 = Dense(960, activation='relu')(drop_5)
drop_6 = Dropout(0.14)(dense_6)

# The last dense layer must produce exactly seq_length * features_per_step values
# (720 for the defaults) so that the Reshape below is valid.
dense_7 = Dense(seq_length * features_per_step, activation='relu')(drop_6)
drop_7 = Dropout(0.12)(dense_7)

reshape_8 = Reshape(target_shape=(seq_length, features_per_step))(drop_7)

# One recurrent head per appliance, all sharing the convolutional/dense trunk.
# The order of this list fixes the order of the model outputs.
outputs_disaggregation = []

for appliance_name in ['fridge','air conditioner']:
    biLSTM_1 = Bidirectional(LSTM(6, return_sequences=True))(reshape_8)
    biLSTM_2 = Bidirectional(LSTM(3, return_sequences=True))(biLSTM_1)
    # Output layers are named after the appliance, with spaces replaced by underscores.
    outputs_disaggregation.append(TimeDistributed(Dense(1, activation='relu'), name=appliance_name.replace(" ", "_"))(biLSTM_2))

model = Model(inputs=x, outputs=outputs_disaggregation)
optimizer = RMSprop(lr=0.001, clipnorm=4)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae', 'mse'])




In [38]:
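"""
    target_channels_in_list = ['main','main'] needs to look like ['main','fridge','air conditioner']:
    the input channel comes first, followed by the target channels.
"""

"\n    target_channels_in_list = ['main','main'] needs to look like ['main','fridge','air conditioner']:\n    the input channel comes first, followed by the target channels.\n"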

In [39]:
# Batch sampler over the loaded training data: 'main' is the input channel,
# 'fridge' and 'air conditioner' are the target channels.
real = RealSource(
    data_in_dictionary=collection,
    target_channels_in_list=['main', 'fridge', 'air conditioner'],
    seq_length=60,
    IdList=train_builing,
    IdProbility=building_prob,
    ActivationProbility=activation_prob,
)

# Draw a single batch up front; both values are None if sampling failed.
main, target = real.get_batch()

In [41]:
# Train for 5000 steps, one freshly sampled batch per step, logging every 100th step.
for i in range(5000):
    log_this_step = (i % 100 == 0)
    if log_this_step:
        print('Step : {} , Time : {}\n'.format(i,strftime('%Y-%m-%d_%H_%M')))

    # get_batch() returns (None, None) when a sequence could not be sampled;
    # keep drawing until a complete batch is available.
    main, target = real.get_batch()
    while main is None or target is None:
        main, target = real.get_batch()

    # The order of the targets must match the order of the model outputs.
    train_metrics = model.train_on_batch(x=main, y=[target['fridge'], target['air conditioner']])

    if log_this_step:
        # Pair names with values directly instead of indexing with a variable
        # that shadows the outer step counter `i`.
        for metrics_name, metrics_value in zip(model.metrics_names, train_metrics):
            print('{}={:.4f}, '.format(metrics_name, metrics_value))
            print('')





Step : 0 , Time : 2018-06-21_23_07

loss=5754.4341, 

fridge_loss=5654.8901, 

air_conditioner_loss=99.5440, 

fridge_mean_absolute_error=42.8420, 

fridge_mean_squared_error=5654.8901, 

air_conditioner_mean_absolute_error=6.8266, 

air_conditioner_mean_squared_error=99.5440, 

Step : 100 , Time : 2018-06-21_23_08

loss=5294.1201, 

fridge_loss=5222.7607, 

air_conditioner_loss=71.3592, 

fridge_mean_absolute_error=41.6634, 

fridge_mean_squared_error=5222.7612, 

air_conditioner_mean_absolute_error=7.0252, 

air_conditioner_mean_squared_error=71.3592, 

Step : 200 , Time : 2018-06-21_23_09

loss=9257.2090, 

fridge_loss=5418.9023, 

air_conditioner_loss=3838.3064, 

fridge_mean_absolute_error=45.9283, 

fridge_mean_squared_error=5418.9033, 

air_conditioner_mean_absolute_error=14.1532, 

air_conditioner_mean_squared_error=3838.3047, 

Step : 300 , Time : 2018-06-21_23_09

loss=6208.2095, 

fridge_loss=6165.5967, 

air_conditioner_loss=42.6127, 

fridge_mean_absolute_error=46.8950, 



Step : 2900 , Time : 2018-06-21_23_27

loss=3882.2036, 

fridge_loss=3833.1074, 

air_conditioner_loss=49.0962, 

fridge_mean_absolute_error=40.8137, 

fridge_mean_squared_error=3833.1055, 

air_conditioner_mean_absolute_error=5.9585, 

air_conditioner_mean_squared_error=49.0963, 

Step : 3000 , Time : 2018-06-21_23_27

loss=4043.9434, 

fridge_loss=4004.0449, 

air_conditioner_loss=39.8985, 

fridge_mean_absolute_error=41.8536, 

fridge_mean_squared_error=4004.0496, 

air_conditioner_mean_absolute_error=4.9355, 

air_conditioner_mean_squared_error=39.8985, 

Step : 3100 , Time : 2018-06-21_23_28

loss=4727.2544, 

fridge_loss=4678.3252, 

air_conditioner_loss=48.9291, 

fridge_mean_absolute_error=45.8106, 

fridge_mean_squared_error=4678.3169, 

air_conditioner_mean_absolute_error=5.6762, 

air_conditioner_mean_squared_error=48.9291, 

Step : 3200 , Time : 2018-06-21_23_29

loss=5956.8047, 

fridge_loss=3678.8855, 

air_conditioner_loss=2277.9189, 

fridge_mean_absolute_error=39.1749,

In [42]:
# Save the trained model to an .h5 file; the relative name resolves against the
# current working directory (changed with os.chdir in the first cell).
# NOTE(review): 'frige' in the file name looks like a typo for 'fridge'; it is
# left as is because renaming would change the output path that other code may load.
model.save('multi_group__frige_ac.h5')