In [1]:
import numpy as np
import h5py
import time
import logging
import matplotlib.pyplot as plt
import pandas as pd
import random

In [11]:
    def read_data(hf, target_device, house_list):
        """Load and concatenate the series of several houses.

        Parameters
        ----------
        hf : mapping
            Object indexed as ``hf[house][dataset]`` (e.g. an open
            ``h5py.File``); every dataset must support ``[:]``.
        target_device : str
            Name of the per-house dataset holding the target appliance.
        house_list : sequence of str
            Keys of the houses to read, in the order they are concatenated.

        Returns
        -------
        tuple
            ``(aggregates, targets, timestamps)``: the ``'aggregate'``,
            ``target_device`` and ``'unix'`` datasets of all houses,
            concatenated along axis 0. When ``house_list`` is empty, three
            empty lists are returned.
        """
        if len(house_list) == 0:
            # Return three values, like the regular path, so callers that
            # unpack ``aggregates, targets, timestamps`` keep working.
            return [], [], []

        aggregates = []
        targets = []
        timestamps = []

        for house in house_list:
            group = hf[house]
            # ``[:]`` materialises each dataset before it is collected.
            aggregates.append(group['aggregate'][:])
            targets.append(group[target_device][:])
            timestamps.append(group['unix'][:])

        aggregates = np.concatenate(aggregates, axis=0)
        targets = np.concatenate(targets, axis=0)
        timestamps = np.concatenate(timestamps, axis=0)

        return aggregates, targets, timestamps

In [4]:
def extract(application,house,m_off,m_on,m_threshold):
    """Extract the activations of one appliance from ``data-raw.h5``.

    The appliance series is thresholded into on/off states, off-gaps shorter
    than ``m_off`` are merged into the surrounding activation, and on-periods
    shorter than ``m_on`` are dropped.

    Parameters
    ----------
    application : str
        Name of the appliance dataset stored under the house.
    house : str
        Key of the house inside the HDF5 file.
    m_off : number
        Minimum off duration, in the units of the ``'unix'`` index
        (presumably seconds -- confirm against the data file).
    m_on : number
        Minimum on duration, in the same units as ``m_off``.
    m_threshold : number
        Readings ``>=`` this value count as "on".

    Returns
    -------
    list of pandas.Series
        One series per retained activation, indexed by timestamp. Empty when
        no complete on/off cycle is found.
    """
    # The context manager closes the HDF5 handle; read_data copies the
    # datasets into memory, so nothing below needs the file.
    with h5py.File('data-raw.h5', 'r') as hf:
        _, train_y, timestamps = read_data(hf, application, [house])

    chunk = pd.Series(train_y, index=timestamps)
    border = 1
    # Hard-coded upper bound (exclusive) on the number of samples of a kept
    # activation; its rationale is not visible in this notebook.
    max_activation_len = 1070

    when_on = chunk >= m_threshold

    # +1 marks an off->on transition, -1 an on->off transition.
    state_changes = when_on.astype(np.int8).diff()
    switch_on_events = np.where(state_changes == 1)[0]
    switch_off_events = np.where(state_changes == -1)[0]
    del state_changes

    # Without at least one switch-on and one switch-off there is no complete
    # activation (indexing [0] / [-1] below would raise IndexError).
    if len(switch_on_events) == 0 or len(switch_off_events) == 0:
        return []

    # Drop a leading switch-off that has no preceding switch-on ...
    if switch_off_events[0] < switch_on_events[0]:
        switch_off_events = switch_off_events[1:]
        if len(switch_off_events) == 0:
            return []

    # ... and a trailing switch-on that has no following switch-off.
    if switch_on_events[-1] > switch_off_events[-1]:
        switch_on_events = switch_on_events[:-1]
        if len(switch_on_events) == 0:
            return []

    assert len(switch_on_events) == len(switch_off_events)

    if m_off > 0:
        # Gap between each switch-off and the next switch-on.
        next_on_times = np.asarray(chunk.index[switch_on_events[1:]])
        prev_off_times = np.asarray(chunk.index[switch_off_events[:-1]])
        off_durations = next_on_times - prev_off_times
        above_threshold_off_durations = np.where(
            off_durations >= m_off)[0]
        # Keep only boundaries around long-enough gaps; the first switch-on
        # and the last switch-off are always kept.
        switch_off_events = switch_off_events[
                np.concatenate([above_threshold_off_durations,
                                [len(switch_off_events)-1]])]
        switch_on_events = switch_on_events[
                np.concatenate([[0], above_threshold_off_durations+1])]
        assert len(switch_on_events) == len(switch_off_events)

    activations = []
    for on, off in zip(switch_on_events, switch_off_events):
        duration = chunk.index[off] - chunk.index[on]
        if duration < m_on:
            continue
        # Widen the slice by (1 + border) samples before the switch-on and
        # by `border` after the switch-off, clamped to the series start.
        on = max(on - (1 + border), 0)
        off += border
        activation = chunk.iloc[on:off]
        if not activation.isnull().values.any() and len(activation)<max_activation_len:
            activations.append(activation)
    return activations

In [14]:
import os

# Scaling factors applied to every stored activation.
coefficient=[0.8,0.9,1,1.1,1.2,1.3,1.4]
dw_activations = {}
house =6
path = 'app_activations/microwave/'

folder_path = path+'h'+str(house)+'/'
# os.listdir returns entries in arbitrary order; sorting keeps the activation
# lists (and any seeded random choice made from them) reproducible.
file_list = sorted(os.listdir(folder_path))

# The files do not depend on the coefficient, so read each one only once
# instead of once per coefficient.
base_activations = [
    np.load(os.path.join(folder_path, numpy_file)) for numpy_file in file_list
]

# Maps each coefficient to the list of activations scaled by it.
for scale in coefficient:
    dw_activations[scale] = [
        activation_tem * scale for activation_tem in base_activations
    ]


In [17]:
def produce_data(application,m_off,m_on,m_threshold,num,ls):
    """Write a synthetic training CSV by injecting activations into a house.

    The aggregate series of ``"house<num>"`` is read from ``data.h5`` and
    split into consecutive windows of 8640 samples. One randomly chosen
    activation from the module-level ``dw_activations`` is added at a random
    position inside each window. The result is written to
    ``data/train<num>.csv``.

    Only windows between consecutive multiples of 8640 are used, so samples
    after the last such boundary receive no activation.

    Parameters
    ----------
    application : str
        Appliance dataset name; also the name of the target column in the CSV.
    m_off, m_on, m_threshold :
        Accepted for signature compatibility; not used by this function.
    num : int
        House number; selects ``"house<num>"`` and names the output file.
    ls : sequence
        Keys of ``dw_activations`` to sample from.

    Raises
    ------
    ValueError
        From ``random.randint`` if a chosen activation is longer than a window.
    """
    window = 8640
    house_key = "house"+str(num)

    # The context manager closes the HDF5 handle; read_data copies the
    # datasets into memory, so nothing below needs the file.
    with h5py.File('data.h5', 'r') as hf:
        aggregation, _, _ = read_data(hf, application, [house_key])

    window_starts = list(range(0, len(aggregation), window))
    y=[0]*len(aggregation)
    print(len(y),len(aggregation))

    for start_index, end_index in zip(window_starts, window_starts[1:]):
        random_hs = random.choice(ls)
        activate = random.choice(dw_activations[random_hs])

        # randint is inclusive on both ends, so the activation always ends
        # at or before the last sample of the window.
        synthetic_index=random.randint(start_index,end_index-len(activate))
        # A distinct loop variable avoids shadowing the window iteration.
        for offset in range(len(activate)):
            position = synthetic_index + offset
            y[position] = y[position] + activate[offset]
            aggregation[position] = aggregation[position] + activate[offset]

    print(len(y))
    df=pd.DataFrame({application:y,'Aggregate':aggregation})
    path="data/train"+str(num)+".csv"
    df.to_csv(path, index=False, header=True)

In [18]:
# Coefficients to sample from; produce_data uses each one as a key into
# dw_activations.
ls = [0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4]
# Generate the synthetic microwave training file for every listed house.
for num in (18,):
    produce_data(application='microwave', m_off=30, m_on=12,
                 m_threshold=200, num=num, ls=ls)