In [1]:
import pandas as pd
import numpy as np
from nilmtk.datastore import Key
from nilmtk.utils import check_directory_exists, get_datastore, get_module_directory
from nilm_metadata import convert_yaml_to_hdf5

In [2]:
def convert_data(file_name, output_filename='test.h5', metadata_dir='./metadata'):
    '''
    Convert a headerless meter CSV into an HDF5 store and attach YAML metadata.

    Parameters
    ------------
    file_name : str
        Path of the CSV to convert. The file is read without a header row and its
        columns are taken, in order, as ['timestamp','R','A','C','F','V','T'].
    output_filename : str, optional
        HDF5 file to create (opened in 'w' mode, so an existing file is replaced).
        Defaults to 'test.h5'.
    metadata_dir : str, optional
        Directory handed to convert_yaml_to_hdf5. Defaults to './metadata'.

    Returns
    ------------
    None. The frame is written under Key(building=1, meter=1).

    Raises
    ------------
    AssertionError
        If the 1-minute resampled frame still contains missing values
        (i.e. there are gaps in the input).
    '''
    TIMESTAMP_COLUMN_NAME = "timestamp"
    TIMEZONE = "Asia/Kolkata"
    FREQ = "1min"  # same bucket size as the former "1T"; the "T" alias is deprecated in recent pandas
    LEVEL_NAMES = ['physical_quantity', 'type']

    # CSV column letter -> (physical_quantity, type) pair used as the two-level column header.
    column_mapping = {
        'F': ('frequency', ""),
        'V': ('voltage', ""),
        'T': ('power', 'active'),
        'C': ('current', ''),
        'R': ('power', 'reactive'),
        'A': ('power', 'apparent'),
    }

    df = pd.read_csv(file_name, names=[TIMESTAMP_COLUMN_NAME, 'R', 'A', 'C', 'F', 'V', 'T'])

    key = Key(building=1, meter=1)
    print('Loading ', 1)

    df.index = pd.to_datetime(df[TIMESTAMP_COLUMN_NAME].values)
    # tz_convert raises on tz-naive timestamps, so naive input is localized instead.
    if df.index.tz is None:
        df = df.tz_localize(TIMEZONE)
    else:
        df = df.tz_convert(TIMEZONE)
    # Keyword form: the positional `axis` argument is not accepted by pandas 2.x.
    df = df.drop(columns=TIMESTAMP_COLUMN_NAME)
    # NOTE(review): rebuilding the index from `.values` discards the timezone again,
    # leaving tz-naive UTC instants. Kept as in the original; confirm this is what
    # the downstream loader expects.
    df.index = pd.to_datetime(df.index.values)
    df.columns = pd.MultiIndex.from_tuples(
        [column_mapping[x] for x in df.columns],
        names=LEVEL_NAMES
    )
    # Unparseable cells become NaN and are removed by dropna(); errors='ignore' would
    # leave them in place and make the float32 cast below fail.
    df = df.apply(pd.to_numeric, errors='coerce')
    df = df.dropna()
    df = df.astype(np.float32)
    df = df.sort_index()
    df = df.resample(FREQ).mean()
    assert not df.isnull().values.any(), "resampled data contains gaps (NaN rows)"

    # The store is opened only once the frame is ready, so a bad CSV does not truncate
    # an existing output file, and it is closed even if the write fails.
    store = get_datastore(output_filename, format='HDF', mode='w')
    try:
        store.put(str(key), df)
    finally:
        store.close()
    convert_yaml_to_hdf5(metadata_dir, output_filename)

    print("Done converting test data to HDF5!")
    return

In [3]:
# Convert the raw readings in ac_seconds4.csv; convert_data writes the result to test.h5,
# which the 'test' section of the experiment and the DataSet cell below both read.
convert_data('ac_seconds4.csv')

Loading  1
Done converting YAML metadata to HDF5!
Done converting test data to HDF5!


In [4]:
from combinatorial_optimisation import CO
from fhmm_exact import FHMMExact
from hart_85 import Hart85
from mean import Mean

In [18]:
# Experiment description handed to API(): train the listed disaggregators on
# iAWE building 1 and evaluate them on building 1 of the converted test file.
experiment1 = {
    'power': {
        'mains': ['apparent', 'active'],
        'appliance': ['apparent', 'active'],
    },
    'sample_rate': 60,  # presumably seconds (matches the 1-minute resample) - TODO confirm
    'appliances': [
        'air conditioner',
        'fridge',
        'washing machine',
        'clothes iron',
        'television',
    ],
    'methods': {
        "CO": CO({}),
        "FHMM": FHMMExact({'num_of_states': 2}),
        'Mean': Mean({}),
        'Hart': Hart85({}),
    },
    'train': {
        'datasets': {
            'iAWE': {
                'path': './iAWE.h5',
                'buildings': {
                    1: {
                        'start_time': '2013-07-13',
                        'end_time': '2013-08-04',
                    },
                },
            },
        },
    },
    'test': {
        'datasets': {
            'CAXE': {
                'path': './test.h5',
                'buildings': {
                    1: {
                        'start_time': '2020-08-12',
                        'end_time': '2020-08-14',
                    },
                },
            },
        },
        # 'metrics' is a sibling of 'datasets' inside 'test'.
        'metrics': ['rmse'],
    },
}

In [19]:
from api import API

In [20]:
# Run the experiment described by `experiment1`.
# NOTE(review): the recorded output of this cell stops while loading iAWE building 1 with
# "AttributeError: 'NaTType' object has no attribute 'tz'". The cells below carry lower
# execution counts, so they appear to use a result left over from an earlier, successful
# run - re-check with Restart & Run All.
api_results_experiment_1 = API(experiment1)

Started training for  CO
Joint training for  CO
............... Loading Data for training ...................
Loading data for  iAWE  dataset
Loading building ...  1


AttributeError: 'NaTType' object has no attribute 'tz'

In [14]:
# Per-appliance predictions of the 'CO' method (one column per appliance, see df.columns below).
df = api_results_experiment_1.pred_overall['CO']

In [15]:
df

Unnamed: 0,air conditioner,fridge,washing machine,clothes iron,television
0,0.0,118.0,0.0,0.0,0.0
1,0.0,100.0,0.0,0.0,75.0
2,0.0,100.0,0.0,0.0,75.0
3,0.0,100.0,0.0,0.0,75.0
4,0.0,100.0,0.0,0.0,75.0
...,...,...,...,...,...
110,0.0,0.0,0.0,0.0,0.0
111,0.0,0.0,0.0,0.0,0.0
112,0.0,0.0,0.0,0.0,0.0
113,0.0,0.0,0.0,0.0,0.0


In [16]:
df.columns

Index(['air conditioner', 'fridge', 'washing machine', 'clothes iron',
       'television'],
      dtype='object')

In [21]:
# Row positions (0-based) of `df` whose values sum to zero across all columns.
# Computed with one vectorised row sum instead of a manual while/iloc loop; the
# unused counters of the earlier version are gone.
row_totals = df.sum(axis=1)
idx_val = [pos for pos, total in enumerate(row_totals) if total == 0]
print(idx_val)
    

[58, 59, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113]


In [23]:
from nilmtk import DataSet

In [24]:
# Open the HDF5 file produced by convert_data as a nilmtk DataSet.
test = DataSet('./test.h5')



In [27]:
# Mains readings of building 1. load() is consumed with next(), so only the first
# item it yields is kept.
test_df = next(test.buildings[1].elec.mains().load())

In [33]:
test_df[('power', 'active')]

2020-08-12 18:20:00     89.849998
2020-08-12 18:21:00    109.333336
2020-08-12 18:22:00    108.516670
2020-08-12 18:23:00    109.966667
2020-08-12 18:24:00    108.516670
                          ...    
2020-08-12 20:10:00      0.000000
2020-08-12 20:11:00      0.000000
2020-08-12 20:12:00      0.000000
2020-08-12 20:13:00      0.000000
2020-08-12 20:14:00    -32.833332
Freq: T, Name: (power, active), Length: 115, dtype: float32

In [35]:
# Report the window from the third sample to the last one.
# NOTE(review): position 2 is hard-coded; presumably the first sample attributed to the
# unknown device - confirm, or derive it from idx_val.
print(f'Unknown Device found running since : {test_df.index.values[2]} to {test_df.index.values[-1]}')

Unknown Device found running since : 2020-08-12T18:22:00.000000000 to 2020-08-12T20:14:00.000000000


In [36]:
# Ask the user for a label for the device.
# NOTE(review): the answer is not used by the slicing cell that follows, and a later
# cell rebinds `name`.
name = input('Name the device:\n')

Name the device:
wallfan office


In [37]:
# Label slice from the third timestamp through the last one (both ends included).
wall_fan_df = test_df.loc[test_df.index.values[2]:test_df.index.values[-1]]

In [38]:
wall_fan_df

physical_quantity,frequency,voltage,power,power,current,power
type,Unnamed: 1_level_1,Unnamed: 2_level_1,apparent,active,Unnamed: 5_level_1,reactive
2020-08-12 18:22:00,49.563000,251.358994,156.751831,108.516670,0.626500,112.330162
2020-08-12 18:23:00,49.564667,251.067505,158.317169,109.966667,0.633833,113.147499
2020-08-12 18:24:00,49.563168,250.861664,157.220001,108.516670,0.630833,113.026665
2020-08-12 18:25:00,49.564999,250.563660,158.771332,109.933334,0.644000,113.648163
2020-08-12 18:26:00,49.566502,250.349167,149.932159,103.366669,0.603667,107.867332
...,...,...,...,...,...,...
2020-08-12 20:10:00,49.962551,213.661957,0.000000,0.000000,0.010000,0.000000
2020-08-12 20:11:00,49.974342,213.674911,0.000000,0.000000,0.010000,0.000000
2020-08-12 20:12:00,49.972694,213.694992,0.000000,0.000000,0.010000,0.000000
2020-08-12 20:13:00,49.951321,213.550751,0.000000,0.000000,0.010000,0.000000


In [41]:
def missing_elements(L, start, end):
    """Yield, in ascending order, the integers missing from L[start..end].

    Parameters
    ----------
    L : sequence of int
        Assumed to hold strictly increasing integers; the "is this half
        consecutive" test below relies on that.
    start, end : int
        Inclusive positions delimiting the part of L to inspect. Callers pass
        ``0`` and ``len(L) - 1``.

    Yields
    ------
    int
        Every integer strictly between two neighbouring entries of the range
        that is not itself in L. Nothing is yielded for an empty or
        single-element range.
    """
    # An empty list arrives as end == -1; without this guard L[end] raised
    # IndexError (or, for a non-empty list, wrapped around to the last element).
    if end <= start:
        return

    if end - start == 1:
        # Two neighbouring entries: everything strictly between them is missing.
        if L[end] - L[start] > 1:
            yield from range(L[start] + 1, L[end])
        return

    index = start + (end - start) // 2

    # is the lower half consecutive?
    consecutive_low = L[index] == L[start] + (index - start)
    if not consecutive_low:
        yield from missing_elements(L, start, index)

    # is the upper part consecutive?
    consecutive_high = L[index] == L[end] - (end - index)
    if not consecutive_high:
        yield from missing_elements(L, index, end)

def main():
    """Print the integers missing between neighbouring entries of the global ``idx_val``.

    The earlier version ended in an ``if`` statement with no body, which is a
    syntax error; no action for that branch was ever written, so only the
    printing is kept.
    """
    L = idx_val
    # Materialise the generator once, and skip the call entirely for an empty list
    # (len(L) - 1 would be -1).
    missing = list(missing_elements(L, 0, len(L) - 1)) if L else []
    print(missing)

main()

[60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76]


In [52]:
# Split idx_val into runs of consecutive integers, e.g. [58, 59, 77, 78] -> [[58, 59], [77, 78]].
# A value extends the current run when it is exactly one more than the run's last
# entry; otherwise it starts a new run. This replaces the bare `except:` that used
# IndexError as the end-of-list signal and hid every other error.
seqs = []
for val in idx_val:
    if seqs and val == seqs[-1][-1] + 1:
        seqs[-1].append(val)
    else:
        seqs.append([val])
        

In [56]:
(seqs)[0]

[58, 59]

In [59]:
# Reduce every run in seqs to its [first, last] position pair.
clean_seq = [[run[0], run[-1]] for run in seqs]

In [60]:
clean_seq

[[58, 59], [77, 113]]

In [70]:
# For every [first, last] position pair, ask the user for a device name and export the
# matching slice of the mains readings to "<name>.csv".
# NOTE(review): the positions were derived from the prediction frame `df` but are applied
# to `test_df`; this assumes both frames are row-aligned - confirm (their recorded
# lengths differ by one).
for first_pos, last_pos in clean_seq:
    start = test_df.index.values[first_pos]
    end = test_df.index.values[last_pos]
    name = input(f'Name the unknown Device running between: {start} and {end}').strip()
    # The answer becomes a file name: replace path separators and other unusual
    # characters so it cannot point outside the working directory.
    safe_name = ''.join(
        ch if ch.isalnum() or ch in ' ._-' else '_' for ch in name
    ).strip(' .')
    if not safe_name:
        # An empty answer would otherwise write a file literally called ".csv".
        safe_name = f'unknown_device_{first_pos}_{last_pos}'
    unknown_df = test_df[start:end]
    unknown_df.to_csv(f'{safe_name}.csv')

Name the unknown Device running between: 2020-08-12T19:18:00.000000000 and 2020-08-12T19:19:00.000000000a
Name the unknown Device running between: 2020-08-12T19:37:00.000000000 and 2020-08-12T20:13:00.000000000b
