In [62]:
import sys, json
import numpy
from statistics import mean
from time import time, sleep
from datetime import datetime
from libDataLoaders import dataset_loader
from libFolding import Folding
from libPMF import EmpiricalPMF
from libPoisson import PoissonPMF
from libSSHMM import SuperStateHMM, frange
from libAccuracy import Accuracy
import pandas as pd


ε = 0.00021  # 0.00021, magic number, I forget how I calculated this value (maybe 110 / 
             # 524544)
             
mainPath = 'e:/PROF NOBERT/MPHIL Research Area/OneDrive/My Work/Algorithms/AI Projects/NILM_THESIS_CODE/SparseNILM/'

#### TRAINING PARAMETER CONFIGUREATIONS

In [63]:
ids = 'BME,CDE' # ids are the appliances
modeldb = 'BigO_L01' # file name of model                                                                 
precision = 10 #  number; e.g. 10 would convert A to dA.
denoised = 'denoised' #denoised aggregate reads, else noisy.
dataset = 'AMPdsR1_1min_A' #file name of dataset to use (omit file ext).
max_states = 4 #  max number of states a each load can have.
folds = 1 # number usually set to 10, 1 means data same for train/test.
max_obs = 200  # The max observed value, e.g. 200.1 A.'

In [64]:

precision = float(precision)
max_obs = float(max_obs)
denoised = denoised == 'denoised'
max_states = int(max_states)
folds = int(folds)
ids = ids.split(',')
datasets_dir = mainPath + '/datasets/%s.csv'
logs_dir = mainPath + '/logs/%s.log'
models_dir = mainPath + '/models/%s.json'

In [65]:
sshmms = []
train_times = []
folds = Folding(dataset_loader(datasets_dir % dataset, ids, precision, denoised), folds)

Loading AMPds R1 dataset at e:/PROF NOBERT/MPHIL Research Area/OneDrive/My Work/Algorithms/AI Projects/NILM_THESIS_CODE/SparseNILM//datasets/AMPdsR1_1min_A.csv...
	Setting timestamp column TimeStamp as index.
	Modfity data with precision 10.000000 then convert to int...
	Keeping only columns ['BME'].
	Denoising aggregate meter column WHE.
	Calculating unmetered column UNE.

Created 1 fold: 524544.


In [67]:
for (fold, priors, testing) in folds:
    del testing
    tm_start = time()
    # foldName = mainPath + 'folds/fold_' + str(fold) + '.csv'
    # priors.to_csv(foldName)

    print()
    print('Creating load PMFs and finding load states...')
    print('\tMax partitions per load =', max_states)
    pmfs = []
    for id in ids:
        pmfs.append(EmpiricalPMF(id, max_obs * precision, list(priors[id]), True))
        pmfs[-1].quantize(max_states, ε)
        print('norm_bins', pmfs[-1].norm_bins)
        print('bin_peaks', pmfs[-1].bin_peaks)
        print('bin_count', pmfs[-1].bin_count)
        print('maxobs', pmfs[-1].maxobs)
        print('numobs', pmfs[-1].numobs)

        #print('pmf',pmfs[-1].pmf_data)
        print('quantization',pmfs[-1].quantization)

    print()
    print('Creating compressed SSHMM...')
    incro = 1 / precision
    sshmm = SuperStateHMM(pmfs, [i for i in frange(0, max_obs + incro, incro)])

    print('\tConverting DataFrame in to obs/hidden lists...')
    obs_id = list(priors)[0]
    #print(priors)
    obs = list(priors[obs_id])
    hidden = [i for i in priors[ids].to_records(index=False)]

    sshmm.build(obs, hidden)
    sshmms.append(sshmm)

    train_times.append((time() - tm_start) / 60)



************************ VALIDATION ROUND:  1/ 1 ************************

Building priors and testing datasets...

Creating load PMFs and finding load states...
	Max partitions per load = 4
	PMF (Frequency Only) for BME: [403947, 39227, 10988, 1687, 427, 318, 243, 224, 1722, 120, 619, 142, 66, 5, 3, 2, 1, 4, 3, 7, 19, 32, 13, 4, 5, 98, 62, 93, 49902, 4865, 4152, 249, 1907, 701, 1133, 247, 323, 549, 166, 86, 150, 2, 18, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 1, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2]
	Normal PMF for : [0.7700917368228404, 0.07478304965836993, 0.02094771839921913, 0.003216126769155686, 0.0008140403855539288, 0.0006062408491947292, 0.0004632595168374817, 0.000427037579306979, 0.003282851390922401, 0.0002287701317715959, 0.0011800725963884821, 0.0002707113225963885, 0.00012582357247437775

## PREVIEW RESULT OF TRAINING

In [68]:
print()
print('Train Time was', round(sum(train_times), 2), ' min (avg ', round(sum(train_times) / len(train_times), 2), ' min/fold).')

print()
fn = models_dir % modeldb
print('Converting model %s to JSON for storage in %s...' % (modeldb, fn))
fp = open(fn, 'w')
json.dump(sshmms, fp, default=(lambda o: o._asdict()), sort_keys=True, indent=None, separators=(',', ':'))
fp.close()
  
report = []
report.append(['Model DB', modeldb])
report.append(['Run Date', datetime.now()])
report.append(['Dataset', dataset])
report.append(['Precision', precision])
report.append(['Max States', max_states])
report.append(['Denoised?', denoised])
report.append(['Model Noise?', ('UNE' in ids)])
report.append(['Folds', folds.folds])
report.append(['IDs', ' '.join(ids)])
report.append(['Train Time', round(sum(train_times), 2)])
report.append(['Avg Time/Fold', round(sum(train_times) / len(train_times), 2)])
report.append(['Avg Load States', round(sum([mean(sshmm.Km) for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['Sum Load States', round(sum([sum(sshmm.Km) for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['Super-States', round(sum([sshmm.K for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['Loads', sshmms[0].M])
report.append(['Obs', sshmms[0].N])
report.append(['Time Len', folds.data_size])
report.append(['P0 Size', round(sum([sshmm.P0.size() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['P0 Non-Zero', round(sum([sshmm.P0.nonzero() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['P0 Sparsity', round(sum([sshmm.P0.sparsity() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['P0 bytes', round(sum([sshmm.P0.bytes() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['A Size', round(sum([sshmm.A.size() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['A Non-Zero', round(sum([sshmm.A.nonzero() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['A Sparsity', round(sum([sshmm.A.sparsity() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['A bytes', round(sum([sshmm.A.bytes() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['B Size', round(sum([sshmm.B.size() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['B Non-Zero', round(sum([sshmm.B.nonzero() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['B Sparsity', round(sum([sshmm.B.sparsity() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['B bytes', round(sum([sshmm.B.bytes() for sshmm in sshmms]) / len(sshmms), 1)])

print()
print('-------------------------------- CSV REPORTING --------------------------------')
print()
print(','.join([c[0] for c in report]))
print(','.join([str(c[1]) for c in report]))
print()
print('-------------------------------- ------------- --------------------------------')

print()
print('End Time = ', datetime.now(), '(local time)')
print()
print('DONE!!!')
print()



Train Time was 0.46  min (avg  0.23  min/fold).

Converting model BigO_L01 to JSON for storage in e:/PROF NOBERT/MPHIL Research Area/OneDrive/My Work/Algorithms/AI Projects/NILM_THESIS_CODE/SparseNILM//models/BigO_L01.json...

-------------------------------- CSV REPORTING --------------------------------

Model DB,Run Date,Dataset,Precision,Max States,Denoised?,Model Noise?,Folds,IDs,Train Time,Avg Time/Fold,Avg Load States,Sum Load States,Super-States,Loads,Obs,Time Len,P0 Size,P0 Non-Zero,P0 Sparsity,P0 bytes,A Size,A Non-Zero,A Sparsity,A bytes,B Size,B Non-Zero,B Sparsity,B bytes
BigO_L01,2022-02-25 22:47:05.020327,AMPdsR1_1min_A,10.0,4,True,False,1,BME,0.46,0.23,4.0,4.0,4.0,1,2001,524544,4.0,4.0,0.0,320.0,16.0,16.0,0.0,264.0,8004.0,52.0,1.0,424.0

-------------------------------- ------------- --------------------------------

End Time =  2022-02-25 22:47:05.022326 (local time)

DONE!!!



In [69]:
r_pd = pd.DataFrame(report,columns=['Name','Value'])

In [70]:
r_pd

Unnamed: 0,Name,Value
0,Model DB,BigO_L01
1,Run Date,2022-02-25 22:47:05.020327
2,Dataset,AMPdsR1_1min_A
3,Precision,10
4,Max States,4
5,Denoised?,True
6,Model Noise?,False
7,Folds,1
8,IDs,BME
9,Train Time,0.46
