# Packages


In [None]:
import nilmtk
import pandas as pd
import numpy as np
import matplotlib.font_manager
import matplotlib.pyplot as plt
from time import time
import nilmtk
nilmtk.Appliance.allow_synonyms=False
import scipy.stats 
from datetime import datetime, timedelta
from sklearn.model_selection import TimeSeriesSplit
import seaborn as sns
import math
from math import pi
import pickle

# Load the REFIT dataset through nilmtk. The path is relative, so 'REFIT.h5'
# has to be in the notebook's working directory. Every cell below reads
# buildings and meters from this module-level `dataset` object.
dataset = nilmtk.DataSet('REFIT.h5')

# Define functions to compute ANR and SNR

In [None]:
#get SNR
def SNR(mains, noise):
    """Return sum(|mains|) / sum(|noise|) over the first len(noise) samples.

    Both arguments are indexed by integer position. Only the first
    ``len(noise)`` entries of ``mains`` are read, so ``mains`` must be at
    least that long.
    """
    n_samples = len(noise)
    mains_total = sum(abs(mains[k]) for k in range(n_samples))
    noise_total = sum(abs(noise[k]) for k in range(n_samples))
    return mains_total / noise_total

#get SNR in decibel scale
def SNR_dB(mains, noise):
    """Return SNR(mains, noise) converted to decibels as 10 * log10(ratio)."""
    linear_ratio = SNR(mains, noise)
    return 10 * math.log10(linear_ratio)

#get ANR
def MANR(app, noise):
    """Return the mean of |app| / |noise| over the qualifying samples.

    A sample qualifies when the appliance reading is non-negative and the
    absolute noise is greater than 0.005. Both arguments are indexed by
    integer position over ``range(len(noise))``.

    When no sample qualifies, "MANR not defined" is printed and None is
    returned.
    """
    ratios = []
    for k in range(len(noise)):
        if app[k] >= 0 and abs(noise[k]) > 0.005:
            ratios.append(abs(app[k]) / abs(noise[k]))
    if ratios:
        return sum(ratios) / len(ratios)
    print("MANR not defined")
    return None

# Get noise for the whole experiment period

In [None]:
# House key (string) -> noise sequence for that house; filled by the per-house loop.
noise_dict = dict()

In [None]:
# Experiment settings for the whole-period noise computation.
load = "computer"        # appliance of interest
BUILDING = 1             # default building id
SAMPLE_RATE = 60         # passed to load(sample_period=...); plots refer to it as 1/60 Hz
d1 = '2014-06-20'        # first day of the experiment period
d2 = '2015-06-20'        # last day of the experiment period

#get total noise in building aggregate between dates d1 and d2
def get_noise(mains, sample_rate, building, d1, d2):
    """Return the residual ("noise") power of a building between d1 and d2.

    The residual starts as the mains readings and has the readings of each
    sub-metered appliance subtracted from it, skipping meters whose first
    appliance is typed 'unknown' or 'appliance'. Load that is not attributed
    to a specific appliance therefore stays in the result.

    Parameters
    ----------
    mains : pandas object that supports ``mains[d1:d2]`` label slicing.
    sample_rate : value forwarded to ``load(sample_period=...)``.
    building : building id looked up in the module-level ``dataset``.
    d1, d2 : bounds of the period, used to slice mains and appliance data.

    Returns
    -------
    list
        Residual value per sample. An appliance whose sequence length
        differs from the mains sequence is reported and not subtracted.
    """
    noise_seq2 = mains[d1:d2].values.flatten().tolist()
    elec = dataset.buildings[building].elec
    # NOTE(review): this visits meters 2..len(elec.appliances); confirm that
    # the range reaches the last sub-meter of the building.
    for j in range(1, len(elec.appliances)):
        APPJ1 = elec[j+1]
        # The original test `!= 'unknown' or != 'appliance'` is true for every
        # value, so nothing was ever skipped; both types are excluded here.
        if APPJ1.appliances[0].type['type'] not in ('unknown', 'appliance'):
            dataset_aux = next(APPJ1.load(sample_period = sample_rate))['power'][d1:d2].values.flatten().tolist()
            if len(noise_seq2) != len(dataset_aux):
                print("length of mains and appliance sequences differ")
            else:
                noise_seq2 = [total - used for total, used in zip(noise_seq2, dataset_aux)]
    return noise_seq2

# Per the original note: REFIT has no house 14 but the building ids have no gap,
# so houses 15, 16, 18 and 20 are stored one id lower (14, 15, 17 and 19).
Houses =  ['1', '5', '6', '14', '15', '17', '19'] 

# Compute the whole-period noise of every selected house, keyed by its id string.
for house_key in Houses:
    building_id = int(house_key)
    dataset_mains = next(
        dataset.buildings[building_id].elec[1].load(sample_period=SAMPLE_RATE)
    )['power'][d1:d2]
    noise_dict[house_key] = get_noise(dataset_mains, SAMPLE_RATE, building_id, d1, d2)

# Plot noise 

In [None]:
plt.rcParams["figure.figsize"] = (10, 5)
sns.set_style('whitegrid')

# (key in noise_dict, legend label) pairs, drawn in this order.
# From key '14' on, the label is the house number, one above the stored id.
noise_curves = [('1', "H1"), ('5', "H5"), ('6', "H6"), ('14', "H15"),
                ('15', "H16"), ('17', "H18"), ('19', "H20")]
for house_key, legend_label in noise_curves:
    sns.kdeplot(noise_dict[house_key], bw=0.5, label=legend_label)

# Only the 0-500 range of the x axis is shown.
plt.xlim(left=0, right=500)

plt.grid(linestyle='--', linewidth=1)
plt.title("Density estimation of noise power at 1/60 Hz ", fontsize=16)
plt.xlabel("Active Power (W)", fontsize=16)
plt.ylabel("Density", fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.legend(fontsize=12)
plt.show()

# Plot appliance consumption

In [None]:
load = "computer"
Houses = ['1', '5', '6', '14', '15', '17', '19']

# House key -> flattened readings of the selected appliance (first loaded chunk).
app_dict = {
    house_key: next(
        dataset.buildings[int(house_key)].elec[load].load(sample_period=60)
    ).values.flatten()
    for house_key in Houses
}

In [None]:
plt.rcParams["figure.figsize"] = (10, 5)
sns.set_style('whitegrid')

# (key in app_dict, legend label) pairs, drawn in this order.
# From key '14' on, the label is the house number, one above the stored id.
appliance_curves = [('1', "H1"), ('5', "H5"), ('6', "H6"), ('14', "H15"),
                    ('15', "H16"), ('17', "H18"), ('19', "H20")]
for house_key, legend_label in appliance_curves:
    sns.kdeplot(app_dict[house_key], bw=0.5, label=legend_label)

# Only the 0-500 range of the x axis is shown.
plt.xlim(left=0, right=500)

plt.grid(linestyle='--', linewidth=1)
plt.title(
    "Density estimation of "+str(load)+" power consumption at 1/60 Hz when turned ON",
    fontsize=16,
)
plt.xlabel("Active Power (W)", fontsize=16)
plt.ylabel("Density", fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.legend(fontsize=12)
plt.show()

# Get ANR, SNR and noise for bi-weekly and monthly periods

In [None]:
# Settings for the per-period analysis.
load = "computer"
BUILDING = 1
SAMPLE_RATE = 60
# Meter 1 of a building is its mains channel; keep the full 'power' series here
# and let the per-period code slice it by date.
dataset_mains = next(
    dataset.buildings[BUILDING].elec[1].load(sample_period=SAMPLE_RATE)
)['power']

#get total noise in building aggregate between dates d1 and d2
def get_noise2(mains, sample_rate, building, d1, d2):
    """Return the residual ("noise") power of a building between d1 and d2.

    The residual starts as the mains readings and has the readings of each
    sub-metered appliance subtracted from it, skipping meters whose first
    appliance is typed 'unknown' or 'appliance'. Load that is not attributed
    to a specific appliance therefore stays in the result.

    Parameters
    ----------
    mains : pandas object that supports ``mains[d1:d2]`` label slicing.
    sample_rate : value forwarded to ``load(sample_period=...)``.
    building : building id looked up in the module-level ``dataset``.
    d1, d2 : bounds of the period, used to slice mains and appliance data.

    Returns
    -------
    list
        Residual value per sample. An appliance whose sequence length
        differs from the mains sequence is reported and not subtracted.
    """
    noise_seq2 = mains[d1:d2].values.flatten().tolist()
    elec = dataset.buildings[building].elec
    # NOTE(review): this visits meters 2..len(elec.appliances); confirm that
    # the range reaches the last sub-meter of the building.
    for j in range(1, len(elec.appliances)):
        APPJ1 = elec[j+1]
        # The original test `!= 'unknown' or != 'appliance'` is true for every
        # value, so nothing was ever skipped; both types are excluded here.
        if APPJ1.appliances[0].type['type'] not in ('unknown', 'appliance'):
            dataset_aux = next(APPJ1.load(sample_period = sample_rate))['power'][d1:d2].values.flatten().tolist()
            if len(noise_seq2) != len(dataset_aux):
                print("length of mains and appliance sequences differ")
            else:
                noise_seq2 = [total - used for total, used in zip(noise_seq2, dataset_aux)]
    return noise_seq2


# Boundaries of the bi-weekly analysis windows. Consecutive entries delimit one
# window, so the 27 dates below describe 26 windows. Every step is 14 days
# except the last one (2015-06-05 -> 2015-06-20), which is 15 days.
set_dates_biweekly = ['2014-06-20', '2014-07-04', '2014-07-18', '2014-08-01', '2014-08-15', '2014-08-29', '2014-09-12', '2014-09-26',
            '2014-10-10', '2014-10-24', '2014-11-07', '2014-11-21', '2014-12-05', '2014-12-19', '2015-01-02', '2015-01-16',
            '2015-01-30', '2015-02-13', '2015-02-27', '2015-03-13', '2015-03-27', '2015-04-10', '2015-04-24', '2015-05-08', 
            '2015-05-22', '2015-06-05', '2015-06-20']

# Monthly alternative (13 boundaries -> 12 windows), currently disabled:
#set_dates_month = ['2014-06-20', '2014-07-20', '2014-08-20', '2014-09-20', '2014-10-20', '2014-11-20', '2014-12-20', '2015-01-20',
#            '2015-02-20', '2015-03-20', '2015-04-20', '2015-05-20', '2015-06-20']

# To analyse another granularity (hourly, bi-weekly or monthly), change which
# boundary list the computation cells iterate over.

In [None]:
#get SNR
# One SNR value (in dB) per bi-weekly window.
# The original loop ran over len(set_dates_biweekly) but indexed `set_hours`,
# which is defined in a later cell and has fewer entries, and it selected the
# mains with between_time (a time-of-day filter). The windows are dates, so the
# mains are sliced by date exactly as get_noise2 does, keeping both sequences aligned.
snr_week = []
for start, end in zip(set_dates_biweekly[:-1], set_dates_biweekly[1:]):
    A = get_noise2(dataset_mains, SAMPLE_RATE, BUILDING, start, end)
    B = dataset_mains[start:end].values.flatten()
    snr_week.append(SNR_dB(B, A))

In [None]:
#get ANR
# One appliance-to-noise ratio per bi-weekly window.
# The original loop indexed `set_hours` (defined in a later cell) and read
# `dataset_app`, which is also only created further down. The appliance series
# is loaded here so the cell works in notebook order.
dataset_app = next(dataset.buildings[BUILDING].elec[load].load(sample_period = SAMPLE_RATE))['power']

anr_week = []
for start, end in zip(set_dates_biweekly[:-1], set_dates_biweekly[1:]):
    A = get_noise2(dataset_mains, SAMPLE_RATE, BUILDING, start, end)
    # Slice by date, as get_noise2 does, so appliance and noise samples line up.
    C = dataset_app[start:end].values.flatten()
    anr_week.append(MANR(C, A))

In [None]:
#get noise in the aggregate date
# Total noise (sum over all samples) per bi-weekly window.
# The original created a dict and then called .append on it, which raises
# AttributeError; it also iterated over `set_hours` instead of the bi-weekly dates.
noise_week = []
for start, end in zip(set_dates_biweekly[:-1], set_dates_biweekly[1:]):
    A = get_noise2(dataset_mains, SAMPLE_RATE, BUILDING, start, end)
    noise_week.append(sum(A))

# Get ANR, SNR and noise for each hour of the day

In [None]:
# Settings for the hour-of-day analysis.
BUILDING = 1
SAMPLE_RATE = 60
d1 = '2014-06-20'   # start of the experiment period
d2 = '2015-06-20'   # end of the experiment period

#get total noise in building aggregate
def get_noise_hours(mains, sample_rate, building, hour1, hour2, appliance_list=None):
    """Return the residual ("noise") power for one time-of-day window.

    The mains readings selected with ``between_time(hour1, hour2)`` have the
    readings of each listed appliance, restricted to the same window,
    subtracted from them.

    Parameters
    ----------
    mains : pandas object that supports ``between_time``.
    sample_rate : value forwarded to ``load(sample_period=...)``.
    building : building id looked up in the module-level ``dataset``.
    hour1, hour2 : window bounds passed to ``between_time``.
    appliance_list : optional iterable of appliance names to subtract.
        Defaults to the four appliances the original hard-coded list used.

    Returns
    -------
    list
        Residual value per sample. An appliance whose sequence length
        differs from the mains sequence is reported and not subtracted.

    Notes
    -----
    Appliance data is restricted to the module-level ``d1``/``d2`` period,
    so ``mains`` should cover the same period for the lengths to match.
    """
    if appliance_list is None:
        appliance_list = ["dish washer", "computer", "washing machine", "television"]
    noise_seq2 = mains.between_time(hour1, hour2).values.flatten().tolist()
    elec = dataset.buildings[building].elec
    for appliance_name in appliance_list:
        APPJ1 = elec[appliance_name]
        dataset_aux = next(APPJ1.load(sample_period = sample_rate))['power'][d1:d2].between_time(hour1, hour2).values.flatten().tolist()
        if len(noise_seq2) != len(dataset_aux):
            print("length of mains and selected appliance sequences differ")
        else:
            noise_seq2 = [total - used for total, used in zip(noise_seq2, dataset_aux)]
    return noise_seq2

# Mains (meter 1) and computer power series, both restricted to d1..d2.
dataset_mains, dataset_app = (
    next(dataset.buildings[BUILDING].elec[meter_key].load(sample_period=SAMPLE_RATE))['power'][d1:d2]
    for meter_key in (1, "computer")
)

# Hour boundaries '00:00' .. '23:00' plus a closing '00:00'; consecutive
# entries delimit the 24 one-hour windows of the day.
set_hours = ["%02d:00" % hour for hour in range(24)] + ['00:00']

In [None]:
#get SNR
# One SNR value (in dB) per window between consecutive entries of set_hours.
snr_hour = []
for hour_from, hour_to in zip(set_hours[:-1], set_hours[1:]):
    noise_slice = get_noise_hours(dataset_mains, SAMPLE_RATE, BUILDING, hour_from, hour_to)
    mains_slice = dataset_mains.between_time(hour_from, hour_to).values.flatten()
    snr_hour.append(SNR_dB(mains_slice, noise_slice))

In [None]:
#get ANR
# One appliance-to-noise ratio per window between consecutive entries of set_hours.
anr_hour = []
for hour_from, hour_to in zip(set_hours[:-1], set_hours[1:]):
    noise_slice = get_noise_hours(dataset_mains, SAMPLE_RATE, BUILDING, hour_from, hour_to)
    app_slice = dataset_app.between_time(hour_from, hour_to).values.flatten()
    anr_hour.append(MANR(app_slice, noise_slice))

In [None]:
#get noise in the aggregate date
# Total noise (sum over all samples) per window between consecutive entries of set_hours.
noise_hour = []
for hour_from, hour_to in zip(set_hours[:-1], set_hours[1:]):
    noise_slice = get_noise_hours(dataset_mains, SAMPLE_RATE, BUILDING, hour_from, hour_to)
    noise_hour.append(sum(noise_slice))

# Plot noise, ANR and SNR

In [None]:
#plot the noise distribution for each hour of the day

plt.rcParams["figure.figsize"] = (18, 5)

# `noise_hour` is a list holding one summed value per hour, so it has no
# .keys()/.values() and a single number cannot form a violin. The per-hour
# noise sequences are rebuilt here, keyed by the window's start hour.
noise_by_hour = {}
for hour_from, hour_to in zip(set_hours[:-1], set_hours[1:]):
    noise_by_hour[hour_from] = get_noise_hours(dataset_mains, SAMPLE_RATE, BUILDING, hour_from, hour_to)
labels, data = list(noise_by_hour.keys()), list(noise_by_hour.values())

# Draw the violins once. The original drew them twice, so an unstyled set with
# extrema bars showed through underneath the styled one.
parts = plt.violinplot(
        data, showmeans=False, showmedians=True,
        showextrema=False)

for pc in parts['bodies']:
    pc.set_facecolor('tab:green')
    pc.set_edgecolor('black')
    pc.set_alpha(1)

# Violins sit at x = 1..len(data), so the tick positions start at 1.
plt.xticks(range(1, len(labels) + 1), labels, fontsize = 14)
plt.yticks(fontsize = 14)
plt.grid(linestyle = '--', linewidth = 1)
plt.title("Distribution of noise power for each hour", fontsize = 16)
plt.xlabel("Hour", fontsize = 16)
plt.ylabel("Active Power (W)", fontsize = 16)
plt.show()

In [None]:
#plot the ANR for each appliance for each hour of the day 

# Hours are labelled 1..24, one bar per entry of anr_hour.
labels = [str(hour) for hour in range(1, 25)]

x = np.arange(24) 
width = 0.5

plt.rcParams["figure.figsize"] = (15,5)

# MANR returns None for an hour where the ratio is undefined; plot those as
# NaN (an empty slot) instead of letting ax.bar fail on None.
anr_values = [np.nan if value is None else value for value in anr_hour]

fig, ax = plt.subplots()
rects1 = ax.bar(x, anr_values, width, label='ANR_pc', color = "tab:olive")

ax.set_ylabel('Scores', fontsize = 16)
ax.set_xlabel('Hour', fontsize = 16)
ax.set_title("ANR for the "+str(load)+" for each hour of the day", fontsize = 16)
ax.set_xticks(x)
# np.arange(0, 0.15, 0.05) stops before 0.15 and produced 3 ticks for the 4
# labels; the tick positions are now given explicitly so both agree.
y_ticks = [0, 0.05, 0.1, 0.15]
ax.set_yticks(y_ticks)
ax.set_yticklabels(y_ticks, fontsize = 16)
ax.set_xticklabels(labels, fontsize = 16)
ax.legend(fontsize = 16)

ax.grid(linestyle = '--', linewidth = 1)
fig.tight_layout()
plt.show()

In [None]:
#plot SNR for each appliance for biweekly period

# One label per bi-weekly window (26 windows).
labels = [str(week) for week in range(1, 27)]

x = np.arange(26) 
width = 0.5

plt.rcParams["figure.figsize"] = (15,5)

fig, ax = plt.subplots()
# The original plotted `noise_hours`, a name that is never defined. The title
# and legend say SNR, so the bi-weekly SNR values are plotted.
rects1 = ax.bar(x, snr_week, width, label='SNR', color = "tab:brown")

ax.set_ylabel('Scores', fontsize = 16)
ax.set_xlabel('Week', fontsize = 16)
ax.set_title("SNR for each week", fontsize = 16)
ax.set_xticks(x)
# np.arange(0, 2.5, 0.5) stops before 2.5 and produced 5 ticks for the 6
# labels; the tick positions are now given explicitly so both agree.
y_ticks = [0, 0.5, 1, 1.5, 2, 2.5]
ax.set_yticks(y_ticks)
ax.set_yticklabels(y_ticks, fontsize = 16)
ax.set_xticklabels(labels, fontsize = 16)
ax.legend(fontsize = 16)

ax.grid(linestyle = '--', linewidth = 1)
fig.tight_layout()
plt.show()

In [None]:
#plot SNR for each appliance for each month

# The original plotted `noise_hours`, a name that is never defined, and no
# monthly SNR is computed anywhere else. It is computed here from the monthly
# boundaries (13 dates -> 12 windows) that appear commented out next to
# set_dates_biweekly.
set_dates_month = ['2014-06-20', '2014-07-20', '2014-08-20', '2014-09-20', '2014-10-20', '2014-11-20', '2014-12-20', '2015-01-20',
            '2015-02-20', '2015-03-20', '2015-04-20', '2015-05-20', '2015-06-20']

snr_month = []
for start, end in zip(set_dates_month[:-1], set_dates_month[1:]):
    month_noise = get_noise2(dataset_mains, SAMPLE_RATE, BUILDING, start, end)
    # Slice by date, as get_noise2 does, so mains and noise samples line up.
    month_mains = dataset_mains[start:end].values.flatten()
    snr_month.append(SNR_dB(month_mains, month_noise))

labels = [str(month) for month in range(1, 13)]

x = np.arange(12) 
width = 0.5

plt.rcParams["figure.figsize"] = (15,5)

fig, ax = plt.subplots()
rects1 = ax.bar(x, snr_month, width, label='SNR', color = "tab:brown")

ax.set_ylabel('Scores', fontsize = 16)
ax.set_xlabel('Month', fontsize = 16)
ax.set_title("SNR for each month", fontsize = 16)
ax.set_xticks(x)
# np.arange(0, 3, 0.5) stops before 3 and produced 6 ticks for the 7 labels;
# the tick positions are now given explicitly so both agree.
y_ticks = [0, 0.5, 1, 1.5, 2, 2.5, 3]
ax.set_yticks(y_ticks)
ax.set_yticklabels(y_ticks, fontsize = 16)
ax.set_xticklabels(labels, fontsize = 16)
ax.legend(fontsize = 16)

ax.grid(linestyle = '--', linewidth = 1)
fig.tight_layout()
plt.show()