PyNILM - Exploratory Data Analysis
---



## Preparing Environment

In [None]:
# # COLAB VERSION
# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)

In [None]:
import warnings
# Silence every warning for the rest of the session. This keeps cell outputs
# clean, but it also hides deprecation/runtime warnings from the libraries below.
warnings.filterwarnings(action="ignore")

# TODO: Create PyNILM python package
import sys
# The project sources are imported as `src.*` further down, so the parent
# folder of this notebook must be on the module search path.
sys.path.append('../') # enabling pynilm import (src content)

import numpy as np

from tqdm import tqdm
from pathlib import Path
from nilmtk.utils import print_dict
from collections import Counter
from matplotlib import pyplot as plt
from plotly import express as px
from plotly import io as pio
# Default renderer used by plotly figures shown in this notebook.
# NOTE(review): 'vscode' presumably only renders inside VS Code - change it when
# running the notebook elsewhere.
pio.renderers.default = 'vscode'

## Experiment Details - REDD

### Defining Experiment Configuration

In [None]:
# Dataset file
# Location of the REDD `.h5` file, given relative to the notebook's working
# directory; it is handed to DataWrapper as `dataset_path` in the cells below.
redd_filepath = '../data/redd.h5'
# Echo the path as the cell output
redd_filepath

In [None]:
# experiment params
# Forwarded to DataWrapper as `sample_period`.
# NOTE(review): presumably the resampling period in seconds - confirm.
sample_period = 2
# Building number selected from the dataset (also shown in the overview plot title)
building_redd = 3
# Appliance names requested from the wrapper; the same names are used below as
# column labels when plotting each appliance.
appliances_redd = ['washer dryer', 'microwave', 'dish washer', 'fridge']

### Loading Data - `Data Wrapper`

In [None]:
from src.data import DataWrapper

# Load the data series between the start and end timestamps.
# The options are gathered in a dict first so the whole configuration can be
# read at a glance, then unpacked into the wrapper.
full_period_options = dict(
    dataset_path=redd_filepath,
    building=building_redd,
    appliances=appliances_redd,
    sample_period=sample_period,
    start='2011-04-01 00:00:00',
    end='2011-04-29 23:59:59',
    return_mode='dataframe',
    windows_size=None,    # no window size requested for this first look at the data
    windows_stride=None,  # no window stride requested either
    get_activations=True,
    activations_type=int,
    debug=False,
)
redd_train = DataWrapper(**full_period_options)

In [None]:
# Preview of the data held by the wrapper (requested with return_mode='dataframe'
# and no window size/stride); as the last expression of the cell it is rendered
# as the cell output.
redd_train.data

### Aggregated Power Consumption (Mains)

In [None]:
# Select the aggregated signal as a one-column frame, then draw it on a wide canvas.
# The trailing `;` suppresses the Axes repr in the cell output.
mains_frame = redd_train.data[['mains']]
mains_frame.plot(figsize=(20, 5));

### Appliances Power Consumption

In [None]:
# One panel per appliance, laid out side by side.
# `squeeze=False` makes `plt.subplots` always return a 2-D array of axes, so the
# loop below also works when `appliances_redd` holds a single appliance (with the
# default squeeze a lone Axes object is returned, which cannot be zipped).
fig, axes = plt.subplots(1, len(appliances_redd), figsize=(20, 5), squeeze=False)
axes = axes.ravel()  # flatten to the 1-D sequence of axes the loop expects

for a, ax in zip(appliances_redd, axes):
    # Double brackets keep a DataFrame, so the column name shows up in the legend
    redd_train.data[[a]].plot(ax=ax)
    ax.set_title(a.title())

# Figure-level calls go through `fig` explicitly instead of the pyplot
# "current figure" state.
fig.suptitle("Appliances Power Consumption")
fig.tight_layout()
plt.show()

### Power Consumption Overview 

In [None]:
# Interactive line chart of the whole dataframe held by the wrapper.
# The title is assembled first: main line, then an HTML <sup> subtitle with the
# period and the sample period.
dataset_name = redd_train.dataset.metadata['name']
overview_title = (
    f"Building #{building_redd} Power Consumption of {dataset_name} database"
    f"<br><sup>From `{redd_train.start}` To `{redd_train.end}` (sample period = {sample_period})</sup>"
)

fig = px.line(redd_train.data, title=overview_title)

# Axis and legend captions
fig.update_layout(
    legend_title="Legend",
    yaxis_title="Power",
    xaxis_title='Datetime',
)

fig.show(renderer='vscode')

### Windowing Time Series

#### Global Dataframe

In [None]:
# Build windows out of ONE dataframe that contains ALL appliance data.
# Same dataset, building, appliances and period as before; the settings that
# matter here are the window size and stride.
windowed_options = dict(
    dataset_path=redd_filepath,
    building=building_redd,
    appliances=appliances_redd,
    sample_period=sample_period,
    start='2011-04-01 00:00:00',
    end='2011-04-29 23:59:59',
    return_mode='dataframe',
    windows_size=1000,    # <-- windowing turned on
    windows_stride=1000,  # <-- stride equal to the window size
    get_activations=True,
    activations_type=int,
    debug=False,
)
redd_train = DataWrapper(**windowed_options)

In [None]:
# Count how often each activation value occurs, per appliance
print('Activation distribution by appliance:\n')
for a, s in redd_train.activations.items():
    print(a, '=', dict(Counter(s)))
print('\n------------------------------------------------\n')

# Generating status
# For every appliance, plot mains vs. appliance for the first few windows whose
# activation flag is truthy.
max_examples = 3  # number of example windows plotted per appliance

print('Mains/Appliance Activations examples:\n\n')
for a in appliances_redd:
    print(f'{a.title()}:')

    samples = 0
    for i in range(len(redd_train.data)):
        if samples >= max_examples:
            # Enough examples for this appliance: stop scanning the remaining
            # windows instead of slicing each one for nothing.
            break

        status = redd_train.activations[a][i]
        if not status:
            continue

        # Slice the window only once we know it is going to be plotted
        df = redd_train.data[i][[redd_train.mains_label, a]]
        df.plot()
        # Only active windows reach this point, so the status is always "ON"
        plt.title(f'Window Sample #{i} | Status = ON')
        plt.show()
        samples += 1

In [None]:
import os
import imageio

# Saving GIF based on windows
# Each window is rendered to a temporary PNG frame, the frames are stitched into
# one GIF, and the temporary frames are deleted afterwards.

# Both the frames and the GIF are written under `images/`; create the folder up
# front, otherwise `plt.savefig` fails on a fresh checkout.
os.makedirs('images', exist_ok=True)

gif_path = f'images/sample{redd_train.sample_period}_windows{redd_train.windows_size}_stride{redd_train.windows_stride}.gif'

filenames = []
try:
    # At most the first 1000 windows are rendered
    for i, window in enumerate(redd_train.data[:1000]):
        window.plot()
        plt.title(f'window {i+1}')
        # gif
        filename = f'images/window_{i+1}.png'
        filenames.append(filename)
        # gif - save frame
        plt.savefig(filename)
        # Close the figure right away so a long run does not pile up open figures
        plt.close()

    # gif - build it
    with imageio.get_writer(gif_path, mode='I') as writer:
        for filename in filenames:
            image = imageio.imread(filename)
            writer.append_data(image)
finally:
    # gif - Remove files
    # Runs even when plotting or encoding fails, so no temporary frame is left
    # behind; the existence check covers a frame whose save did not complete.
    for filename in filenames:
        if os.path.exists(filename):
            os.remove(filename)

#### Individual Dataframes (dict)

In [None]:
# Build windows from individual appliance dataframes, each containing the mains
# and the appliance power consumption information.
per_appliance_options = dict(
    dataset_path=redd_filepath,
    building=building_redd,
    appliances=appliances_redd,
    sample_period=sample_period,
    start='2011-04-01 00:00:00',
    end='2011-04-29 23:59:59',
    return_mode='dict',   # <-- per-appliance output instead of one dataframe
    windows_size=1000,    # <-- windowing turned on
    windows_stride=1000,  # <-- stride equal to the window size
    get_activations=False,
    activations_type=int,
    debug=False,
)
redd_train = DataWrapper(**per_appliance_options)

In [None]:
# Report how many windows are stored under each key of the returned dict
print('Windows by Appliance:\n')
for appliance_name, appliance_windows in redd_train.data.items():
    print(appliance_name, '=>', len(appliance_windows), 'window samples')

## Additional Research

### Optimizing RP parameters - `Time Delay` and `Embedding Dimension`

In [None]:
from src.timeseries import RecurrencePlot

# RP parameters
# NOTE(review): these starting values are not passed to RecurrencePlot in the
# cells below, and `embedding_dimension` is reassigned by the embedding-dimension
# estimation cell - confirm whether they are still needed.
time_delay, embedding_dimension = 1, 2

# Sample data: the series is reloaded as a single dataframe with no window
# size/stride, with the options collected in a dict and unpacked into the wrapper.
rp_sample_options = dict(
    dataset_path=redd_filepath,
    building=building_redd,
    appliances=appliances_redd,
    sample_period=sample_period,
    start='2011-04-01 00:00:00',
    end='2011-04-29 23:59:59',
    return_mode='dataframe',
    windows_size=None,
    windows_stride=None,
    get_activations=True,
    activations_type=int,
    debug=False,
)
redd_train = DataWrapper(**rp_sample_options)

In [None]:
# Recurrence Plot Wrapper
# Fed with the underlying array of values of the mains column.
mains_values = redd_train.data['mains'].values
rp_mains = RecurrencePlot(data=mains_values)

##### `Time Delay` - Mutual Information Method

In [None]:
# Run the mutual-information analysis on the mains series.
# NOTE(review): `delay_range=20` presumably bounds the candidate delays that are
# evaluated, and `delay` is presumably the selected one - confirm against
# RecurrencePlot.calculate_mutual_information.
delay, mutual_information = rp_mains.calculate_mutual_information(delay_range=20, debug=True)
# Echo both results as the cell output
delay, mutual_information

##### `Embedding Dimension` - False Nearest Neighbor Method

In [None]:
# Estimate the embedding dimension for the mains series.
# Note: this rebinds `embedding_dimension`, replacing the value assigned in the
# "RP parameters" cell.
# NOTE(review): `max_dimensions=15` presumably caps the dimensions that are
# tested - confirm against RecurrencePlot.calculate_embedding_dimension.
embedding_dimension, false_neighbors = rp_mains.calculate_embedding_dimension(max_dimensions=15, debug=True)
# Echo both results as the cell output
embedding_dimension, false_neighbors