## How to carry out your own NILM experiment

The NILM experiment is built on nilmtk.api, in the same way as nilmtk-contrib.

First, import **nilmtk.api** and the **specific algorithms you need from nilmtk.disaggregate**.

In [None]:
from nilmtk.api import API
import warnings
warnings.filterwarnings("ignore")

from nilmtk.disaggregate import SGN
from nilmtk.disaggregate import DAE
from nilmtk.disaggregate import Seq2Point
from nilmtk.disaggregate import BiLSTM
from nilmtk.disaggregate import EnerGAN
from nilmtk.disaggregate import Seq2Seq
from nilmtk.disaggregate import AttentionCNN

from nilmtk.disaggregate import MUL_DAE
from nilmtk.disaggregate import MUL_BiLSTM
from nilmtk.disaggregate import MUL_Seq2Point

In [None]:
# Convert the ECO data
from nilmtk.dataset_converters import convert_eco

# Time zone is given as a string, e.g. 'Europe/Zurich'
timezone = 'Europe/Zurich'

# Source folder and target .h5 path passed to the converter
eco_source_dir = r'C:\Users\char\Documents\GitHub\NeuralNILM_Pytorch\data\eco'
eco_target_h5 = r'C:\Users\char\Documents\GitHub\NeuralNILM_Pytorch\data_hdf5\eco\data_cleaned.h5'

convert_eco(eco_source_dir, eco_target_h5, timezone)

In [None]:
# Convert the REFIT data
from nilmtk.dataset_converters import convert_refit

# Source folder and target .h5 path passed to the converter
refit_source_dir = r'C:\Users\char\Documents\GitHub\NeuralNILM_Pytorch\data\refit\RAW_DATA_CLEAN'
refit_target_h5 = r'C:\Users\char\Documents\GitHub\NeuralNILM_Pytorch\data_hdf5\refit\data_refit.h5'

convert_refit(refit_source_dir, refit_target_h5)

### Metadata information regarding appliances in each building

Then, specify the **experiment configuration** as described in the code annotations.

In [None]:
# Data exploration
import nilmtk
from nilmtk import DataSet
from nilmtk.utils import print_dict

# Switch off the `allow_synonyms` flag on nilmtk's Appliance class
nilmtk.Appliance.allow_synonyms = False

# Open the converted REFIT file
refit_h5_path = r'C:\Users\char\Documents\GitHub\NeuralNILM_Pytorch\data_hdf5\refit\data_refit.h5'
refit = DataSet(refit_h5_path)

# `elec` object of building 8; the bare name on the last line makes the
# notebook display it
elec = refit.buildings[8].elec
elec

### Visualizing the training data (REFIT)

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Function to read the appliance names mapping from the HOUSES_Labels.txt
def read_appliance_mappings(filepath):
    """Read the per-house appliance labels from a CSV file.

    The file must have a ``House_id`` column (used as the index) and any
    number of ``Appliance<N>`` columns. The columns are discovered from the
    header, so files with fewer or more than nine appliance columns are
    handled.

    Parameters
    ----------
    filepath : str, path object or file-like object
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    dict
        ``{house_id: {'Appliance<N>': label, ...}}``. Appliance columns are
        in ascending numeric order, and cells that are empty for a house are
        left out of that house's mapping.
    """
    mappings_df = pd.read_csv(filepath, index_col='House_id')

    prefix = 'Appliance'
    # Keep only columns named "Appliance" followed by digits.
    appliance_columns = [
        column for column in mappings_df.columns
        if isinstance(column, str)
        and column.startswith(prefix)
        and column[len(prefix):].isdigit()
    ]
    # Numeric sort, so that Appliance10 comes after Appliance9 rather than
    # after Appliance1.
    appliance_columns.sort(key=lambda column: int(column[len(prefix):]))

    appliance_mappings = {}
    for house_id, row in mappings_df.iterrows():
        appliance_mappings[house_id] = {
            column: row[column]
            for column in appliance_columns
            if pd.notnull(row[column])
        }
    return appliance_mappings

# Build the house -> appliance-label lookup from the REFIT labels file
houses_labels_path = r'C:\Users\char\Documents\GitHub\NeuralNILM_Pytorch\data\refit\RAW_DATA_CLEAN\HOUSES_Labels'
appliance_mappings = read_appliance_mappings(houses_labels_path)

# Function to plot for a specific house_id
def plot_house(house_id, appliance_mappings, data_filepath):
    """Plot the hourly mean power of one house as stacked line charts.

    The first subplot shows the ``Aggregate`` column. One further subplot is
    drawn for every entry in the house's appliance mapping.

    Parameters
    ----------
    house_id : hashable
        Key into ``appliance_mappings``.
    appliance_mappings : dict
        ``{house_id: {column_name: label}}``. Each ``column_name`` must be a
        column of the CSV, and ``label`` is used as subplot title and trace
        name.
    data_filepath : str
        CSV file with a ``Time`` column, an ``Aggregate`` column and the
        appliance columns named in the mapping.

    Raises
    ------
    KeyError
        If ``house_id`` is not in ``appliance_mappings`` or a mapped column
        is missing from the CSV.
    """
    # Look the house up first so that an unknown id fails before any file I/O.
    appliance_mapping = appliance_mappings[house_id]

    # Load the readings and index them by their parsed 'Time' column
    df = pd.read_csv(data_filepath)
    df['Time'] = pd.to_datetime(df['Time'])
    df.set_index('Time', inplace=True)

    # Downsample to hourly means. The lower-case 'h' alias is used because
    # the upper-case 'H' spelling is deprecated since pandas 2.2.
    df_resampled = df.resample('h').mean()

    # One row per appliance plus one for the aggregate signal
    num_plots = len(appliance_mapping) + 1

    fig = make_subplots(rows=num_plots, cols=1, shared_xaxes=False, vertical_spacing=0.03,
                        subplot_titles=['Aggregate'] + list(appliance_mapping.values()))

    # Row 1: aggregate signal
    fig.add_trace(
        go.Scatter(x=df_resampled.index, y=df_resampled['Aggregate'], mode='lines', name='Aggregate'),
        row=1, col=1
    )

    # Rows 2..num_plots: one appliance each, in mapping order
    for row_index, (column, label) in enumerate(appliance_mapping.items(), start=2):
        fig.add_trace(
            go.Scatter(x=df_resampled.index, y=df_resampled[column], mode='lines', name=label),
            row=row_index, col=1
        )

    fig.update_layout(
        template='plotly_dark',
        font=dict(family="Arial", size=12, color='white'),
        height=200 * num_plots,
        width=1000,
        title_text=f"Energy Consumption Visualization for House {house_id}",
        showlegend=True
    )

    # Label every y-axis; plotly names them 'yaxis', 'yaxis2', 'yaxis3', ...
    for i in range(num_plots):
        axis_y = f'yaxis{i+1}' if i > 0 else 'yaxis'
        fig['layout'][axis_y].update(title='Power (W)')

    # Show the interactive plot
    fig.show()

# Usage: pick a house and plot its cleaned REFIT CSV
house_id = 8  # Replace with your house_id
data_filepath = rf'C:\Users\char\Documents\GitHub\NeuralNILM_Pytorch\data\refit\RAW_DATA_CLEAN\CLEAN_House{house_id}.csv'
plot_house(house_id, appliance_mappings, data_filepath)

### Visualizing the testing data (ECO)

In [None]:
import os
import pandas as pd
import re
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta

import os
import re
import pandas as pd
from datetime import datetime

def append_smart_meter_data(directory):
    """Load all daily smart-meter CSV files of a folder as hourly means.

    Every ``*.csv`` file in ``directory`` is read without a header, using the
    16 column names below. The part of the file name before the first dot
    must be a ``YYYY-MM-DD`` date; row ``k`` of a file is stamped ``k``
    seconds after midnight of that date. Each day is averaged per hour and
    the days are concatenated in file-name order.

    Parameters
    ----------
    directory : str
        Folder containing the daily CSV files.

    Returns
    -------
    pandas.DataFrame
        One row per hour with the 16 measurement columns (hourly means) and
        a ``timestamp`` column holding the start of the hour, on a fresh
        0..n-1 index. An empty frame with the same columns is returned when
        the folder contains no CSV file.

    Raises
    ------
    ValueError
        If a CSV file name does not start with a ``YYYY-MM-DD`` date.
    """
    col_names = [
        'powerallphases', 'powerl1', 'powerl2', 'powerl3', 'currentneutral',
        'currentl1', 'currentl2', 'currentl3', 'voltagel1', 'voltagel2',
        'voltagel3', 'phaseanglevoltagel2l1', 'phaseanglevoltagel3l1',
        'phaseanglecurrentvoltagel1', 'phaseanglecurrentvoltagel2',
        'phaseanglecurrentvoltagel3'
    ]

    # Sorting the file names keeps the days in chronological order.
    csv_files = sorted(f for f in os.listdir(directory) if f.endswith('.csv'))

    # Collect the per-day frames and concatenate once at the end; growing a
    # DataFrame inside the loop copies all previous rows on every iteration.
    hourly_frames = []
    for file_name in csv_files:
        date = datetime.strptime(file_name.split('.')[0], '%Y-%m-%d')

        daily = pd.read_csv(os.path.join(directory, file_name),
                            header=None, names=col_names)

        # One timestamp per row, one second apart, starting at midnight.
        # Lower-case 's'/'h' replace the 'S'/'H' aliases deprecated in
        # pandas 2.2.
        daily.index = pd.date_range(date, periods=len(daily), freq='s')

        hourly = daily.resample('h').mean()
        # The rows are contiguous from midnight, so the first sample of each
        # hourly bin is the bin start, i.e. the resampled index itself.
        hourly['timestamp'] = hourly.index
        hourly_frames.append(hourly)

    if not hourly_frames:
        return pd.DataFrame(columns=col_names + ['timestamp'])

    return pd.concat(hourly_frames, ignore_index=True)

# Updated appliance mapping function that reads from a .txt file
def map_appliance_data_from_txt(file_path):
    """Read the text file at ``file_path`` and parse it with ``map_appliance_data``.

    Returns whatever ``map_appliance_data`` returns for the file's contents.
    """
    with open(file_path, 'r') as handle:
        return map_appliance_data(handle.read())

# Function to parse the appliance data
# One plug line looks like "01: Fridge (no. days: 231, coverage: 98.56%)".
# The device name may contain any characters (digits, hyphens, parentheses),
# and the coverage may be written with or without a decimal part.
_PLUG_LINE_RE = re.compile(
    r"\s*(\d+):\s+(.+?)\s*\(no\. days:\s+(\d+),\s+coverage:\s+(\d+(?:\.\d+)?)%\)"
)


def map_appliance_data(plug_data_str):
    """Parse plug description lines into a per-appliance dictionary.

    Parameters
    ----------
    plug_data_str : str
        Text with one appliance per line in the form
        ``<number>: <device> (no. days: <int>, coverage: <number>%)``.
        Lines that do not have this form are ignored.

    Returns
    -------
    dict
        ``{number: {"device": str, "no_days": int, "coverage": float}}``.
        ``number`` is the string exactly as written in the text (e.g.
        ``"01"``), and entries appear in the order of the lines.
    """
    appliance_data = {}
    for line in plug_data_str.strip().split('\n'):
        match = _PLUG_LINE_RE.match(line)
        if match is None:
            continue
        appliance_number, appliance_type, no_days, coverage = match.groups()
        appliance_data[appliance_number] = {
            "device": appliance_type.strip(),
            "no_days": int(no_days),
            "coverage": float(coverage),
        }
    return appliance_data

# Function to read all appliance data from a building
def read_all_appliance_data(building_path, num_appliances):
    """Load the data of appliances 1..``num_appliances`` of one building.

    Each appliance is read with ``read_appliance_data`` from the sub-folder of
    ``building_path`` named after its two-digit, zero-padded number.

    Returns a dict keyed by that zero-padded number string ("01", "02", ...).
    """
    # Folder names are the appliance numbers padded to two digits
    folder_names = (str(number).zfill(2) for number in range(1, num_appliances + 1))
    return {
        folder: read_appliance_data(os.path.join(building_path, folder))
        for folder in folder_names
    }

# Function to read appliance level data from .csv files and add timestamps
def read_appliance_data(appliance_path):
    """Load all daily CSV files of one appliance as hourly mean power.

    Every ``*.csv`` file in ``appliance_path`` is read without a header as a
    single ``power`` column. The part of the file name before the first dot
    must be a ``YYYY-MM-DD`` date; row ``k`` of a file is stamped ``k``
    seconds after midnight of that date. Each day is averaged per hour and
    the days are concatenated in file-name (chronological) order.

    Parameters
    ----------
    appliance_path : str
        Folder containing the appliance's daily CSV files.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp`` (start of the hour) and ``power`` (hourly
        mean), in that order, on a fresh 0..n-1 index. An empty frame with
        the same columns is returned when the folder has no CSV file.

    Raises
    ------
    ValueError
        If a CSV file name does not start with a ``YYYY-MM-DD`` date, or a
        file has more than one column.
    """
    # os.listdir returns names in arbitrary order; sort them so the result is
    # chronological, as the smart-meter reader does.
    file_list = sorted(f for f in os.listdir(appliance_path) if f.endswith('.csv'))

    data_frames = []
    for file_name in file_list:
        date = datetime.strptime(file_name.split('.')[0], '%Y-%m-%d')

        daily_data = pd.read_csv(os.path.join(appliance_path, file_name), header=None)
        daily_data.columns = ['power']

        # Stamp each row one second apart from midnight. The number of
        # periods follows the file length, so a file without exactly 86400
        # rows no longer raises a length-mismatch error. Lower-case 's'/'h'
        # replace the 'S'/'H' aliases deprecated in pandas 2.2.
        daily_data.index = pd.date_range(
            date, periods=len(daily_data), freq='s', name='timestamp'
        )

        data_frames.append(daily_data.resample('h').mean())

    if not data_frames:
        return pd.DataFrame(columns=['timestamp', 'power'])

    # reset_index turns the 'timestamp' index into the first column
    return pd.concat(data_frames).reset_index()

# Function to plot aggregated and appliance data
import plotly.graph_objs as go
from plotly.subplots import make_subplots

def plot_aggregated_and_appliance_data(aggregated_data, appliance_data_dict, appliance_mapping):
    """Plot the aggregate power and every appliance as stacked line charts.

    Parameters
    ----------
    aggregated_data : pandas.DataFrame
        Must provide the columns ``timestamp`` and ``powerallphases``.
    appliance_data_dict : dict
        ``{appliance_number: DataFrame}``. Column 0 of each frame is used as
        x and column 1 as y. One subplot is drawn per entry, in dict order.
    appliance_mapping : dict
        ``{appliance_number: {"device": name, ...}}``, used for the subplot
        titles and trace names. An appliance number missing from this
        mapping is labelled ``"Appliance <number>"``.
    """
    # Take the labels from the appliances that are actually plotted, not
    # from the mapping. That keeps titles and traces aligned even when the
    # mapping has gaps or extra entries.
    device_labels = [
        appliance_mapping[number]['device'] if number in appliance_mapping
        else f'Appliance {number}'
        for number in appliance_data_dict
    ]
    num_appliances = len(device_labels)

    # One subplot for the aggregated data and one for each appliance
    fig = make_subplots(rows=num_appliances + 1, cols=1,
                        subplot_titles=['Aggregated Data'] + device_labels,
                        shared_xaxes=False, vertical_spacing=0.03)

    # Row 1: aggregated data
    fig.add_trace(
        go.Scatter(x=aggregated_data['timestamp'], y=aggregated_data['powerallphases'],
                   name='Aggregated', mode='lines'),
        row=1, col=1
    )

    # Rows 2..: one appliance each
    for appliance_row, (label, data) in enumerate(
            zip(device_labels, appliance_data_dict.values()), start=2):
        fig.add_trace(
            go.Scatter(x=data.iloc[:, 0], y=data.iloc[:, 1],
                       name=label, mode='lines'),
            row=appliance_row, col=1
        )

    # Only the last subplot gets an x-axis title
    fig.update_xaxes(title_text="Time", row=num_appliances + 1, col=1)

    # Every subplot gets the same y-axis title
    for r in range(1, num_appliances + 2):
        fig.update_yaxes(title_text="Power (Watts)", row=r, col=1)

    fig.update_layout(
        height=200 * (num_appliances + 1),  # grows with the number of subplots
        width=1000,
        title_text="Aggregated and Appliance Power Data",
        showlegend=True,
        template="plotly_dark",
        font=dict(family="Arial", size=12, color="white")
    )

    # Show figure
    fig.show()

# Example usage:
building_number = "01"

# Root folder of the raw ECO data. All per-building paths below are derived
# from it and from building_number, so changing the building is a one-line
# edit.
eco_root = r"C:\Users\char\Documents\GitHub\NeuralNILM_Pytorch\data\eco"
metadata_path = os.path.join(eco_root, building_number + '_doc.txt')

# Map appliance data using the updated function
appliance_mapping = map_appliance_data_from_txt(metadata_path)

# Number of appliance folders to read = highest plug number found in the mapping
num_appliances = max(int(number) for number in appliance_mapping)

# Path to the appliance data directory
building_appliance_data_path = os.path.join(eco_root, building_number + '_plugs_csv', building_number)

# Path to the smart meter data directory. It is derived from building_number;
# the folder name used to be hard-coded to '01_sm_csv'.
smart_meter_data_path = os.path.join(eco_root, building_number + '_sm_csv', building_number)

# Read smart_meter_data data
smart_meter_data = append_smart_meter_data(smart_meter_data_path)

# Read all appliance data
all_appliance_data = read_all_appliance_data(building_appliance_data_path, num_appliances)

# Plot the data
plot_aggregated_and_appliance_data(smart_meter_data, all_appliance_data, appliance_mapping)

In [None]:
# Experiment configuration handed to the nilmtk API
e = {
    # Power type used for the mains and for the appliance signals
    'power': {
        'mains': ['active'],
        'appliance': ['active'],
    },
    'sample_rate': 900,
    # Appliance(s) to disaggregate
    'appliances': {'washing machine'},
    # Universally no pre-training
    'pre_trained': False,
    # Algorithms to run, each built with its own hyperparameter dict
    'methods': {
        "Seq2Point": Seq2Point({'n_epochs': 2, 'batch_size': 512}),
        "SGN": SGN({'n_epochs': 2, 'batch_size': 512}),
        "DAE": DAE({'n_epochs': 2, 'batch_size': 512}),
        "BiLSTM": BiLSTM({'n_epochs': 2, 'batch_size': 512}),
    },
    # Training data: dataset file, building and time window
    'train': {
        'datasets': {
            'refit': {
                'path': r'C:\Users\char\Documents\GitHub\NeuralNILM_Pytorch\data_hdf5\refit\data_refit.h5',
                'buildings': {
                    8: {
                        'start_time': '2014-03-07',
                        'end_time': '2015-03-06',
                    },
                },
            },
        },
    },
    # Test data: same building, the month following the training window
    'test': {
        'datasets': {
            'refit': {
                'path': r'C:\Users\char\Documents\GitHub\NeuralNILM_Pytorch\data_hdf5\refit\data_refit.h5',
                'buildings': {
                    8: {
                        'start_time': '2015-03-07',
                        'end_time': '2015-04-06',
                    },
                },
            },
        },
        # Evaluation metrics (this key sits inside the 'test' section)
        'metrics': ['mae', 'f1score', 'recall', 'precision', 'nep', 'omae', 'MCC'],
    },
}

API(e)

From the results above, we can see the **inference and training time** of each neural network on the given dataset, as well as its **memory usage**. **Classification metrics (namely recall, precision, and MCC)** and **regression metrics (mae, omae, nep)** are also reported. The **detailed energy disaggregation results and ground truth** can be found in the .csv file under the same folder.

In [None]:
import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Excel workbook holding the results
file_path = r'C:\Users\char\Documents\GitHub\NeuralNILM_Pytorch\Results_v1.4.xlsx'  # Make sure to use the correct path to your Excel file

# One sheet per metric, read in the order mae, f1, nep
df_mae, df_f1, df_nep = (
    pd.read_excel(file_path, sheet_name=sheet, engine='openpyxl')
    for sheet in ('mae', 'f1', 'nep')
)

# Split every sheet on its 'Type' column: rows with Type == 1 feed the
# "intra" frames, rows with Type == 2 the "cross" frames
df_mae_intra, df_f1_intra, df_nep_intra = (
    frame.loc[frame['Type'].eq(1)] for frame in (df_mae, df_f1, df_nep)
)
df_mae_cross, df_f1_cross, df_nep_cross = (
    frame.loc[frame['Type'].eq(2)] for frame in (df_mae, df_f1, df_nep)
)

# Assuming the structure of the sheets is the same, we can create a function
def create_metric_subplot(df_intra, df_cross, metric_name, appliances, algorithms, show_legend=False):
    """Build the line traces of one metric for the given appliances.

    For every appliance and every algorithm two ``go.Scatter`` traces are
    produced, in this order: the "Intra" trace (solid line, rows of
    ``df_intra``) and the "Cross" trace (dotted line, rows of ``df_cross``).
    x is the ``Sample Period`` column, y is the column named after the
    algorithm. Colours come from the module-level ``algorithm_colors`` dict,
    with a near-black fallback. ``metric_name`` is accepted but not used.

    Returns the list of traces.
    """
    traces = []
    for appliance in appliances:
        # Rows of this appliance for both validation kinds, with the label
        # and dash style each kind is drawn with
        per_kind = (
            ('Intra', 'solid', df_intra[df_intra['Appliance'] == appliance]),
            ('Cross', 'dot', df_cross[df_cross['Appliance'] == appliance]),
        )
        for algo in algorithms:
            color = algorithm_colors.get(algo, 'rgba(0, 0, 0, 0.8)')
            traces.extend(
                go.Scatter(
                    x=rows['Sample Period'],
                    y=rows[algo],
                    mode='lines+markers',
                    name=f"{algo} {kind}",
                    legendgroup=f"{algo} {kind}",
                    line=dict(color=color, width=2, dash=dash),
                    showlegend=show_legend
                )
                for kind, dash, rows in per_kind
            )
    return traces

def create_figure(df_mae_intra, df_mae_cross, df_f1_intra, df_f1_cross, df_nep_intra, df_nep_cross, appliances, algorithms, title):
    """Assemble a 3-row grid of subplots: one row per metric, one column per appliance.

    Rows are MAE, F1 and NEP (top to bottom). Each cell is filled with the
    traces returned by ``create_metric_subplot`` for that metric and
    appliance. Legend entries are enabled only for the traces of the
    top-left cell.

    Returns the plotly figure; it is not shown here.
    """
    n_cols = len(appliances)
    fig = make_subplots(
        rows=3,
        cols=n_cols,
        subplot_titles=[f"{appliance}" for appliance in appliances],
        shared_yaxes=True,
        horizontal_spacing=0.03,
        vertical_spacing=0.04
    )

    # (metric name, intra frame, cross frame) for rows 1..3
    metric_frames = (
        ('MAE', df_mae_intra, df_mae_cross),
        ('F1', df_f1_intra, df_f1_cross),
        ('NEP', df_nep_intra, df_nep_cross),
    )
    for row, (metric, df_intra, df_cross) in enumerate(metric_frames, start=1):
        for col, appliance in enumerate(appliances, start=1):
            is_first_cell = (row == 1 and col == 1)
            cell_traces = create_metric_subplot(
                df_intra, df_cross, metric, [appliance], algorithms, is_first_cell
            )
            for trace in cell_traces:
                fig.add_trace(trace, row=row, col=col)

    # Overall layout: fixed height for the three rows, width grows with the
    # number of appliance columns, horizontal legend above the grid
    legend_style = dict(
        orientation="h",
        yanchor="bottom",
        y=1.05,
        xanchor="center",
        x=0.5
    )
    fig.update_layout(
        title_text=title,
        height=250 * 3,
        width=200 * n_cols,
        font=dict(family="Times New Roman",size=12),
        showlegend=True,
        legend=legend_style,
        template="simple_white"
    )

    # Subplot titles are annotations; give each of them the same font
    for annotation in fig.layout.annotations:
        annotation.font = dict(family="Times New Roman", size=12)

    # Label the y-axis of the first column in every row
    y_axis_labels = ('<b>MAE</b> (W)', '<b>F1-score</b> (-)', '<b>NEP</b> (-)')
    for row, label in enumerate(y_axis_labels, start=1):
        fig.update_yaxes(title_text=label, row=row, col=1)

    return fig

# Algorithms to plot (each one is a column of the result sheets) and the
# line colour used for each
algorithms = ['Seq2Point', 'SGN', 'DAE', 'BiLSTM']
algorithm_colors = dict(Seq2Point='orange', DAE='red', SGN='blue', BiLSTM='green')
title = ""

# Build the figure from the intra and cross frames; the appliance columns
# are the unique values of the 'Appliance' column of the intra MAE frame
fig = create_figure(
    df_mae_intra=df_mae_intra,
    df_mae_cross=df_mae_cross,
    df_f1_intra=df_f1_intra,
    df_f1_cross=df_f1_cross,
    df_nep_intra=df_nep_intra,
    df_nep_cross=df_nep_cross,
    appliances=df_mae_intra['Appliance'].unique(),
    algorithms=algorithms,
    title=title,
)

fig.show()