# Disaggregation of appliances from generated load profiles

In [1]:
# General libraries
import numpy as np
import pandas as pd

# Display floats with 10 decimal places so that very small values
# (e.g. 7.87e-08) are not rounded away in rendered tables.
pd.set_option("display.precision", 10)

%matplotlib inline

In [2]:
# Directory the input CSV exports are read from and the reduced CSVs are written to.
DATA_PATH = "data/test"

In [3]:
# Load the per-device electricity profiles of household HH1 (semicolon-separated).
# dayfirst=True: the SumProfiles.Electricity.csv export read later in this notebook
# stores timestamps as "DD.MM.YYYY HH:MM". Without the flag, ambiguous dates such
# as 02.01.2021 may be interpreted month-first (1 February instead of 2 January).
# NOTE(review): assumes this file uses the same timestamp format — confirm.
df_import = pd.read_csv(
    f"{DATA_PATH}/DeviceProfiles.HH1.Electricity.csv",
    delimiter=";",
    parse_dates=["Time"],
    dayfirst=True,
)
df_import.head()

Unnamed: 0,Electricity.Timestep,Time,"HH1 - Living room - Living Room Light (Energy Saving Lamp, 20W) [kWh]",HH1 - Kitchen - Kitchen Light (20W) [kWh],HH1 - Bath - Bathroom Light (20W) [kWh],HH1 - Bath - Bathroom Mirror Light 10 W (LED) [kWh],HH1 - Bedroom - Bedroom Light (20W) [kWh],HH1 - Children's room - Children Room Light Device (20W) [kWh],HH1 - Kitchen - AFK BM-2N [kWh],HH1 - Kitchen - Egg Cooker / Russell Hobbs 14048-56 Stylo [kWh],...,HH1 - Living room - TV Medion MD20123_DE_A [kWh].1,HH1 - Living room - CD/DVD Player / Philips DVDR 725 H [kWh].1,HH1 - Living room - Home Server 50 W [kWh],HH1 - Bath - Electric Razor Braun Cruzer 5 [kWh],HH1 - Living room - Laptop Sony Vaio SVE151G11M [kWh].1,HH1 - Living room - Router O2 Box 6431 [kWh],HH1 - Children's room - TV Medion MD20123_DE_A [kWh].1,HH1 - Children's room - Sony PlayStation 3 [kWh].1,HH1 - Kitchen - Vacuum Cleaner Robot / iRobot Roomba 555 [kWh].1,"HH1 - Vehicle Pool - Car 2, 22kW Charging Power, avg. Speed 30 km/h [kWh]"
0,0,2021-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7.87e-08,3.186e-07,1.46825e-05,2.629e-07,8.054e-07,1.7839e-06,6.82e-08,1.1869e-06,9.846e-07,0
1,1,2021-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.96e-08,2.929e-07,1.26536e-05,2.629e-07,7.597e-07,1.7839e-06,8.87e-08,1.2944e-06,8.186e-07,0
2,2,2021-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.33e-08,2.887e-07,1.51636e-05,2.629e-07,8.192e-07,1.7839e-06,8.45e-08,1.2379e-06,9.079e-07,0
3,3,2021-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7.08e-08,2.756e-07,1.3741e-05,2.629e-07,8.052e-07,1.7815e-06,8.01e-08,1.2559e-06,8.616e-07,0
4,4,2021-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.13e-08,3.276e-07,1.36991e-05,2.629e-07,7.128e-07,1.7815e-06,7.93e-08,1.2993e-06,8.399e-07,0


In [4]:
# Columns of the device-profile export that are kept for this notebook.
# The order matters: further down the list is zipped with the short target names.
appliance_columns = [
    "HH1 - Kitchen - Single Stove Plate [kWh]",
    "HH1 - Kitchen - Nespresso Coffee Machine, Single Cup [kWh]",
    "HH1 - Kitchen - Microwave / Panasonic NN 5259 [kWh]",
    "HH1 - Kitchen - Dishwasher NEFF SD6P1F (2011) [kWh]",
    "HH1 - Kitchen - Washing Machine / Bosch WAE 28143 [kWh]",
]

In [5]:
# Keep only the timestep, the timestamp and the chosen appliance columns.
# The explicit copy decouples the result from df_import.
df_selected = df_import.loc[
    :, ["Electricity.Timestep", "Time", *appliance_columns]
].copy()
df_selected

Unnamed: 0,Electricity.Timestep,Time,HH1 - Kitchen - Single Stove Plate [kWh],"HH1 - Kitchen - Nespresso Coffee Machine, Single Cup [kWh]",HH1 - Kitchen - Microwave / Panasonic NN 5259 [kWh],HH1 - Kitchen - Dishwasher NEFF SD6P1F (2011) [kWh],HH1 - Kitchen - Washing Machine / Bosch WAE 28143 [kWh]
0,0,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
1,1,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
2,2,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
3,3,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
4,4,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
1123195,1123195,2021-01-13 23:59:00,0.0,0.0,0.0,0.0,0.0
1123196,1123196,2021-01-13 23:59:00,0.0,0.0,0.0,0.0,0.0
1123197,1123197,2021-01-13 23:59:00,0.0,0.0,0.0,0.0,0.0
1123198,1123198,2021-01-13 23:59:00,0.0,0.0,0.0,0.0,0.0


In [7]:
def count_non_zero_rows(dataframe, column):
    """Return the number of rows whose value in ``column`` is not 0.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to inspect.
    column : hashable
        Label of the column to look at.

    Returns
    -------
    Integer count of rows that are not equal to 0. Missing values (NaN)
    do not match 0 and are therefore included in the count.
    """
    is_zero = dataframe[column].isin([0])
    return (~is_zero).sum()

In [8]:
# Sanity check: number of rows with a non-zero reading for the second selected
# appliance (the Nespresso coffee machine column).
count_non_zero_rows(df_selected, appliance_columns[1])

4034

In [9]:
# Short, identifier-friendly names, listed in the same order as appliance_columns.
target_names = [
    "Stove",
    "Coffee_machine",
    "Microwave",
    "Dishwasher",
    "Washing_machine",
]
# Pair every verbose export column with its short name (position by position).
name_mappings = {
    long_name: short_name
    for long_name, short_name in zip(appliance_columns, target_names)
}
name_mappings

{'HH1 - Kitchen - Single Stove Plate [kWh]': 'Stove',
 'HH1 - Kitchen - Nespresso Coffee Machine, Single Cup [kWh]': 'Coffee_machine',
 'HH1 - Kitchen - Microwave / Panasonic NN 5259 [kWh]': 'Microwave',
 'HH1 - Kitchen - Dishwasher NEFF SD6P1F (2011) [kWh]': 'Dishwasher',
 'HH1 - Kitchen - Washing Machine / Bosch WAE 28143 [kWh]': 'Washing_machine'}

In [10]:
# Working frame with the verbose appliance columns renamed to their short names.
df = df_selected.rename(columns=name_mappings).copy()
# Optional: uncomment to release the intermediate frame once it is no longer needed.
# del(df_selected)

In [11]:
# Preview the first 61 rows.
# NOTE(review): presumably chosen to see whether the minute-resolution "Time"
# value changes after 60 rows — confirm.
df.head(61)

Unnamed: 0,Electricity.Timestep,Time,Stove,Coffee_machine,Microwave,Dishwasher,Washing_machine
0,0,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
1,1,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
2,2,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
3,3,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
4,4,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
56,56,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
57,57,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
58,58,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
59,59,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0


## Create power column

In [13]:
# Row-wise total over the renamed appliance columns, stored as the "Power" column.
df["Power"] = df.loc[:, list(name_mappings.values())].sum(axis="columns")
df

Unnamed: 0,Electricity.Timestep,Time,Stove,Coffee_machine,Microwave,Dishwasher,Washing_machine,Target
0,0,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0
2,2,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0
3,3,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0
4,4,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
15638395,15638395,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0,0.0
15638396,15638396,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0,0.0
15638397,15638397,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0,0.0
15638398,15638398,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0,0.0


## Fix timestamp seconds

In [12]:
# "Time" only changes once per 60 rows, so the seconds component is rebuilt
# from the timestep counter (timestep modulo 60).
# Vectorised replacement for a per-row Python loop: any existing seconds are
# removed and the derived seconds are added as a timedelta. The operation works
# on whole columns, so it does not rely on the index being 0..n-1, and it is
# idempotent (re-running the cell yields the same timestamps).
df["Time"] = (
    df["Time"]
    - pd.to_timedelta(df["Time"].dt.second, unit="s")
    + pd.to_timedelta(df["Electricity.Timestep"] % 60, unit="s")
)
df

Unnamed: 0,Electricity.Timestep,Time,Stove,Coffee_machine,Microwave,Dishwasher,Washing_machine
0,0,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
1,1,2021-01-01 00:00:01,0.0,0.0,0.0,0.0,0.0
2,2,2021-01-01 00:00:02,0.0,0.0,0.0,0.0,0.0
3,3,2021-01-01 00:00:03,0.0,0.0,0.0,0.0,0.0
4,4,2021-01-01 00:00:04,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
1123195,1123195,2021-01-13 23:59:55,0.0,0.0,0.0,0.0,0.0
1123196,1123196,2021-01-13 23:59:56,0.0,0.0,0.0,0.0,0.0
1123197,1123197,2021-01-13 23:59:57,0.0,0.0,0.0,0.0,0.0
1123198,1123198,2021-01-13 23:59:58,0.0,0.0,0.0,0.0,0.0


## Export data for further preprocessing

In [None]:
# Write the reduced frame to disk; the row index is included in the CSV
# because to_csv writes it by default.
df.to_csv(f"{DATA_PATH}/data_reduced.csv")

# End of preprocessing for training

# Create noisy inputs for evaluation
Training is done on the sum of the load profiles of the selected appliances only. Ideally, the model should still recognize these devices when other appliances add noise to the total load, which is a more realistic setting.

In [18]:
# Read the summed electricity profile (semicolon-separated export).
# No date parsing is requested, so "Time" stays as plain text.
df_sum_profiles = pd.read_csv(
    f"{DATA_PATH}/SumProfiles.Electricity.csv", sep=";"
)
df_sum_profiles

Unnamed: 0,Electricity.Timestep,Time,Sum [kWh]
0,0,01.01.2021 00:00,0.0000472315
1,1,01.01.2021 00:00,0.0000453972
2,2,01.01.2021 00:00,0.0000473517
3,3,01.01.2021 00:00,0.0000453586
4,4,01.01.2021 00:00,0.0000454614
...,...,...,...
1123195,1123195,13.01.2021 23:59,0.0000245179
1123196,1123196,13.01.2021 23:59,0.0000254437
1123197,1123197,13.01.2021 23:59,0.0000250264
1123198,1123198,13.01.2021 23:59,0.0000237630


In [19]:
# Discard the timestep and timestamp columns so only the measurement remains.
# errors="ignore" keeps this self-reassigning cell re-runnable: on a second run
# the columns are already gone and a plain drop would raise KeyError.
df_sum_profiles = df_sum_profiles.drop(
    columns=["Electricity.Timestep", "Time"],
    errors="ignore",
)
df_sum_profiles

Unnamed: 0,Sum [kWh]
0,0.0000472315
1,0.0000453972
2,0.0000473517
3,0.0000453586
4,0.0000454614
...,...
1123195,0.0000245179
1123196,0.0000254437
1123197,0.0000250264
1123198,0.0000237630


In [20]:
# Overwrite "Power" with the "Sum [kWh]" series from the summed-profile export.
# Column assignment aligns on the index, so a differing index would silently
# produce NaN or shifted values; fail loudly instead.
assert df.index.equals(df_sum_profiles.index), (
    "df and df_sum_profiles must share the same index before assigning 'Power'"
)
df["Power"] = df_sum_profiles["Sum [kWh]"]
df

Unnamed: 0,Electricity.Timestep,Time,Stove,Coffee_machine,Microwave,Dishwasher,Washing_machine,Target,Power
0,0,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0000472315,0.0000472315
1,1,2021-01-01 00:00:01,0.0,0.0,0.0,0.0,0.0,0.0000453972,0.0000453972
2,2,2021-01-01 00:00:02,0.0,0.0,0.0,0.0,0.0,0.0000473517,0.0000473517
3,3,2021-01-01 00:00:03,0.0,0.0,0.0,0.0,0.0,0.0000453586,0.0000453586
4,4,2021-01-01 00:00:04,0.0,0.0,0.0,0.0,0.0,0.0000454614,0.0000454614
...,...,...,...,...,...,...,...,...,...
1123195,1123195,2021-01-13 23:59:55,0.0,0.0,0.0,0.0,0.0,0.0000245179,0.0000245179
1123196,1123196,2021-01-13 23:59:56,0.0,0.0,0.0,0.0,0.0,0.0000254437,0.0000254437
1123197,1123197,2021-01-13 23:59:57,0.0,0.0,0.0,0.0,0.0,0.0000250264,0.0000250264
1123198,1123198,2021-01-13 23:59:58,0.0,0.0,0.0,0.0,0.0,0.0000237630,0.0000237630


In [25]:
# Write the frame whose "Power" column now holds the summed profile to a
# separate file; the row index is included in the CSV by default.
df.to_csv(f"{DATA_PATH}/data_reduced_noisy.csv")