# Disaggregation of appliances from generated load profiles

In [16]:
# General libraries
import numpy as np
import pandas as pd

# Display floats with 10 digits after the decimal point; the per-second kWh
# values shown in this notebook are on the order of 1e-5.
pd.set_option("display.precision", 10)

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [17]:
# Directory the input CSVs are read from; the reduced CSVs are written here too.
DATA_PATH = "data/6_months"

In [18]:
# Load the per-device electricity profiles (semicolon-separated export).
# NOTE(review): SumProfiles.Electricity.csv in the same directory stores Time as
# DD.MM.YYYY HH:MM. Assuming this file uses the same layout (TODO confirm),
# dayfirst=True keeps ambiguous dates such as "02.01.2021" from being read
# month-first (as 1 February instead of 2 January).
df_import = pd.read_csv(
    f"{DATA_PATH}/DeviceProfiles.HH1.Electricity.csv",
    delimiter=";",
    parse_dates=["Time"],
    dayfirst=True,
)
df_import.head()

KeyboardInterrupt: 

In [None]:
# Column headers of the appliances to keep from the imported device profiles.
# The order matters: these headers are zipped with target_names to build the
# rename mapping.
appliance_columns = [
    "HH1 - Kitchen - Single Stove Plate [kWh]",
    "HH1 - Kitchen - Nespresso Coffee Machine, Single Cup [kWh]",
    "HH1 - Kitchen - Microwave / Panasonic NN 5259 [kWh]",
    "HH1 - Kitchen - Dishwasher NEFF SD6P1F (2011) [kWh]",
    "HH1 - Kitchen - Washing Machine / Bosch WAE 28143 [kWh]",
]

In [7]:
# Keep the timestep counter, the timestamp and the chosen appliance columns,
# copied so later edits do not touch df_import.
selected_columns = ["Electricity.Timestep", "Time", *appliance_columns]
df_selected = df_import.loc[:, selected_columns].copy()
df_selected

Unnamed: 0,Electricity.Timestep,Time,HH1 - Kitchen - Single Stove Plate [kWh],"HH1 - Kitchen - Nespresso Coffee Machine, Single Cup [kWh]",HH1 - Kitchen - Microwave / Panasonic NN 5259 [kWh],HH1 - Kitchen - Dishwasher NEFF SD6P1F (2011) [kWh],HH1 - Kitchen - Washing Machine / Bosch WAE 28143 [kWh]
0,0,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
1,1,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
2,2,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
3,3,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
4,4,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
15638395,15638395,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0
15638396,15638396,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0
15638397,15638397,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0
15638398,15638398,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0


In [8]:
def count_non_zero_rows(dataframe, column):
    """Return how many rows of ``dataframe[column]`` hold a value other than 0.

    Missing values are not equal to 0 and are therefore counted as non-zero.
    """
    is_zero = dataframe[column].isin([0])
    return (~is_zero).sum()

In [9]:
# appliance_columns[1] is the Nespresso coffee machine column.
coffee_machine_column = appliance_columns[1]
count_non_zero_rows(df_selected, coffee_machine_column)

49353

In [10]:
# Short labels for the selected appliances, in the same order as appliance_columns.
target_names = ["Stove", "Coffee_machine", "Microwave", "Dishwasher", "Washing_machine"]
# Map each original CSV column header to its short label.
name_mappings = {
    source_column: short_name
    for source_column, short_name in zip(appliance_columns, target_names)
}
name_mappings

{'HH1 - Kitchen - Single Stove Plate [kWh]': 'Stove',
 'HH1 - Kitchen - Nespresso Coffee Machine, Single Cup [kWh]': 'Coffee_machine',
 'HH1 - Kitchen - Microwave / Panasonic NN 5259 [kWh]': 'Microwave',
 'HH1 - Kitchen - Dishwasher NEFF SD6P1F (2011) [kWh]': 'Dishwasher',
 'HH1 - Kitchen - Washing Machine / Bosch WAE 28143 [kWh]': 'Washing_machine'}

In [11]:
# Rename the verbose appliance headers to their short labels. rename() already
# returns a new DataFrame, so an extra .copy() would only duplicate the
# multi-million-row table a second time.
df = df_selected.rename(columns=name_mappings)

In [12]:
# Display the first 61 rows of the renamed frame.
# NOTE(review): 61 presumably spans one full minute of 1-second timesteps plus
# the first row of the next minute — confirm.
df.head(61)

Unnamed: 0,Electricity.Timestep,Time,Stove,Coffee_machine,Microwave,Dishwasher,Washing_machine
0,0,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
1,1,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
2,2,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
3,3,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
4,4,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
56,56,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
57,57,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
58,58,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
59,59,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0


## Create power column

In [13]:
# Row-wise total over the renamed appliance columns.
appliance_labels = [*name_mappings.values()]
df["Power"] = df[appliance_labels].sum(axis="columns")
df

Unnamed: 0,Electricity.Timestep,Time,Stove,Coffee_machine,Microwave,Dishwasher,Washing_machine,Power
0,0,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0
2,2,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0
3,3,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0
4,4,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
15638395,15638395,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0,0.0
15638396,15638396,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0,0.0
15638397,15638397,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0,0.0
15638398,15638398,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0,0.0


## Fix timestamp seconds

In [12]:
# Set the seconds field of every timestamp to (timestep % 60).
# Vectorized equivalent of dt.replace(second=...): subtract whatever seconds
# value is currently present, then add the one derived from the timestep
# counter. This avoids a Python-level loop over every row, does not depend on
# the frame having a default 0..n-1 index, and can be re-run safely.
current_seconds = pd.to_timedelta(df["Time"].dt.second, unit="s")
timestep_seconds = pd.to_timedelta(df["Electricity.Timestep"] % 60, unit="s")
df["Time"] = df["Time"] - current_seconds + timestep_seconds
df

Unnamed: 0,Electricity.Timestep,Time,Stove,Coffee_machine,Microwave,Dishwasher,Washing_machine
0,0,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0
1,1,2021-01-01 00:00:01,0.0,0.0,0.0,0.0,0.0
2,2,2021-01-01 00:00:02,0.0,0.0,0.0,0.0,0.0
3,3,2021-01-01 00:00:03,0.0,0.0,0.0,0.0,0.0
4,4,2021-01-01 00:00:04,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
1123195,1123195,2021-01-13 23:59:55,0.0,0.0,0.0,0.0,0.0
1123196,1123196,2021-01-13 23:59:56,0.0,0.0,0.0,0.0,0.0
1123197,1123197,2021-01-13 23:59:57,0.0,0.0,0.0,0.0,0.0
1123198,1123198,2021-01-13 23:59:58,0.0,0.0,0.0,0.0,0.0


## Export data for further preprocessing

In [None]:
# Write the reduced frame (index included) next to the input data.
reduced_csv_path = f"{DATA_PATH}/data_reduced.csv"
df.to_csv(reduced_csv_path)

# End of preprocessing for training

# Create noisy inputs for evaluation
Training uses the sum of the selected appliances' load profiles as input. Ideally, the model should still recognize these devices when other appliances add noise, so for a more realistic evaluation the `Power` input is replaced with the summed profile from `SumProfiles.Electricity.csv`.

In [5]:
# Load the summed electricity profile (semicolon-separated export).
sum_profiles_path = f"{DATA_PATH}/SumProfiles.Electricity.csv"
df_sum_profiles = pd.read_csv(sum_profiles_path, sep=";")
df_sum_profiles

Unnamed: 0,Electricity.Timestep,Time,Sum [kWh]
0,0,01.01.2021 00:00,0.0000230970
1,1,01.01.2021 00:00,0.0000269284
2,2,01.01.2021 00:00,0.0000253307
3,3,01.01.2021 00:00,0.0000237392
4,4,01.01.2021 00:00,0.0000221426
...,...,...,...
15638395,15638395,30.06.2021 23:59,0.0000450566
15638396,15638396,30.06.2021 23:59,0.0000462342
15638397,15638397,30.06.2021 23:59,0.0000452220
15638398,15638398,30.06.2021 23:59,0.0000417102


In [6]:
# Discard the timestep counter and timestamp columns from the summed profile.
unused_columns = ["Electricity.Timestep", "Time"]
df_sum_profiles = df_sum_profiles.drop(unused_columns, axis="columns")
df_sum_profiles

Unnamed: 0,Sum [kWh]
0,0.0000230970
1,0.0000269284
2,0.0000253307
3,0.0000237392
4,0.0000221426
...,...
15638395,0.0000450566
15638396,0.0000462342
15638397,0.0000452220
15638398,0.0000417102


In [14]:
# Overwrite Power with the summed profile; values are matched by row index.
summed_profile = df_sum_profiles["Sum [kWh]"]
df["Power"] = summed_profile
df

Unnamed: 0,Electricity.Timestep,Time,Stove,Coffee_machine,Microwave,Dishwasher,Washing_machine,Power
0,0,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0000230970
1,1,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0000269284
2,2,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0000253307
3,3,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0000237392
4,4,2021-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0000221426
...,...,...,...,...,...,...,...,...
15638395,15638395,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0,0.0000450566
15638396,15638396,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0,0.0000462342
15638397,15638397,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0,0.0000452220
15638398,15638398,2021-06-30 23:59:00,0.0,0.0,0.0,0.0,0.0,0.0000417102


In [15]:
# Write the frame with the overwritten Power column (index included).
noisy_csv_path = f"{DATA_PATH}/data_reduced_noisy.csv"
df.to_csv(noisy_csv_path)