In [4]:
import os, math, random
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.dates as md
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import warnings

# Suppress every warning for the rest of the session: no category, message or
# module filter is given, so this also hides library deprecation notices.
warnings.filterwarnings("ignore")

-------------------
# INIT

In [5]:
# Directory holding the raw per-household CSV files, and the directory that
# receives the exported per-device CSV files.
DATADIR = "original_datas"
EXPORT = "datas"

# Sub-directory names (one per household) looked up under DATADIR / created under EXPORT.
household_list = ["building4", "building5", "building7"]

# Each mapping below goes from a CSV column identifier to a readable device name.
# Insertion order is preserved on purpose: it drives column order downstream.

# building4
device_dict_4 = {
    "000D6F00029C2AEB": "total_outlets",
    "000D6F0003561F98": "total_lights",
    "000D6F0003562DF4": "TV_kitchen",
    "000D6F000353AE48": "TV_living_room",
    "000D6F00029C2934": "fridge_w_freezer",
    "000D6F0003562CA8": "electric_oven",
    "000D6F0003562D88": "PC_w_printer",
    "000D6F0003562C9D": "washing_machine",
    "000D6F00029C1C2B": "hood",
}

# building5
device_dict_5 = {
    "000D6F00029C2918": "TV_plasma",
    "000D6F00029C286E": "lamp",
    "000D6F00029C44E4": "toaster",
    "000D6F00029C46A3": "stove",
    "000D6F00029C246A": "iron",
    "000D6F00029C24E1": "PC_w_printer",
    "000D6F00029C46A6": "TV_LCD",
    "000D6F00029C271E": "washing_machine",
    "000D6F00029C46A4": "fridge_w_freezer",
}

# building7
device_dict_7 = {
    "000D6F00029C2542": "TV_w_decoder",
    "000D6F00036BC42A": "electric_oven",
    "000D6F0003BD8082": "dishwasher",
    "000D6F0003BD8C8E": "hood",
    "000D6F0003BD76C4": "fridge_w_freezer",
    "000D6F0003BD8103": "TV_kitchen",
    "000D6F0003BD8293": "ADSL_modem",
    "000D6F0003BD6E92": "freezer",
    "000D6F0003B9C636": "PC_w_printer",
}

# Positionally aligned with household_list: device_dicts[i] describes household_list[i].
device_dicts = [device_dict_4, device_dict_5, device_dict_7]

------------------
# Extract data

In [6]:
def _load_readings(csv_path, code_to_name, usecols=None):
    """Read one raw CSV file and return its device columns indexed by timestamp.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read. It must contain a ``timestamp`` column
        and one column per key of ``code_to_name``.
    code_to_name : dict
        Mapping from CSV column identifier to readable device name. Only these
        columns are kept, in the mapping's order, and they are renamed to the
        mapped names.
    usecols : optional
        Forwarded unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        One column per device, indexed by naive local-time ``datetime`` objects
        built from the epoch-second values of the ``timestamp`` column.
    """
    frame = pd.read_csv(csv_path, usecols=usecols)
    # Discard rows whose timestamp field is the literal header text or is missing.
    unusable = (frame["timestamp"] == "timestamp") | frame["timestamp"].isna()
    frame = frame[~unusable].copy()
    frame["timestamp"] = frame["timestamp"].apply(lambda x: datetime.fromtimestamp(float(x)))
    frame = frame.set_index("timestamp", drop=True)
    frame = frame[list(code_to_name)]
    frame.columns = [code_to_name[code] for code in frame.columns]
    return frame


for idx, household in enumerate(household_list):
    print(household)
    code_to_name = device_dicts[idx]
    device_code = list(code_to_name.keys())
    device_name = list(code_to_name.values())

    household_dir = f"{DATADIR}/{household}"
    # NOTE(review): os.listdir returns entries in arbitrary order, yet files[0] is
    # skipped and files[1] is read without `usecols`. Presumably files[0] is a
    # non-data entry on the author's machine -- confirm and filter it explicitly.
    files = os.listdir(household_dir)

    # Collect every frame first and concatenate once: concatenating inside the
    # loop copies the whole accumulated frame on each iteration.
    frames = [_load_readings(f"{household_dir}/{files[1]}", code_to_name)]
    tots = len(files[2:])
    for i, file in enumerate(files[2:], start=1):
        print(f"Current file: {i}/{tots}", end="                 \r")
        single_file = _load_readings(
            f"{household_dir}/{file}",
            code_to_name,
            usecols=range(len(device_code) + 1),
        )
        frames.append(single_file)
    total_file = pd.concat(frames)

    total_file = (
        total_file
        # Cells shaped like "1.2.3" (two dots) cannot be parsed as a number: zero them.
        .replace({r"^[ ]*[0-9]*[.][0-9]*[.][0-9]*[ ]*$": 0}, regex=True)
        # Any cell containing a letter is zeroed as well.
        .replace({r"[a-zA-Z]": 0}, regex=True)
        .fillna(0)
        .sort_index()
        .astype(float)
        # Data is active power (W) for every second: resample to the average
        # active power (W) for every minute.
        .resample("60s")
        .mean()
    )

    # makedirs also creates EXPORT itself when it does not exist yet.
    export_dir = f"{EXPORT}/{household}"
    os.makedirs(export_dir, exist_ok=True)

    for device in device_name:
        # Convert values from W (per minute) to kWh -> FORMULA: W / 1000 * (1 / 60)
        total_file[device] = total_file[device] / 1000 / 60
        total_file[device].to_csv(f"{export_dir}/{device}.csv")

    print("\n")

print("Extraction complete!")

building4
Current file: 265/265                 

building5
Current file: 393/393                 

building7
Current file: 132/132                 

Extraction complete!
