In [15]:
import pandas as pd
import os
from sklearn.impute import SimpleImputer

- Got data from https://traces.cs.umass.edu/index.php/Smart/Smart
- Used 2016 data since it has consistent time gaps
- Convert from minute recordings of kW to kWh by grouping by hour and taking the mean

In [93]:
data_folder = "../data/apartment/2016/"
# csv_files = [f for f in os.listdir(data_folder) if ".csv" in f]

df = pd.read_csv(f"{data_folder}Apt100_2016.csv", header=None)
df.columns = ["date", "consumption"]
df.date = pd.to_datetime(df.date).dt.floor('h')
df = df.groupby("date").mean().reset_index()
df

Unnamed: 0,date,consumption
0,2016-01-01 00:00:00,1.657284
1,2016-01-01 01:00:00,1.590161
2,2016-01-01 02:00:00,1.554063
3,2016-01-01 03:00:00,1.246576
4,2016-01-01 04:00:00,1.836924
...,...,...
8391,2016-12-15 16:00:00,1.116533
8392,2016-12-15 17:00:00,1.360014
8393,2016-12-15 18:00:00,0.954215
8394,2016-12-15 19:00:00,1.569412


In [127]:
weather_2015 = pd.read_csv("../data/apartment-weather/apartment2015.csv")
weather_2016 = pd.read_csv("../data/apartment-weather/apartment2016.csv")
weather = pd.concat([weather_2015, weather_2016])
weather = weather.drop(columns=["icon", "summary"])
weather = SimpleImputer(missing_values=pd.NA, strategy='mean').set_output(transform="pandas").fit_transform(weather)
weather['date'] = pd.to_datetime(weather.time, unit='s')
weather = weather.drop(columns="time")
weather

Unnamed: 0,temperature,humidity,visibility,apparentTemperature,pressure,windSpeed,cloudCover,windBearing,precipIntensity,dewPoint,precipProbability,date
0,11.94,0.76,9.95,2.40,1020.81,5.94,0.03,225.0,0.0,5.78,0.0,2015-01-01 05:00:00
1,13.04,0.73,9.95,3.96,1019.65,5.72,0.04,196.0,0.0,6.14,0.0,2015-01-01 06:00:00
2,15.46,0.69,9.80,7.00,1019.15,5.55,0.10,209.0,0.0,7.16,0.0,2015-01-01 07:00:00
3,16.73,0.65,9.70,6.61,1018.56,7.56,0.06,217.0,0.0,7.04,0.0,2015-01-01 08:00:00
4,16.96,0.62,9.75,6.73,1018.00,7.75,0.06,226.0,0.0,6.33,0.0,2015-01-01 09:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...
8779,29.62,0.71,10.00,24.18,1028.14,5.11,0.00,292.0,0.0,21.46,0.0,2017-01-01 00:00:00
8780,26.61,0.78,10.00,26.61,1029.15,1.60,0.00,306.0,0.0,20.81,0.0,2017-01-01 01:00:00
8781,24.42,0.82,10.00,18.56,1030.22,4.64,0.00,299.0,0.0,19.80,0.0,2017-01-01 02:00:00
8782,23.20,0.85,9.89,23.20,1031.12,2.41,0.00,221.0,0.0,19.34,0.0,2017-01-01 03:00:00


In [128]:
df_weather = weather[weather.date.isin(df.date)]
full_df = pd.merge(df, df_weather)
full_df

Unnamed: 0,date,consumption,temperature,humidity,visibility,apparentTemperature,pressure,windSpeed,cloudCover,windBearing,precipIntensity,dewPoint,precipProbability
0,2016-01-01 00:00:00,1.657284,36.24,0.68,10.00,30.36,1017.50,7.40,0.760000,281.0,0.0,26.56,0.0
1,2016-01-01 01:00:00,1.590161,36.78,0.66,10.00,31.04,1017.30,7.35,0.310000,283.0,0.0,26.68,0.0
2,2016-01-01 02:00:00,1.554063,36.55,0.64,10.00,29.70,1017.25,9.32,0.310000,278.0,0.0,25.69,0.0
3,2016-01-01 03:00:00,1.246576,36.61,0.63,10.00,31.10,1017.24,6.91,0.310000,279.0,0.0,25.33,0.0
4,2016-01-01 04:00:00,1.836924,36.27,0.62,10.00,30.06,1017.04,7.97,0.141463,278.0,0.0,24.66,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8415,2016-12-15 16:00:00,1.116533,21.10,0.37,9.73,5.24,1007.76,21.35,0.170000,293.0,0.0,-1.02,0.0
8416,2016-12-15 17:00:00,1.360014,20.04,0.41,8.09,4.71,1007.87,18.85,0.240000,298.0,0.0,0.41,0.0
8417,2016-12-15 18:00:00,0.954215,19.56,0.37,8.77,3.77,1008.44,19.70,0.400000,302.0,0.0,-2.54,0.0
8418,2016-12-15 19:00:00,1.569412,19.03,0.31,9.85,2.10,1009.23,22.57,0.290000,305.0,0.0,-6.80,0.0


In [129]:
full_df.drop(columns="date").to_numpy()

array([[ 1.65728417e+00,  3.62400000e+01,  6.80000000e-01, ...,
         0.00000000e+00,  2.65600000e+01,  0.00000000e+00],
       [ 1.59016111e+00,  3.67800000e+01,  6.60000000e-01, ...,
         0.00000000e+00,  2.66800000e+01,  0.00000000e+00],
       [ 1.55406250e+00,  3.65500000e+01,  6.40000000e-01, ...,
         0.00000000e+00,  2.56900000e+01,  0.00000000e+00],
       ...,
       [ 9.54215000e-01,  1.95600000e+01,  3.70000000e-01, ...,
         0.00000000e+00, -2.54000000e+00,  0.00000000e+00],
       [ 1.56941167e+00,  1.90300000e+01,  3.10000000e-01, ...,
         0.00000000e+00, -6.80000000e+00,  0.00000000e+00],
       [ 4.77777780e-03,  1.79400000e+01,  2.60000000e-01, ...,
         0.00000000e+00, -1.09900000e+01,  0.00000000e+00]])