In [1]:
# Config Data Structure
import pandas as pd
from datetime import datetime as dt
from pymongo import MongoClient as mc
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from collections import Counter

# Connection settings: a MongoDB server on localhost, default port 27017.
mongo_uri = "mongodb://localhost:27017"
client = mc(mongo_uri)
# Database holding the collections used throughout this notebook.
keti_db = client.keti_pattern_recognition

# Collection handles: per-household documents (queried by "uid" below) and
# weather documents (iterated in full when building the training features).
household_col = keti_db.household_info
weather_col = keti_db.weather_info

In [2]:
# TimeSlot In
# Load one household's power readings and pivot them into a (day x slot)
# table: one row per calendar date, one column per slot position in the day.
SLOTS_PER_DAY = 96  # expected number of readings per calendar day

hh_db_datas = household_col.find_one({"uid": "아파트1-104-1206"})
if hh_db_datas is None:
    # find_one returns None when nothing matches; fail with a clear message
    # instead of a TypeError on the subscript below.
    raise LookupError("household_info has no document for the requested uid")

uid_in, timeslot = hh_db_datas['uid'], hh_db_datas['timeslot']

enl = 1  # scale factor applied to every power reading

# Parse each reading's timestamp once and order the readings chronologically,
# so the result does not depend on the storage order inside the document.
parsed_readings = [
    (dt.strptime(ts['time'], "%Y-%m-%d T%H:%M %z"), ts['power'] * enl)
    for ts in timeslot
]
parsed_readings.sort(key=lambda reading: reading[0])

# Bucket every reading under its own calendar date.  Cutting the flat list
# into fixed 96-element slices would silently shift every following day as
# soon as one reading is missing or duplicated.
power_by_date = {}
for stamp, power in parsed_readings:
    power_by_date.setdefault(stamp.date(), []).append(power)

datelist = sorted(power_by_date)

# Every day must contribute exactly SLOTS_PER_DAY readings, otherwise the
# columns of the pivoted table would not line up across days.
incomplete_days = [
    "{} ({} slots)".format(day, len(power_by_date[day]))
    for day in datelist
    if len(power_by_date[day]) != SLOTS_PER_DAY
]
if incomplete_days:
    raise ValueError(
        "expected {} slots per day, got: {}".format(
            SLOTS_PER_DAY, ", ".join(incomplete_days)
        )
    )

# Rows = dates (as datetime.date), columns = slot position 0..95.
ts_datas = pd.DataFrame({day: power_by_date[day] for day in datelist}).T

# Same table, indexed by a DatetimeIndex named 'date'.
hh_datas = ts_datas.copy()
hh_datas.index = pd.to_datetime(hh_datas.index).rename('date')

# Merging
# Collapse every `merge_size` consecutive slots of a day into one summed
# value.  Result: one column per date, one row per merged slot.
merge_size = 4

slot_count = hh_datas.shape[1]
# Ceiling division: a trailing partial group is always kept.  round() would
# keep or drop it depending on the remainder (and on banker's rounding).
merged_slot_count = -(-slot_count // merge_size)

# Sum each group of columns for all dates at once, then build the frame in a
# single step instead of inserting one column per date.
merged_by_slot = {
    idx: hh_datas.iloc[:, merge_size * idx:merge_size * (idx + 1)].sum(axis=1)
    for idx in range(merged_slot_count)
}
# Transpose so dates become the columns; drop the inherited 'date' axis label
# so the column header stays unnamed.
merge_datas = pd.DataFrame(merged_by_slot).T.rename_axis(columns=None)

merge_datas

Unnamed: 0,2018-05-01,2018-05-02,2018-05-03,2018-05-04,2018-05-05,2018-05-06,2018-05-07,2018-05-08,2018-05-09,2018-05-10,...,2019-04-21,2019-04-22,2019-04-23,2019-04-24,2019-04-25,2019-04-26,2019-04-27,2019-04-28,2019-04-29,2019-04-30
0,0.341,0.275,0.183,0.309,0.305,0.397,0.347,0.345,0.312,0.321,...,0.182,0.06,0.063,0.066,0.149,0.164,0.13,0.29,0.056,0.045
1,0.337,0.201,0.235,0.308,0.179,0.409,0.178,0.272,0.191,0.208,...,0.209,0.038,0.049,0.062,0.052,0.063,0.046,0.267,0.053,0.044
2,0.324,0.176,0.167,0.309,0.18,0.4,0.173,0.206,0.183,0.203,...,0.197,0.05,0.041,0.046,0.067,0.065,0.042,0.244,0.062,0.059
3,0.319,0.21,0.165,0.309,0.172,0.384,0.176,0.204,0.173,0.189,...,0.194,0.06,0.064,0.051,0.054,0.039,0.06,0.276,0.063,0.058
4,0.235,0.199,0.163,0.311,0.171,0.276,0.178,0.173,0.17,0.184,...,0.046,0.054,0.061,0.064,0.041,0.056,0.033,0.232,0.061,0.033
5,0.169,0.202,0.164,0.217,0.174,0.206,0.182,0.18,0.17,0.172,...,0.066,0.035,0.035,0.064,0.066,0.063,0.054,0.135,0.087,0.054
6,0.2,0.199,0.162,0.218,0.169,0.2,0.18,0.188,0.207,0.169,...,0.068,0.062,0.057,0.047,0.063,0.071,0.059,0.093,0.05,0.059
7,0.171,0.252,0.173,0.212,0.164,0.199,0.232,0.179,0.215,0.216,...,0.049,0.06,0.063,0.049,0.039,0.046,0.028,0.093,0.046,0.049
8,0.17,0.213,0.226,0.242,0.162,0.199,0.221,0.212,0.244,0.169,...,0.05,0.04,0.046,0.064,0.059,0.062,0.059,0.066,0.051,0.04
9,0.172,0.173,0.178,0.187,0.166,0.2,0.217,0.171,0.204,0.17,...,0.141,0.05,0.046,0.063,0.065,0.049,0.047,0.089,0.06,0.058


In [14]:
# Config Training Datas
# Load every weather document into one frame, keeping only the fields used
# below and renaming them to snake_case.

wt_db_datas = weather_col.find()
# Collect plain records first and build the frame once: concatenating inside
# the loop is quadratic and leaves every row with the same index label (0).
wt_records = [
    {
        'date': wt['date'],
        'weather': wt['weather'],
        'avg_ta': wt['avgTa'],
        'avg_rhm': wt['avgRhm'],
    }
    for wt in wt_db_datas
]
# Explicit columns keep the frame well-formed even if the collection is empty.
wt_datas = pd.DataFrame(
    wt_records, columns=['date', 'weather', 'avg_ta', 'avg_rhm']
)

# Integer-encode the weather labels by frequency: the most common label gets
# 1, the next one 2, and so on.
weather_count = Counter(wt_datas['weather'])
weather_integer = {
    key: rank
    for rank, (key, _) in enumerate(weather_count.most_common(), start=1)
}

wt_datas['weather_no'] = [weather_integer[weather] for weather in wt_datas['weather']]

def get_season_no(month):
    """Return the season code for a calendar month number.

    0 = spring (3, 4, 5), 1 = summer (6, 7, 8), 2 = autumn (9, 10, 11),
    3 = winter (12, 1, 2).  Any other value yields None.
    """
    season_table = (
        ((3, 4, 5), 0),    # spring
        ((6, 7, 8), 1),    # summer
        ((9, 10, 11), 2),  # autumn
        ((12, 1, 2), 3),   # winter
    )
    for months, season_no in season_table:
        if month in months:
            return season_no
    return None
    
# Date, Season Utils
# Derive two calendar features from each weather record's date: the season
# code from its month, and the weekday number (Monday = 0).
record_dates = list(wt_datas['date'])
wt_datas['season_no'] = [get_season_no(day.month) for day in record_dates]
wt_datas['day_no'] = [day.weekday() for day in record_dates]

# Feature columns, in the order they are placed at the front of each sample.
sample_weather_col = [
    'season_no',
    'day_no',
    'weather_no',
    'avg_ta',
    'avg_rhm',
]
wt_datas[sample_weather_col]

Unnamed: 0,season_no,day_no,weather_no,avg_ta,avg_rhm
0,0,1,2,20.4,72.8
0,0,2,3,15.1,90.4
0,0,3,3,11.2,62.4
0,0,4,1,14.1,45.5
0,0,5,1,18.3,46.8
...,...,...,...,...,...
0,0,4,3,8.4,82.1
0,0,5,6,11.6,52.6
0,0,6,3,12.9,48.8
0,0,0,6,13.0,52.9


In [43]:
# Config Sample Datas - Padding

# Data Preprocessing
# Each sample is a day's weather features followed by the first k merged
# power values of that day, for every k from 1 up to the number of slots.

# Index the weather feature rows by date once, instead of scanning the whole
# frame for every day.  setdefault keeps the first row when a date repeats.
weather_by_date = {}
for day, features in zip(wt_datas['date'],
                         wt_datas[sample_weather_col].values.tolist()):
    weather_by_date.setdefault(day, features)

samples = list()
for col in merge_datas:
    timeslot = merge_datas[col].values.tolist()
    if col not in weather_by_date:
        # Name the offending date rather than failing with a bare IndexError.
        raise KeyError("no weather record for date {}".format(col))
    weather = weather_by_date[col]
    # One prefix per slot actually present, not a fixed 24.
    for time in range(1, len(timeslot) + 1):
        samples.append(weather + timeslot[:time])
print("Tranining Sample Size : {}".format(len(samples)))

# Padding
# Longest sample length; default=0 keeps this defined when there are no samples.
SAMPLE_MAX_LEN = max((len(s) for s in samples), default=0)
print("Tranining Sample MAX_LEN : {}".format(SAMPLE_MAX_LEN))

Tranining Sample Size : 8760
Tranining Sample MAX_LEN : 29


In [23]:
# Release the MongoDB connection opened in the first cell; run this last.
client.close()