In [36]:
# Config Data Structure
import pandas as pd
from datetime import datetime as dt
from pymongo import MongoClient as mc
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from collections import Counter
import pprint as pp
import random as ran
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN, LSTM
from tensorflow.keras.utils import to_categorical
from functools import reduce
import matplotlib.pyplot as plt
import seaborn as sns

# Plot defaults. seaborn is configured first, then matplotlib's rcParams are
# set explicitly so the figure size and font hold for plain matplotlib plots.
sns.set(rc={'figure.figsize': (15.7, 13.27)})
plt.rcParams.update({
    'figure.figsize': (15.7, 13.27),
    # NOTE(review): presumably chosen so Hangul labels render (a macOS font) — confirm
    'font.family': 'AppleGothic',
})

# MongoDB handles used by the cells below.
mongo_uri = "mongodb://localhost:27017"
client = mc(mongo_uri)
keti_db = client["keti_pattern_recognition"]

household_col, cluster_col, weather_col = (
    keti_db[collection_name]
    for collection_name in ("household_info", "cluster_info", "weather_info")
)

In [37]:
uid = "아파트2-2-302"

# Readings expected per calendar day. The value 96 is what this cell has always
# used (presumably 15-minute readings — TODO confirm against the data source).
SLOTS_PER_DAY = 96
# Multiplier applied to every power reading (1 leaves the values unchanged).
enl = 1
# Number of consecutive readings summed into one merged value.
merge_size = 4

# TimeSlot In
hh_db_datas = household_col.find_one({"uid": uid})
if hh_db_datas is None:
    # find_one returns None when nothing matches; fail with a readable message
    # instead of "'NoneType' object is not subscriptable".
    raise LookupError(f"no household_info document found for uid {uid!r}")

uid_in, timeslot = hh_db_datas['uid'], hh_db_datas['timeslot']

# Group every reading under the date its own timestamp carries. Slicing the
# list in fixed-size chunks would mislabel every later day as soon as a single
# reading is missing or duplicated.
power_by_date = {}
for ts in timeslot:
    slot_date = dt.strptime(ts['time'], "%Y-%m-%d T%H:%M %z").date()
    power_by_date.setdefault(slot_date, []).append(ts['power'] * enl)

datelist = sorted(power_by_date)

# Only complete days can be laid out as fixed-width rows.
complete_days = {
    day: power_by_date[day]
    for day in datelist
    if len(power_by_date[day]) == SLOTS_PER_DAY
}
skipped_days = [day for day in datelist if day not in complete_days]
if skipped_days:
    print(
        f"Skipping {len(skipped_days)} day(s) without exactly "
        f"{SLOTS_PER_DAY} readings: {skipped_days}"
    )

# One row per day, one column per reading within the day.
ts_datas = pd.DataFrame(complete_days).T

hh_datas = ts_datas.copy()
hh_datas.index = pd.to_datetime(hh_datas.index).rename("date")

# Merging
# Sum each run of `merge_size` readings, round to 3 decimals and scale by 1000.
# A trailing run shorter than `merge_size` is dropped.
windows_per_day = hh_datas.shape[1] // merge_size
merged_by_date = {}
for date, day_row in hh_datas.iterrows():
    merge_ts = [
        round(day_row.iloc[merge_size * idx:merge_size * (idx + 1)].sum(), 3) * 1000
        for idx in range(windows_per_day)
    ]

    # Keep only days with at least three distinct merged values
    # (presumably to drop flat / near-constant days — confirm intent).
    if len(set(merge_ts)) >= 3:
        merged_by_date[date] = merge_ts

# Build the frame once instead of inserting one column per day, which
# fragments the DataFrame. Columns are dates, rows are merged windows.
merge_datas = pd.DataFrame(merged_by_date)

    
def get_season_no(month):
    """Return the season code for a calendar month number.

    1 = spring (Mar-May), 2 = summer (Jun-Aug), 3 = autumn (Sep-Nov),
    4 = winter (Dec-Feb). Any value outside 1-12 yields ``None``.
    """
    season_table = (
        (1, (3, 4, 5)),     # spring
        (2, (6, 7, 8)),     # summer
        (3, (9, 10, 11)),   # autumn
        (4, (12, 1, 2)),    # winter
    )
    for season_no, months in season_table:
        if month in months:
            return season_no
    return None
    
# Keep only the day-columns whose month maps to season 3 (autumn) in get_season_no.
separate_datas_col = [
    day for day in merge_datas.columns if get_season_no(day.month) == 3
]
merge_datas = merge_datas[separate_datas_col]
merge_datas

Unnamed: 0,2018-09-01,2018-09-02,2018-09-03,2018-09-04,2018-09-05,2018-09-06,2018-09-07,2018-09-08,2018-09-09,2018-09-10,...,2018-11-21,2018-11-22,2018-11-23,2018-11-24,2018-11-25,2018-11-26,2018-11-27,2018-11-28,2018-11-29,2018-11-30
0,238.0,176.0,352.0,352.0,205.0,262.0,214.0,210.0,167.0,152.0,...,720.0,734.0,668.0,801.0,732.0,1021.0,719.0,742.0,712.0,769.0
1,228.0,122.0,352.0,306.0,186.0,169.0,166.0,145.0,158.0,150.0,...,718.0,711.0,657.0,760.0,664.0,1019.0,969.0,925.0,706.0,691.0
2,224.0,136.0,350.0,304.0,203.0,168.0,146.0,180.0,239.0,140.0,...,888.0,718.0,763.0,709.0,630.0,1005.0,1010.0,676.0,680.0,683.0
3,236.0,124.0,335.0,190.0,188.0,166.0,146.0,202.0,160.0,176.0,...,931.0,1152.0,518.0,1174.0,687.0,1367.0,822.0,718.0,661.0,672.0
4,520.0,121.0,346.0,172.0,194.0,146.0,161.0,187.0,154.0,210.0,...,804.0,859.0,619.0,798.0,1142.0,1477.0,850.0,732.0,668.0,678.0
5,324.0,117.0,338.0,182.0,195.0,126.0,161.0,198.0,139.0,201.0,...,720.0,809.0,636.0,696.0,791.0,1530.0,860.0,756.0,706.0,751.0
6,409.0,220.0,329.0,256.0,210.0,136.0,261.0,154.0,146.0,223.0,...,688.0,743.0,677.0,246.0,1209.0,1600.0,698.0,935.0,659.0,733.0
7,331.0,245.0,345.0,164.0,248.0,121.0,195.0,115.0,181.0,207.0,...,915.0,694.0,1133.0,235.0,1158.0,1224.0,592.0,983.0,829.0,750.0
8,496.0,249.0,242.0,128.0,227.0,150.0,185.0,139.0,203.0,206.0,...,780.0,737.0,784.0,984.0,1118.0,1166.0,588.0,802.0,1055.0,727.0
9,445.0,719.0,241.0,138.0,206.0,126.0,208.0,122.0,221.0,289.0,...,600.0,585.0,702.0,840.0,1053.0,1171.0,587.0,693.0,1014.0,728.0


In [38]:
# Fetch the stored clustering result for this household; every entry of its
# 'info' list carries a 'date' string in YYYY-MM-DD form.
cluster_info = cluster_col.find_one({"uid": uid})
infos = cluster_info['info']
filter_idx_list = list(
    map(lambda entry: dt.strptime(entry['date'], "%Y-%m-%d"), infos)
)

# Filter out the days that were judged to be outliers and removed during
# clustering: only the dates present in the clustering result are kept.
merge_datas = merge_datas[filter_idx_list]
merge_datas

Unnamed: 0,2018-09-01,2018-09-03,2018-09-04,2018-09-05,2018-09-06,2018-09-07,2018-09-08,2018-09-09,2018-09-10,2018-09-11,...,2018-11-19,2018-11-20,2018-11-21,2018-11-22,2018-11-23,2018-11-24,2018-11-27,2018-11-28,2018-11-29,2018-11-30
0,238.0,352.0,352.0,205.0,262.0,214.0,210.0,167.0,152.0,243.0,...,847.0,847.0,720.0,734.0,668.0,801.0,719.0,742.0,712.0,769.0
1,228.0,352.0,306.0,186.0,169.0,166.0,145.0,158.0,150.0,177.0,...,1061.0,920.0,718.0,711.0,657.0,760.0,969.0,925.0,706.0,691.0
2,224.0,350.0,304.0,203.0,168.0,146.0,180.0,239.0,140.0,159.0,...,1132.0,852.0,888.0,718.0,763.0,709.0,1010.0,676.0,680.0,683.0
3,236.0,335.0,190.0,188.0,166.0,146.0,202.0,160.0,176.0,135.0,...,1099.0,855.0,931.0,1152.0,518.0,1174.0,822.0,718.0,661.0,672.0
4,520.0,346.0,172.0,194.0,146.0,161.0,187.0,154.0,210.0,122.0,...,1110.0,874.0,804.0,859.0,619.0,798.0,850.0,732.0,668.0,678.0
5,324.0,338.0,182.0,195.0,126.0,161.0,198.0,139.0,201.0,167.0,...,1071.0,897.0,720.0,809.0,636.0,696.0,860.0,756.0,706.0,751.0
6,409.0,329.0,256.0,210.0,136.0,261.0,154.0,146.0,223.0,184.0,...,1583.0,903.0,688.0,743.0,677.0,246.0,698.0,935.0,659.0,733.0
7,331.0,345.0,164.0,248.0,121.0,195.0,115.0,181.0,207.0,181.0,...,1109.0,644.0,915.0,694.0,1133.0,235.0,592.0,983.0,829.0,750.0
8,496.0,242.0,128.0,227.0,150.0,185.0,139.0,203.0,206.0,210.0,...,580.0,631.0,780.0,737.0,784.0,984.0,588.0,802.0,1055.0,727.0
9,445.0,241.0,138.0,206.0,126.0,208.0,122.0,221.0,289.0,185.0,...,714.0,634.0,600.0,585.0,702.0,840.0,587.0,693.0,1014.0,728.0


In [65]:
# Build the cluster pattern
# Distinct cluster labels found in the clustering result.
# NOTE(review): the order of this list comes from set iteration, so the label
# picked by index below is not guaranteed to be stable for every label type.
label_list = list(set(entry['label'] for entry in infos))

cluster_dict = {}

# Dates of all days assigned to the label at position 3 of label_list.
dates = [
    dt.strptime(entry['date'], "%Y-%m-%d")
    for entry in infos
    if entry['label'] == label_list[3]
]
merge_datas[dates]

Unnamed: 0,2018-09-04,2018-09-05,2018-09-06,2018-09-07,2018-09-08,2018-09-09,2018-09-10,2018-09-11,2018-09-12,2018-09-13,...,2018-10-04,2018-10-05,2018-10-08,2018-10-10,2018-10-11,2018-10-12,2018-10-13,2018-10-14,2018-10-18,2018-10-19
0,352.0,205.0,262.0,214.0,210.0,167.0,152.0,243.0,200.0,167.0,...,294.0,147.0,154.0,107.0,98.0,102.0,170.0,115.0,354.0,217.0
1,306.0,186.0,169.0,166.0,145.0,158.0,150.0,177.0,186.0,119.0,...,294.0,129.0,141.0,104.0,104.0,101.0,169.0,131.0,344.0,188.0
2,304.0,203.0,168.0,146.0,180.0,239.0,140.0,159.0,180.0,138.0,...,284.0,148.0,201.0,99.0,98.0,102.0,165.0,108.0,567.0,190.0
3,190.0,188.0,166.0,146.0,202.0,160.0,176.0,135.0,198.0,133.0,...,275.0,193.0,124.0,103.0,102.0,98.0,169.0,126.0,461.0,184.0
4,172.0,194.0,146.0,161.0,187.0,154.0,210.0,122.0,184.0,139.0,...,164.0,170.0,103.0,103.0,99.0,101.0,174.0,131.0,304.0,216.0
5,182.0,195.0,126.0,161.0,198.0,139.0,201.0,167.0,156.0,157.0,...,132.0,118.0,119.0,107.0,99.0,97.0,261.0,127.0,283.0,187.0
6,256.0,210.0,136.0,261.0,154.0,146.0,223.0,184.0,186.0,151.0,...,142.0,326.0,103.0,109.0,102.0,102.0,186.0,125.0,291.0,278.0
7,164.0,248.0,121.0,195.0,115.0,181.0,207.0,181.0,192.0,300.0,...,154.0,265.0,105.0,107.0,98.0,97.0,164.0,156.0,294.0,364.0
8,128.0,227.0,150.0,185.0,139.0,203.0,206.0,210.0,187.0,245.0,...,220.0,189.0,576.0,105.0,99.0,101.0,247.0,198.0,260.0,367.0
9,138.0,206.0,126.0,208.0,122.0,221.0,289.0,185.0,184.0,292.0,...,184.0,214.0,189.0,100.0,99.0,102.0,310.0,132.0,201.0,459.0
