In [47]:
# Config Data Structure
import pandas as pd
from datetime import datetime as dt
from pymongo import MongoClient as mc
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from collections import Counter
import pprint as pp
import random as ran

mongo_uri = "mongodb://localhost:27017"
client = mc(mongo_uri)
keti_db = client.keti_pattern_recognition

household_col = keti_db.household_info
weather_col = keti_db.weather_info

In [48]:
# TimeSlot In
hh_db_datas = household_col.find_one({"uid": "아파트1-104-1206"})
hh_db_datas

uid_in, timeslot = hh_db_datas['uid'], hh_db_datas['timeslot']

datelist = [
    dt.strptime(ts['time'], "%Y-%m-%d T%H:%M %z").date()
    for ts in timeslot
]
datelist = list(set(datelist))
datelist.sort()

ts_datas = {}
start_idx = 0
end_idx = 96
enl = 1

for date in datelist:
    ts_datas[date] = [ts['power'] *
                      enl for ts in timeslot[start_idx:end_idx]]
    start_idx = end_idx
    end_idx = end_idx + 96

ts_datas = pd.DataFrame(ts_datas).T
hh_datas = ts_datas.reset_index().copy()

hh_datas.rename(columns={"index": "date"}, inplace=True)
hh_datas['date'] = pd.to_datetime(hh_datas['date'])

hh_datas.set_index('date', inplace=True)

# Merging
merge_size = 4
merge_datas = pd.DataFrame()
for date in hh_datas.index:
    merge_ts = []
    new_ts_size = round(len(hh_datas.loc[date]) / merge_size)
    
    for idx in range(0,new_ts_size):
        merge_ts.append(
            hh_datas.loc[date][merge_size * idx:merge_size * (idx + 1)].sum()
        )
    merge_datas[date] = merge_ts
    
merge_datas

Unnamed: 0,2018-05-01,2018-05-02,2018-05-03,2018-05-04,2018-05-05,2018-05-06,2018-05-07,2018-05-08,2018-05-09,2018-05-10,...,2019-04-21,2019-04-22,2019-04-23,2019-04-24,2019-04-25,2019-04-26,2019-04-27,2019-04-28,2019-04-29,2019-04-30
0,0.341,0.275,0.183,0.309,0.305,0.397,0.347,0.345,0.312,0.321,...,0.182,0.06,0.063,0.066,0.149,0.164,0.13,0.29,0.056,0.045
1,0.337,0.201,0.235,0.308,0.179,0.409,0.178,0.272,0.191,0.208,...,0.209,0.038,0.049,0.062,0.052,0.063,0.046,0.267,0.053,0.044
2,0.324,0.176,0.167,0.309,0.18,0.4,0.173,0.206,0.183,0.203,...,0.197,0.05,0.041,0.046,0.067,0.065,0.042,0.244,0.062,0.059
3,0.319,0.21,0.165,0.309,0.172,0.384,0.176,0.204,0.173,0.189,...,0.194,0.06,0.064,0.051,0.054,0.039,0.06,0.276,0.063,0.058
4,0.235,0.199,0.163,0.311,0.171,0.276,0.178,0.173,0.17,0.184,...,0.046,0.054,0.061,0.064,0.041,0.056,0.033,0.232,0.061,0.033
5,0.169,0.202,0.164,0.217,0.174,0.206,0.182,0.18,0.17,0.172,...,0.066,0.035,0.035,0.064,0.066,0.063,0.054,0.135,0.087,0.054
6,0.2,0.199,0.162,0.218,0.169,0.2,0.18,0.188,0.207,0.169,...,0.068,0.062,0.057,0.047,0.063,0.071,0.059,0.093,0.05,0.059
7,0.171,0.252,0.173,0.212,0.164,0.199,0.232,0.179,0.215,0.216,...,0.049,0.06,0.063,0.049,0.039,0.046,0.028,0.093,0.046,0.049
8,0.17,0.213,0.226,0.242,0.162,0.199,0.221,0.212,0.244,0.169,...,0.05,0.04,0.046,0.064,0.059,0.062,0.059,0.066,0.051,0.04
9,0.172,0.173,0.178,0.187,0.166,0.2,0.217,0.171,0.204,0.17,...,0.141,0.05,0.046,0.063,0.065,0.049,0.047,0.089,0.06,0.058


In [52]:
# Set Test Datas
# 계절별 10% 랜덤하게
def get_season_no(month):
    if month in [3,4,5]:
        return 1 # 봄
    elif month in [6,7,8]:
        return 2 # 여름
    elif month in [9,10,11]:
        return 3 # 가을
    elif month in [12,1,2]:
        return 4 # 겨울
    
test_merge_datas = pd.DataFrame();

for i in range(1,5):
    filter_list = list(filter(lambda date: get_season_no(date.month) == i, merge_datas.columns))
    test_list_idx = list()
    while True:
        filter_data = filter_list[ran.randrange(0,len(filter_list))]
        if filter_data not in test_list_idx:
            test_list_idx.append(filter_data)
            
        if len(test_list_idx) >= (len(filter_list) * 15 / 100):
            break;
    test_merge_datas = pd.concat([test_merge_datas, merge_datas[test_list_idx]], axis=1)
    merge_datas.drop(test_list_idx, axis=1, inplace=True)

test_merge_datas

Unnamed: 0,2019-04-27,2019-04-11,2019-03-16,2018-05-05,2019-03-10,2018-05-26,2018-05-10,2019-03-14,2019-03-08,2019-04-14,...,2018-12-05,2019-02-21,2019-02-12,2019-02-10,2018-12-02,2018-12-30,2019-01-22,2019-02-13,2018-12-16,2019-01-30
0,0.13,0.095,0.046,0.305,0.304,0.31,0.321,0.034,0.135,0.308,...,0.01,0.094,0.221,0.306,0.011,0.032,0.322,0.157,0.047,0.226
1,0.046,0.098,0.041,0.179,0.285,0.255,0.208,0.051,0.159,0.276,...,0.011,0.067,0.129,0.292,0.01,0.033,0.163,0.13,0.033,0.085
2,0.042,0.07,0.058,0.18,0.296,0.211,0.203,0.101,0.091,0.138,...,0.01,0.059,0.132,0.268,0.01,0.032,0.125,0.132,0.033,0.094
3,0.06,0.088,0.046,0.172,0.3,0.207,0.189,0.127,0.08,0.078,...,0.01,0.077,0.122,0.106,0.01,0.048,0.114,0.121,0.032,0.077
4,0.033,0.095,0.041,0.171,0.285,0.202,0.184,0.13,0.077,0.092,...,0.01,0.076,0.128,0.06,0.01,0.033,0.132,0.13,0.036,0.102
5,0.054,0.079,0.058,0.174,0.193,0.202,0.172,0.14,0.096,0.087,...,0.01,0.048,0.169,0.078,0.01,0.033,0.125,0.155,0.046,0.113
6,0.059,0.079,0.043,0.169,0.131,0.172,0.169,0.055,0.103,0.065,...,0.01,0.076,0.123,0.066,0.011,0.033,0.153,0.107,0.032,0.084
7,0.028,0.096,0.043,0.164,0.13,0.193,0.216,0.097,0.062,0.116,...,0.01,0.101,0.118,0.077,0.01,0.042,0.121,0.124,0.033,0.092
8,0.059,0.086,0.057,0.162,0.155,0.181,0.169,0.072,0.072,0.13,...,0.011,0.058,0.101,0.067,0.01,0.038,0.097,0.095,0.032,0.059
9,0.047,0.07,0.04,0.166,0.103,0.176,0.17,0.092,0.084,0.108,...,0.01,0.065,0.106,0.175,0.01,0.033,0.103,0.112,0.048,0.069


In [53]:
merge_datas

Unnamed: 0,2018-05-02,2018-05-03,2018-05-07,2018-05-08,2018-05-09,2018-05-11,2018-05-12,2018-05-14,2018-05-15,2018-05-16,...,2019-04-20,2019-04-21,2019-04-22,2019-04-23,2019-04-24,2019-04-25,2019-04-26,2019-04-28,2019-04-29,2019-04-30
0,0.275,0.183,0.347,0.345,0.312,0.182,0.17,0.304,0.181,0.207,...,0.06,0.182,0.06,0.063,0.066,0.149,0.164,0.29,0.056,0.045
1,0.201,0.235,0.178,0.272,0.191,0.175,0.171,0.218,0.171,0.203,...,0.071,0.209,0.038,0.049,0.062,0.052,0.063,0.267,0.053,0.044
2,0.176,0.167,0.173,0.206,0.183,0.172,0.206,0.218,0.172,0.226,...,0.041,0.197,0.05,0.041,0.046,0.067,0.065,0.244,0.062,0.059
3,0.21,0.165,0.176,0.204,0.173,0.168,0.213,0.213,0.171,0.217,...,0.046,0.194,0.06,0.064,0.051,0.054,0.039,0.276,0.063,0.058
4,0.199,0.163,0.178,0.173,0.17,0.167,0.205,0.21,0.17,0.199,...,0.06,0.046,0.054,0.061,0.064,0.041,0.056,0.232,0.061,0.033
5,0.202,0.164,0.182,0.18,0.17,0.166,0.202,0.21,0.17,0.199,...,0.055,0.066,0.035,0.035,0.064,0.066,0.063,0.135,0.087,0.054
6,0.199,0.162,0.18,0.188,0.207,0.167,0.2,0.206,0.171,0.199,...,0.033,0.068,0.062,0.057,0.047,0.063,0.071,0.093,0.05,0.059
7,0.252,0.173,0.232,0.179,0.215,0.228,0.17,0.252,0.273,0.285,...,0.057,0.049,0.06,0.063,0.049,0.039,0.046,0.093,0.046,0.049
8,0.213,0.226,0.221,0.212,0.244,0.186,0.172,0.204,0.21,0.198,...,0.06,0.05,0.04,0.046,0.064,0.059,0.062,0.066,0.051,0.04
9,0.173,0.178,0.217,0.171,0.204,0.223,0.181,0.202,0.203,0.201,...,0.044,0.141,0.05,0.046,0.063,0.065,0.049,0.089,0.06,0.058


In [12]:
# Config Training Datas
wt_db_datas = weather_col.find()
wt_datas = pd.DataFrame()
for wt in wt_db_datas:
    tmp = pd.DataFrame()
    tmp['date'] = [wt['date']]
    tmp['weather'] = [wt['weather']]
    tmp['avg_ta'] = [wt['avgTa']]
    tmp['avg_rhm'] = [wt['avgRhm']]
    
    wt_datas = pd.concat([wt_datas, tmp])

# weather 정수 인코딩
weather_count = Counter(wt_datas['weather'])
weather_integer = dict()
rank = 1
for key, count in weather_count.most_common():
    weather_integer[key] = rank
    rank += 1
pp.pprint(weather_integer)
wt_datas['weather_no'] = [weather_integer[weather] for weather in wt_datas['weather']]
wt_datas

def get_season_no(month):
    if month in [3,4,5]:
        return 1 # 봄
    elif month in [6,7,8]:
        return 2 # 여름
    elif month in [9,10,11]:
        return 3 # 가을
    elif month in [12,1,2]:
        return 4 # 겨울
    
# Date, Season Utils
wt_datas['season_no'] = [get_season_no(weather.month) for weather in wt_datas['date']] 
wt_datas['day_no'] = [weather.weekday() + 1 for weather in wt_datas['date']] 

sample_weather_col = ['season_no','day_no','weather_no','avg_ta', 'avg_rhm']
sample_weather_col_2 = ['date','season_no','day_no','weather','weather_no','avg_ta', 'avg_rhm']
wt_datas[sample_weather_col_2]

{'눈': 5,
 '박무': 4,
 '비': 3,
 '소나기': 7,
 '안개': 10,
 '안개비': 11,
 '연무': 2,
 '진눈깨비': 12,
 '채운': 8,
 '특이사항 없음': 1,
 '햇무리': 6,
 '황사': 9}


Unnamed: 0,date,season_no,day_no,weather,weather_no,avg_ta,avg_rhm
0,2018-05-01,1,2,연무,2,20.4,72.8
0,2018-05-02,1,3,비,3,15.1,90.4
0,2018-05-03,1,4,비,3,11.2,62.4
0,2018-05-04,1,5,특이사항 없음,1,14.1,45.5
0,2018-05-05,1,6,특이사항 없음,1,18.3,46.8
...,...,...,...,...,...,...,...
0,2019-04-26,1,5,비,3,8.4,82.1
0,2019-04-27,1,6,햇무리,6,11.6,52.6
0,2019-04-28,1,7,비,3,12.9,48.8
0,2019-04-29,1,1,햇무리,6,13.0,52.9


In [55]:
# Config Sample Datas - Padding
# Data Preprocessing
def get_samples(datas):
    samples = list()
    for col in datas:
        timeslot = datas[col].values.tolist()
        weather = wt_datas[wt_datas['date'] == col][sample_weather_col].values.tolist()[0]
        for time in range(1,25):
            samples.append(weather + timeslot[:time])

    print("Samples Before Padding Process")
    print(np.array(samples))
    print("\nTranining Sample Size : {}".format(len(samples)))
    
    return samples

def get_padding_samples(samples):    
    # Padding
    pad_samples = list()
    SAMPLE_MAX_LEN = max([len(s) for s in samples])
    print("Tranining Sample MAX_LEN : {}".format(SAMPLE_MAX_LEN))
    for sample in samples:
        err = SAMPLE_MAX_LEN - len(sample)
        if err == 0:
            pad_samples.append(sample)
        else:
            pad_data = [0 for i in range(0, err)]
            pad_samples.append(pad_data + sample)
    print("\nFinal Samples")
    print(np.array(pad_samples))
    
    return pad_samples
    
test_samples = get_samples(test_merge_datas)
test_samples = get_padding_samples(test_samples)

training_samples = get_samples(merge_datas)
training_samples = get_padding_samples(training_samples)

Samples Before Padding Process
[list([1, 6, 6, '11.6', '52.6', 0.13])
 list([1, 6, 6, '11.6', '52.6', 0.13, 0.046])
 list([1, 6, 6, '11.6', '52.6', 0.13, 0.046, 0.042]) ...
 list([4, 3, 2, '1.8', '54.6', 0.226, 0.08499999999999999, 0.094, 0.077, 0.10200000000000001, 0.113, 0.084, 0.092, 0.059, 0.069, 0.075, 0.063, 0.07400000000000001, 0.068, 0.06799999999999999, 0.077, 0.067, 0.07699999999999999, 0.067, 0.072, 0.21400000000000002, 0.301])
 list([4, 3, 2, '1.8', '54.6', 0.226, 0.08499999999999999, 0.094, 0.077, 0.10200000000000001, 0.113, 0.084, 0.092, 0.059, 0.069, 0.075, 0.063, 0.07400000000000001, 0.068, 0.06799999999999999, 0.077, 0.067, 0.07699999999999999, 0.067, 0.072, 0.21400000000000002, 0.301, 0.28400000000000003])
 list([4, 3, 2, '1.8', '54.6', 0.226, 0.08499999999999999, 0.094, 0.077, 0.10200000000000001, 0.113, 0.084, 0.092, 0.059, 0.069, 0.075, 0.063, 0.07400000000000001, 0.068, 0.06799999999999999, 0.077, 0.067, 0.07699999999999999, 0.067, 0.072, 0.21400000000000002, 0.30

  print(np.array(samples))



[list([1, 3, 3, '15.1', '90.4', 0.27499999999999997])
 list([1, 3, 3, '15.1', '90.4', 0.27499999999999997, 0.201])
 list([1, 3, 3, '15.1', '90.4', 0.27499999999999997, 0.201, 0.17600000000000002])
 ...
 list([1, 2, 6, '15.5', '52.8', 0.045, 0.044, 0.059, 0.058, 0.033, 0.054, 0.059000000000000004, 0.049, 0.04, 0.057999999999999996, 0.056, 0.03, 0.057999999999999996, 0.06, 0.038, 0.048, 0.062, 0.051000000000000004, 0.038, 0.06099999999999999, 0.059000000000000004, 0.03])
 list([1, 2, 6, '15.5', '52.8', 0.045, 0.044, 0.059, 0.058, 0.033, 0.054, 0.059000000000000004, 0.049, 0.04, 0.057999999999999996, 0.056, 0.03, 0.057999999999999996, 0.06, 0.038, 0.048, 0.062, 0.051000000000000004, 0.038, 0.06099999999999999, 0.059000000000000004, 0.03, 0.055999999999999994])
 list([1, 2, 6, '15.5', '52.8', 0.045, 0.044, 0.059, 0.058, 0.033, 0.054, 0.059000000000000004, 0.049, 0.04, 0.057999999999999996, 0.056, 0.03, 0.057999999999999996, 0.06, 0.038, 0.048, 0.062, 0.051000000000000004, 0.038, 0.0609999

In [59]:
# Set Tranining Data
training_samples = np.array(training_samples)

train_X = training_samples[:,:-1]
train_Y = training_samples[:,-1]

# Set Test Data
test_samples = np.array(test_samples)
test_X = test_samples[:,:-1]
test_y = test_samples[:,-1]

print("Input Data For Training")
print(np.array(train_X))
print()
print("Output Data For Training")
print(np.array(train_Y))

print("Input Data For Test")
print(np.array(test_X))
print()
print("Output Data For Test")
print(np.array(test_y))

Input Data For Training
[['0' '0' '0' ... '3' '15.1' '90.4']
 ['0' '0' '0' ... '15.1' '90.4' '0.27499999999999997']
 ['0' '0' '0' ... '90.4' '0.27499999999999997' '0.201']
 ...
 ['0' '0' '1' ... '0.038' '0.06099999999999999' '0.059000000000000004']
 ['0' '1' '2' ... '0.06099999999999999' '0.059000000000000004' '0.03']
 ['1' '2' '6' ... '0.059000000000000004' '0.03' '0.055999999999999994']]

Output Data For Training
['0.27499999999999997' '0.201' '0.17600000000000002' ... '0.03'
 '0.055999999999999994' '0.057999999999999996']
Input Data For Test
[['0' '0' '0' ... '6' '11.6' '52.6']
 ['0' '0' '0' ... '11.6' '52.6' '0.13']
 ['0' '0' '0' ... '52.6' '0.13' '0.046']
 ...
 ['0' '0' '4' ... '0.067' '0.072' '0.21400000000000002']
 ['0' '4' '3' ... '0.072' '0.21400000000000002' '0.301']
 ['4' '3' '2' ... '0.21400000000000002' '0.301' '0.28400000000000003']]

Output Data For Test
['0.13' '0.046' '0.042' ... '0.301' '0.28400000000000003' '0.186']


In [6]:
client.close()