In [285]:
# Config Data Structure
import pandas as pd
from datetime import datetime as dt
from pymongo import MongoClient as mc
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from collections import Counter
import pprint as pp
import random as ran
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN, LSTM
from tensorflow.keras.utils import to_categorical
from functools import reduce
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(rc={'figure.figsize': (15.7, 13.27)})
plt.rcParams['figure.figsize'] = 15.7,13.27
plt.rcParams['font.family'] = 'AppleGothic'

mongo_uri = "mongodb://localhost:27017"
client = mc(mongo_uri)
keti_db = client.keti_pattern_recognition

household_col = keti_db.household_info
weather_col = keti_db.weather_info

In [286]:
# TimeSlot In
hh_db_datas = household_col.find_one({"uid": "아파트1-101-1602"})
hh_db_datas

uid_in, timeslot = hh_db_datas['uid'], hh_db_datas['timeslot']

datelist = [
    dt.strptime(ts['time'], "%Y-%m-%d T%H:%M %z").date()
    for ts in timeslot
]
datelist = list(set(datelist))
datelist.sort()

ts_datas = {}
start_idx = 0
end_idx = 96
enl = 1

for date in datelist:
    ts_datas[date] = [ts['power'] *
                      enl for ts in timeslot[start_idx:end_idx]]
    start_idx = end_idx
    end_idx = end_idx + 96

ts_datas = pd.DataFrame(ts_datas).T
hh_datas = ts_datas.reset_index().copy()

hh_datas.rename(columns={"index": "date"}, inplace=True)
hh_datas['date'] = pd.to_datetime(hh_datas['date'])

hh_datas.set_index('date', inplace=True)

# Merging
merge_size = 4
merge_datas = pd.DataFrame()
for date in hh_datas.index:
    merge_ts = []
    new_ts_size = round(len(hh_datas.loc[date]) / merge_size)
    
    for idx in range(0,new_ts_size):
        merge_ts.append(
            round(hh_datas.loc[date][merge_size * idx:merge_size * (idx + 1)].sum(), 3) * 1000
        )
    if len(list(set(merge_ts))) >= 3:
        merge_datas[date] = merge_ts
    
def get_season_no(month):
    if month in [3,4,5]:
        return 1 # 봄
    elif month in [6,7,8]:
        return 2 # 여름
    elif month in [9,10,11]:
        return 3 # 가을
    elif month in [12,1,2]:
        return 4 # 겨울
    
# separate_datas_col = list(filter(lambda data: get_season_no(data.month) == 4, merge_datas.columns))
# print(separate_datas_col)
# merge_datas = merge_datas[separate_datas_col]
merge_datas

Unnamed: 0,2018-05-01,2018-05-02,2018-05-03,2018-05-04,2018-05-05,2018-05-06,2018-05-07,2018-05-08,2018-05-09,2018-05-10,...,2019-04-21,2019-04-22,2019-04-23,2019-04-24,2019-04-25,2019-04-26,2019-04-27,2019-04-28,2019-04-29,2019-04-30
0,431.0,286.0,434.0,280.0,143.0,216.0,438.0,206.0,298.0,471.0,...,264.0,462.0,315.0,456.0,388.0,462.0,502.0,522.0,441.0,232.0
1,178.0,265.0,316.0,266.0,124.0,211.0,344.0,156.0,264.0,223.0,...,235.0,455.0,316.0,462.0,361.0,427.0,345.0,462.0,386.0,236.0
2,205.0,306.0,238.0,234.0,143.0,228.0,317.0,154.0,255.0,282.0,...,217.0,494.0,315.0,363.0,319.0,425.0,341.0,369.0,406.0,284.0
3,198.0,290.0,269.0,317.0,135.0,322.0,379.0,155.0,235.0,269.0,...,248.0,413.0,355.0,365.0,312.0,442.0,390.0,308.0,301.0,377.0
4,204.0,270.0,335.0,236.0,149.0,269.0,332.0,169.0,261.0,293.0,...,256.0,390.0,296.0,389.0,362.0,462.0,341.0,293.0,429.0,307.0
5,251.0,283.0,271.0,240.0,268.0,216.0,258.0,325.0,283.0,287.0,...,227.0,595.0,354.0,382.0,283.0,452.0,324.0,449.0,394.0,335.0
6,231.0,525.0,722.0,1088.0,219.0,228.0,205.0,454.0,510.0,702.0,...,224.0,854.0,538.0,775.0,541.0,694.0,307.0,378.0,535.0,534.0
7,234.0,350.0,403.0,348.0,307.0,237.0,222.0,150.0,291.0,667.0,...,232.0,616.0,457.0,383.0,383.0,590.0,284.0,294.0,395.0,427.0
8,228.0,340.0,445.0,481.0,243.0,261.0,266.0,146.0,269.0,297.0,...,234.0,388.0,398.0,430.0,507.0,564.0,429.0,367.0,343.0,403.0
9,328.0,207.0,402.0,446.0,305.0,215.0,545.0,207.0,360.0,305.0,...,252.0,326.0,243.0,367.0,321.0,506.0,645.0,503.0,265.0,325.0


In [287]:
# Set Test Datas
# 계절별 10% 랜덤하게
def get_season_no(month):
    if month in [3,4,5]:
        return 1 # 봄
    elif month in [6,7,8]:
        return 2 # 여름
    elif month in [9,10,11]:
        return 3 # 가을
    elif month in [12,1,2]:
        return 4 # 겨울
    
test_merge_datas = pd.DataFrame();

for i in range(1,5):
    filter_list = list(filter(lambda date: get_season_no(date.month) == i, merge_datas.columns))
    test_list_idx = list()
    while True:
        filter_data = filter_list[ran.randrange(0,len(filter_list))]
        if filter_data not in test_list_idx:
            test_list_idx.append(filter_data)
            
        if len(test_list_idx) >= (len(filter_list) * 15 / 100):
            break;
    test_merge_datas = pd.concat([test_merge_datas, merge_datas[test_list_idx]], axis=1)
    merge_datas.drop(test_list_idx, axis=1, inplace=True)

test_merge_datas

Unnamed: 0,2019-03-30,2019-04-23,2018-05-23,2018-05-29,2019-04-16,2019-04-13,2018-05-05,2019-03-01,2019-04-22,2018-05-17,...,2018-12-01,2019-02-26,2019-01-28,2019-02-28,2019-01-20,2019-02-10,2018-12-05,2019-01-09,2019-01-24,2018-12-17
0,748.0,315.0,277.0,296.0,574.0,254.0,143.0,489.0,462.0,410.0,...,302.0,383.0,375.0,327.0,447.0,538.0,349.0,320.0,749.0,409.0
1,633.0,316.0,272.0,274.0,512.0,284.0,124.0,519.0,455.0,450.0,...,285.0,455.0,344.0,329.0,560.0,502.0,223.0,352.0,619.0,526.0
2,531.0,315.0,242.0,289.0,430.0,438.0,143.0,413.0,494.0,317.0,...,272.0,305.0,392.0,395.0,586.0,451.0,249.0,316.0,414.0,488.0
3,453.0,355.0,323.0,271.0,413.0,321.0,135.0,309.0,413.0,300.0,...,346.0,322.0,355.0,349.0,557.0,356.0,236.0,306.0,404.0,433.0
4,454.0,296.0,243.0,251.0,403.0,275.0,149.0,262.0,390.0,304.0,...,302.0,310.0,354.0,422.0,413.0,426.0,292.0,283.0,406.0,376.0
5,549.0,354.0,272.0,281.0,376.0,312.0,268.0,233.0,595.0,339.0,...,431.0,291.0,307.0,364.0,366.0,418.0,285.0,354.0,332.0,376.0
6,490.0,538.0,495.0,465.0,743.0,493.0,219.0,263.0,854.0,706.0,...,281.0,352.0,518.0,606.0,269.0,359.0,409.0,609.0,545.0,376.0
7,431.0,457.0,387.0,280.0,354.0,305.0,307.0,421.0,616.0,400.0,...,613.0,326.0,333.0,817.0,310.0,377.0,286.0,312.0,382.0,376.0
8,500.0,398.0,833.0,527.0,263.0,351.0,243.0,529.0,388.0,416.0,...,536.0,419.0,363.0,393.0,366.0,315.0,249.0,403.0,582.0,376.0
9,871.0,243.0,539.0,371.0,265.0,345.0,305.0,510.0,326.0,288.0,...,541.0,475.0,340.0,248.0,351.0,784.0,496.0,569.0,688.0,376.0


In [288]:
y = reduce(lambda acc, cur: cur + acc ,merge_datas.values.tolist(), [])
value_size = len(y)
print(value_size)

7344


In [289]:
# Config Training Datas
wt_db_datas = weather_col.find()
wt_datas = pd.DataFrame()

for wt in wt_db_datas:
    tmp = pd.DataFrame()
    tmp['date'] = [wt['date']]
    tmp['weather'] = [wt['weather']]
    tmp['avg_ta'] = round(float(wt['avgTa']))
    tmp['avg_rhm'] = round(float(wt['avgRhm']))
    
    wt_datas = pd.concat([wt_datas, tmp])

# 영하 온도 전처리
min_ta = min(wt_datas['avg_ta'])
wt_datas['avg_ta'] += (min_ta * -1 + 1)
min(wt_datas['avg_ta'])

1

In [290]:
# weather 정수 인코딩
weather_count = Counter(wt_datas['weather'])
weather_integer = dict()
rank = 1
for key, count in weather_count.most_common():
    weather_integer[key] = rank
    rank += 1
pp.pprint(weather_integer)
wt_datas['weather_no'] = [weather_integer[weather] for weather in wt_datas['weather']]
wt_datas

def get_season_no(month):
    if month in [3,4,5]:
        return 1 # 봄
    elif month in [6,7,8]:
        return 2 # 여름
    elif month in [9,10,11]:
        return 3 # 가을
    elif month in [12,1,2]:
        return 4 # 겨울
    
# Date, Season Utils
wt_datas['season_no'] = [get_season_no(weather.month) for weather in wt_datas['date']] 
wt_datas['day_no'] = [weather.weekday() + 1 for weather in wt_datas['date']] 

sample_weather_col = ['season_no','day_no','weather_no','avg_ta', 'avg_rhm']
sample_weather_col_2 = ['date','season_no','day_no','weather','weather_no','avg_ta', 'avg_rhm']
wt_datas[sample_weather_col_2]

{'눈': 5,
 '박무': 4,
 '비': 3,
 '소나기': 7,
 '안개': 10,
 '안개비': 11,
 '연무': 2,
 '진눈깨비': 12,
 '채운': 8,
 '특이사항 없음': 1,
 '햇무리': 6,
 '황사': 9}


Unnamed: 0,date,season_no,day_no,weather,weather_no,avg_ta,avg_rhm
0,2018-05-01,1,2,연무,2,31,73
0,2018-05-02,1,3,비,3,26,90
0,2018-05-03,1,4,비,3,22,62
0,2018-05-04,1,5,특이사항 없음,1,25,46
0,2018-05-05,1,6,특이사항 없음,1,29,47
...,...,...,...,...,...,...,...
0,2019-04-26,1,5,비,3,19,82
0,2019-04-27,1,6,햇무리,6,23,53
0,2019-04-28,1,7,비,3,24,49
0,2019-04-29,1,1,햇무리,6,24,53


In [291]:
# Config Sample Datas - Padding
# Data Preprocessing
def get_samples(datas):
    samples = list()
    for col in datas:
        timeslot = datas[col].values.tolist()
        weather = wt_datas[wt_datas['date'] == col][sample_weather_col].values.tolist()[0]
        for time in range(1,25):
            samples.append(weather + timeslot[:time])

    print("Samples Before Padding Process")
    print(np.array(samples))
    print("\nTranining Sample Size : {}".format(len(samples)))
    
    return samples

def get_padding_samples(samples):    
    # Padding
    pad_samples = list()
    SAMPLE_MAX_LEN = max([len(s) for s in samples])
    print("Tranining Sample MAX_LEN : {}".format(SAMPLE_MAX_LEN))
    for sample in samples:
        err = SAMPLE_MAX_LEN - len(sample)
        if err == 0:
            pad_samples.append(sample)
        else:
            pad_data = [0 for i in range(0, err)]
            pad_samples.append(pad_data + sample)
    print("\nFinal Samples")
    print(np.array(pad_samples))
    
    return pad_samples
    
test_samples = get_samples(test_merge_datas)
test_samples = get_padding_samples(test_samples)

training_samples = get_samples(merge_datas)
training_samples = get_padding_samples(training_samples)

Samples Before Padding Process
[list([1, 6, 3, 16, 72, 748.0]) list([1, 6, 3, 16, 72, 748.0, 633.0])
 list([1, 6, 3, 16, 72, 748.0, 633.0, 531.0]) ...
 list([4, 1, 2, 13, 60, 409.0, 526.0, 488.0, 433.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 370.0, 725.0, 498.0, 506.0, 551.0, 691.0])
 list([4, 1, 2, 13, 60, 409.0, 526.0, 488.0, 433.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 370.0, 725.0, 498.0, 506.0, 551.0, 691.0, 657.0])
 list([4, 1, 2, 13, 60, 409.0, 526.0, 488.0, 433.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 376.0, 370.0, 725.0, 498.0, 506.0, 551.0, 691.0, 657.0, 662.0])]

Tranining Sample Size : 1344
Tranining Sample MAX_LEN : 29

Final Samples
[[  0.   0.   0. ...  16.  72. 748.]
 [  0.   0.   0. ...  72. 748. 633.]
 [  0.   0.   0. ... 748. 633. 531.]
 ...
 [  0.   0.   4. ... 506. 551. 691.]
 [  0.   4.   1. ... 551. 691. 657.]
 [  4.   1.   2. ... 691.

  print(np.array(samples))


In [292]:
# Set Tranining Data
training_samples = np.array(training_samples)

train_X = training_samples[:,:-1]
train_y = training_samples[:,-1]

# Set Test Data
test_samples = np.array(test_samples)
test_X = test_samples[:,:-1]
test_y = test_samples[:,-1]

print("Input Data For Training")
print(np.array(train_X))
print()
print("Output Data For Training")
print(np.array(train_y))

print("Input Data For Test")
print(np.array(test_X))
print()
print("Output Data For Test")
print(np.array(test_y))

Input Data For Training
[[  0.   0.   0. ...   2.  31.  73.]
 [  0.   0.   0. ...  31.  73. 431.]
 [  0.   0.   0. ...  73. 431. 178.]
 ...
 [  0.   0.   1. ... 897. 426. 516.]
 [  0.   1.   2. ... 426. 516. 561.]
 [  1.   2.   6. ... 516. 561. 549.]]

Output Data For Training
[431. 178. 205. ... 561. 549. 551.]
Input Data For Test
[[  0.   0.   0. ...   3.  16.  72.]
 [  0.   0.   0. ...  16.  72. 748.]
 [  0.   0.   0. ...  72. 748. 633.]
 ...
 [  0.   0.   4. ... 498. 506. 551.]
 [  0.   4.   1. ... 506. 551. 691.]
 [  4.   1.   2. ... 551. 691. 657.]]

Output Data For Test
[748. 633. 531. ... 691. 657. 662.]


In [293]:
one_hot_y = to_categorical(train_y,num_classes=value_size)
one_hot_y

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [301]:
model_2 = Sequential(name="rnn-model-1-101-1602")
model_2.add(Embedding(value_size, 10, input_length=28))
model_2.add(LSTM(128))
model_2.add(Dense(value_size, activation='softmax'))
model_2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model_2.summary()

Model: "rnn-model-1-101-1602"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_26 (Embedding)     (None, 28, 10)            73440     
_________________________________________________________________
lstm_27 (LSTM)               (None, 128)               71168     
_________________________________________________________________
dense_27 (Dense)             (None, 7344)              947376    
Total params: 1,091,984
Trainable params: 1,091,984
Non-trainable params: 0
_________________________________________________________________


In [302]:
model_2.fit(train_X, one_hot_y, epochs=400, verbose=2)

Epoch 1/400
230/230 - 5s - loss: 7.4159 - accuracy: 0.0034
Epoch 2/400
230/230 - 3s - loss: 6.7314 - accuracy: 0.0050
Epoch 3/400
230/230 - 4s - loss: 6.6666 - accuracy: 0.0042
Epoch 4/400
230/230 - 4s - loss: 6.6219 - accuracy: 0.0050
Epoch 5/400
230/230 - 4s - loss: 6.5734 - accuracy: 0.0053
Epoch 6/400
230/230 - 3s - loss: 6.5039 - accuracy: 0.0061
Epoch 7/400
230/230 - 3s - loss: 6.4165 - accuracy: 0.0060
Epoch 8/400
230/230 - 3s - loss: 6.3265 - accuracy: 0.0078
Epoch 9/400
230/230 - 3s - loss: 6.2375 - accuracy: 0.0079
Epoch 10/400
230/230 - 3s - loss: 6.1486 - accuracy: 0.0082
Epoch 11/400
230/230 - 3s - loss: 6.0589 - accuracy: 0.0109
Epoch 12/400
230/230 - 3s - loss: 5.9661 - accuracy: 0.0131
Epoch 13/400
230/230 - 3s - loss: 5.8720 - accuracy: 0.0157
Epoch 14/400
230/230 - 3s - loss: 5.7839 - accuracy: 0.0182
Epoch 15/400
230/230 - 4s - loss: 5.6940 - accuracy: 0.0202
Epoch 16/400
230/230 - 4s - loss: 5.6032 - accuracy: 0.0238
Epoch 17/400
230/230 - 3s - loss: 5.5129 - accura

KeyboardInterrupt: 

In [None]:
# visual
fig, axes = plt.subplots(10,8, figsize=(15,15))
ax = plt.gca()
ax.axes.xaxis.set_visible(False)
ax.axes.yaxis.set_visible(False)

test_real = list()
test_predict = list()

power_info = 1

for i in range(0, round(len(test_X) / 24)):
    start_idx = 24 * i
    end_idx = (24 * (i + 1))
    
    real_pattern = test_X[end_idx-1][-23:]
    real_pattern = np.append(real_pattern, [test_y[end_idx-1]])
    
    test_real.append(real_pattern)
#     print("real_pattern:",real_pattern)
    
    predict_pattern = test_X[start_idx+power_info][-power_info:].tolist()

    for p in range(start_idx + power_info, end_idx):
        result = model_2.predict_classes([test_X[p].tolist()])
        predict_pattern.append(y[result[0]])
    test_predict.append(predict_pattern)
    
#     print("predict_pattern:", predict_pattern)

for pi in range(0, len(test_real)):
    sns.lineplot(data=test_real[pi], label="real", lw=2, color="blue", ax=axes[int(pi / 8)][pi % 8], legend=False)
    sns.lineplot(data=test_predict[pi], label="predict", lw=2, color="red", ax=axes[int(pi / 8)][pi % 8], legend=False)

In [None]:
# visual
fig, axes = plt.subplots(10,8, figsize=(15,15))
ax = plt.gca()
ax.axes.xaxis.set_visible(False)
ax.axes.yaxis.set_visible(False)

test_real = list()
test_predict = list()

power_info = 5

for i in range(0, round(len(test_X) / 24)):
    start_idx = 24 * i
    end_idx = (24 * (i + 1))
    
    real_pattern = test_X[end_idx-1][-23:]
    real_pattern = np.append(real_pattern, [test_y[end_idx-1]])
    
    test_real.append(real_pattern)
#     print("real_pattern:",real_pattern)
    
    predict_pattern = test_X[start_idx+power_info][-power_info:].tolist()

    for p in range(start_idx + power_info, end_idx):
        result = model_2.predict_classes([test_X[p].tolist()])
        predict_pattern.append(y[result[0]])
    test_predict.append(predict_pattern)
    
#     print("predict_pattern:", predict_pattern)

for pi in range(0, len(test_real)):
    sns.lineplot(data=test_real[pi], label="real", lw=2, color="blue", ax=axes[int(pi / 8)][pi % 8], legend=False)
    sns.lineplot(data=test_predict[pi], label="predict", lw=2, color="red", ax=axes[int(pi / 8)][pi % 8], legend=False)

In [None]:
# visual
fig, axes = plt.subplots(10,8, figsize=(15,15))
ax = plt.gca()
ax.axes.xaxis.set_visible(False)
ax.axes.yaxis.set_visible(False)

test_real = list()
test_predict = list()

power_info = 10

for i in range(0, round(len(test_X) / 24)):
    start_idx = 24 * i
    end_idx = (24 * (i + 1))
    
    real_pattern = test_X[end_idx-1][-23:]
    real_pattern = np.append(real_pattern, [test_y[end_idx-1]])
    
    test_real.append(real_pattern)
#     print("real_pattern:",real_pattern)
    
    predict_pattern = test_X[start_idx+power_info][-power_info:].tolist()

    for p in range(start_idx + power_info, end_idx):
        result = model_2.predict_classes([test_X[p].tolist()])
        predict_pattern.append(y[result[0]])
    test_predict.append(predict_pattern)
    
#     print("predict_pattern:", predict_pattern)

for pi in range(0, len(test_real)):
    sns.lineplot(data=test_real[pi], label="real", lw=2, color="blue", ax=axes[int(pi / 8)][pi % 8], legend=False)
    sns.lineplot(data=test_predict[pi], label="predict", lw=2, color="red", ax=axes[int(pi / 8)][pi % 8], legend=False)

In [None]:
# visual
fig, axes = plt.subplots(10,8, figsize=(15,15))
ax = plt.gca()
ax.axes.xaxis.set_visible(False)
ax.axes.yaxis.set_visible(False)

test_real = list()
test_predict = list()

power_info = 15

for i in range(0, round(len(test_X) / 24)):
    start_idx = 24 * i
    end_idx = (24 * (i + 1))
    
    real_pattern = test_X[end_idx-1][-23:]
    real_pattern = np.append(real_pattern, [test_y[end_idx-1]])
    
    test_real.append(real_pattern)
#     print("real_pattern:",real_pattern)
    
    predict_pattern = test_X[start_idx+power_info][-power_info:].tolist()

    for p in range(start_idx + power_info, end_idx):
        result = model_2.predict_classes([test_X[p].tolist()])
        predict_pattern.append(y[result[0]])
    test_predict.append(predict_pattern)
    
#     print("predict_pattern:", predict_pattern)

for pi in range(0, len(test_real)):
    sns.lineplot(data=test_real[pi], label="real", lw=2, color="blue", ax=axes[int(pi / 8)][pi % 8], legend=False)
    sns.lineplot(data=test_predict[pi], label="predict", lw=2, color="red", ax=axes[int(pi / 8)][pi % 8], legend=False)

In [None]:
model_2.save("model/아파트1-101-1602")

In [282]:
# from tensorflow.keras.models import load_model
# test_save_model = load_model("model/아파트1-104-1206")
# test_save_model.summary()

Model: "sequential_22"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_21 (Embedding)     (None, 28, 10)            69360     
_________________________________________________________________
lstm_22 (LSTM)               (None, 128)               71168     
_________________________________________________________________
dense_22 (Dense)             (None, 6936)              894744    
Total params: 1,035,272
Trainable params: 1,035,272
Non-trainable params: 0
_________________________________________________________________


In [284]:
client.close()