In [258]:
# Config Data Structure
import pandas as pd
from datetime import datetime as dt
from pymongo import MongoClient as mc
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from collections import Counter
import pprint as pp
import random as ran
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN, LSTM

mongo_uri = "mongodb://localhost:27017"
client = mc(mongo_uri)
keti_db = client.keti_pattern_recognition

household_col = keti_db.household_info
weather_col = keti_db.weather_info

In [272]:
# TimeSlot In
hh_db_datas = household_col.find_one({"uid": "아파트1-104-1206"})
hh_db_datas

uid_in, timeslot = hh_db_datas['uid'], hh_db_datas['timeslot']

datelist = [
    dt.strptime(ts['time'], "%Y-%m-%d T%H:%M %z").date()
    for ts in timeslot
]
datelist = list(set(datelist))
datelist.sort()

ts_datas = {}
start_idx = 0
end_idx = 96
enl = 1

for date in datelist:
    ts_datas[date] = [ts['power'] *
                      enl for ts in timeslot[start_idx:end_idx]]
    start_idx = end_idx
    end_idx = end_idx + 96

ts_datas = pd.DataFrame(ts_datas).T
hh_datas = ts_datas.reset_index().copy()

hh_datas.rename(columns={"index": "date"}, inplace=True)
hh_datas['date'] = pd.to_datetime(hh_datas['date'])

hh_datas.set_index('date', inplace=True)

# Merging
merge_size = 4
merge_datas = pd.DataFrame()
for date in hh_datas.index:
    merge_ts = []
    new_ts_size = round(len(hh_datas.loc[date]) / merge_size)
    
    for idx in range(0,new_ts_size):
        merge_ts.append(
            round(hh_datas.loc[date][merge_size * idx:merge_size * (idx + 1)].sum() * 1000) + 1
        )
    merge_datas[date] = merge_ts
    
def get_season_no(month):
    if month in [3,4,5]:
        return 1 # 봄
    elif month in [6,7,8]:
        return 2 # 여름
    elif month in [9,10,11]:
        return 3 # 가을
    elif month in [12,1,2]:
        return 4 # 겨울
    
separate_datas_col = list(filter(lambda data: get_season_no(data.month) == 1, merge_datas.columns))
print(separate_datas_col)
merge_datas = merge_datas[separate_datas_col]

[Timestamp('2018-05-01 00:00:00'), Timestamp('2018-05-02 00:00:00'), Timestamp('2018-05-03 00:00:00'), Timestamp('2018-05-04 00:00:00'), Timestamp('2018-05-05 00:00:00'), Timestamp('2018-05-06 00:00:00'), Timestamp('2018-05-07 00:00:00'), Timestamp('2018-05-08 00:00:00'), Timestamp('2018-05-09 00:00:00'), Timestamp('2018-05-10 00:00:00'), Timestamp('2018-05-11 00:00:00'), Timestamp('2018-05-12 00:00:00'), Timestamp('2018-05-13 00:00:00'), Timestamp('2018-05-14 00:00:00'), Timestamp('2018-05-15 00:00:00'), Timestamp('2018-05-16 00:00:00'), Timestamp('2018-05-17 00:00:00'), Timestamp('2018-05-18 00:00:00'), Timestamp('2018-05-19 00:00:00'), Timestamp('2018-05-20 00:00:00'), Timestamp('2018-05-21 00:00:00'), Timestamp('2018-05-22 00:00:00'), Timestamp('2018-05-23 00:00:00'), Timestamp('2018-05-24 00:00:00'), Timestamp('2018-05-25 00:00:00'), Timestamp('2018-05-26 00:00:00'), Timestamp('2018-05-27 00:00:00'), Timestamp('2018-05-28 00:00:00'), Timestamp('2018-05-29 00:00:00'), Timestamp('20

In [273]:
# Set Test Datas
# 계절별 10% 랜덤하게
def get_season_no(month):
    if month in [3,4,5]:
        return 1 # 봄
    elif month in [6,7,8]:
        return 2 # 여름
    elif month in [9,10,11]:
        return 3 # 가을
    elif month in [12,1,2]:
        return 4 # 겨울
    
test_merge_datas = pd.DataFrame();

for i in range(1,5):
    filter_list = list(filter(lambda date: get_season_no(date.month) == i, merge_datas.columns))
    test_list_idx = list()
    while True:
        filter_data = filter_list[ran.randrange(0,len(filter_list))]
        if filter_data not in test_list_idx:
            test_list_idx.append(filter_data)
            
        if len(test_list_idx) >= (len(filter_list) * 15 / 100):
            break;
    test_merge_datas = pd.concat([test_merge_datas, merge_datas[test_list_idx]], axis=1)
    merge_datas.drop(test_list_idx, axis=1, inplace=True)

test_merge_datas

ValueError: empty range for randrange() (0, 0, 0)

In [274]:
from functools import reduce

merge_datas
y = reduce(lambda acc, cur: cur + acc ,merge_datas.values.tolist(), [])
value_size = len(y)
print(value_size)

1872


In [275]:
# Config Training Datas
wt_db_datas = weather_col.find()
wt_datas = pd.DataFrame()
for wt in wt_db_datas:
    tmp = pd.DataFrame()
    tmp['date'] = [wt['date']]
    tmp['weather'] = [wt['weather']]
    tmp['avg_ta'] = round(float(wt['avgTa']))
    tmp['avg_rhm'] = round(float(wt['avgRhm']))
    
    wt_datas = pd.concat([wt_datas, tmp])

# weather 정수 인코딩
weather_count = Counter(wt_datas['weather'])
weather_integer = dict()
rank = 1
for key, count in weather_count.most_common():
    weather_integer[key] = rank
    rank += 1
pp.pprint(weather_integer)
wt_datas['weather_no'] = [weather_integer[weather] for weather in wt_datas['weather']]
wt_datas

def get_season_no(month):
    if month in [3,4,5]:
        return 1 # 봄
    elif month in [6,7,8]:
        return 2 # 여름
    elif month in [9,10,11]:
        return 3 # 가을
    elif month in [12,1,2]:
        return 4 # 겨울
    
# Date, Season Utils
wt_datas['season_no'] = [get_season_no(weather.month) for weather in wt_datas['date']] 
wt_datas['day_no'] = [weather.weekday() + 1 for weather in wt_datas['date']] 

sample_weather_col = ['season_no','day_no','weather_no','avg_ta', 'avg_rhm']
sample_weather_col_2 = ['date','season_no','day_no','weather','weather_no','avg_ta', 'avg_rhm']
wt_datas[sample_weather_col_2]

{'눈': 5,
 '박무': 4,
 '비': 3,
 '소나기': 7,
 '안개': 10,
 '안개비': 11,
 '연무': 2,
 '진눈깨비': 12,
 '채운': 8,
 '특이사항 없음': 1,
 '햇무리': 6,
 '황사': 9}


Unnamed: 0,date,season_no,day_no,weather,weather_no,avg_ta,avg_rhm
0,2018-05-01,1,2,연무,2,20,73
0,2018-05-02,1,3,비,3,15,90
0,2018-05-03,1,4,비,3,11,62
0,2018-05-04,1,5,특이사항 없음,1,14,46
0,2018-05-05,1,6,특이사항 없음,1,18,47
...,...,...,...,...,...,...,...
0,2019-04-26,1,5,비,3,8,82
0,2019-04-27,1,6,햇무리,6,12,53
0,2019-04-28,1,7,비,3,13,49
0,2019-04-29,1,1,햇무리,6,13,53


In [276]:
# Config Sample Datas - Padding
# Data Preprocessing
def get_samples(datas):
    samples = list()
    for col in datas:
        timeslot = datas[col].values.tolist()
        weather = wt_datas[wt_datas['date'] == col][sample_weather_col].values.tolist()[0]
        for time in range(1,25):
            samples.append(weather + timeslot[:time])

    print("Samples Before Padding Process")
    print(np.array(samples))
    print("\nTranining Sample Size : {}".format(len(samples)))
    
    return samples

def get_padding_samples(samples):    
    # Padding
    pad_samples = list()
    SAMPLE_MAX_LEN = max([len(s) for s in samples])
    print("Tranining Sample MAX_LEN : {}".format(SAMPLE_MAX_LEN))
    for sample in samples:
        err = SAMPLE_MAX_LEN - len(sample)
        if err == 0:
            pad_samples.append(sample)
        else:
            pad_data = [0 for i in range(0, err)]
            pad_samples.append(pad_data + sample)
    print("\nFinal Samples")
    print(np.array(pad_samples))
    
    return pad_samples
    
test_samples = get_samples(test_merge_datas)
test_samples = get_padding_samples(test_samples)

training_samples = get_samples(merge_datas)
training_samples = get_padding_samples(training_samples)

Samples Before Padding Process
[list([1, 3, 2, 8, 55, 162]) list([1, 3, 2, 8, 55, 162, 160])
 list([1, 3, 2, 8, 55, 162, 160, 144])
 list([1, 3, 2, 8, 55, 162, 160, 144, 148])
 list([1, 3, 2, 8, 55, 162, 160, 144, 148, 163])
 list([1, 3, 2, 8, 55, 162, 160, 144, 148, 163, 162])
 list([1, 3, 2, 8, 55, 162, 160, 144, 148, 163, 162, 163])
 list([1, 3, 2, 8, 55, 162, 160, 144, 148, 163, 162, 163, 123])
 list([1, 3, 2, 8, 55, 162, 160, 144, 148, 163, 162, 163, 123, 98])
 list([1, 3, 2, 8, 55, 162, 160, 144, 148, 163, 162, 163, 123, 98, 114])
 list([1, 3, 2, 8, 55, 162, 160, 144, 148, 163, 162, 163, 123, 98, 114, 114])
 list([1, 3, 2, 8, 55, 162, 160, 144, 148, 163, 162, 163, 123, 98, 114, 114, 109])
 list([1, 3, 2, 8, 55, 162, 160, 144, 148, 163, 162, 163, 123, 98, 114, 114, 109, 95])
 list([1, 3, 2, 8, 55, 162, 160, 144, 148, 163, 162, 163, 123, 98, 114, 114, 109, 95, 108])
 list([1, 3, 2, 8, 55, 162, 160, 144, 148, 163, 162, 163, 123, 98, 114, 114, 109, 95, 108, 114])
 list([1, 3, 2, 8, 5

  print(np.array(samples))


In [277]:
# Set Tranining Data
training_samples = np.array(training_samples)

train_X = training_samples[:,:-1]
train_y = training_samples[:,-1]

# Set Test Data
test_samples = np.array(test_samples)
test_X = test_samples[:,:-1]
test_y = test_samples[:,-1]

print("Input Data For Training")
print(np.array(train_X))
print()
print("Output Data For Training")
print(np.array(train_y))

print("Input Data For Test")
print(np.array(test_X))
print()
print("Output Data For Test")
print(np.array(test_y))

Input Data For Training
[[  0   0   0 ...   2  20  73]
 [  0   0   0 ...  20  73 342]
 [  0   0   0 ...  73 342 338]
 ...
 [  0   0   1 ...  39  62  60]
 [  0   1   2 ...  62  60  31]
 [  1   2   6 ...  60  31  57]]

Output Data For Training
[342 338 325 ...  31  57  59]
Input Data For Test
[[  0   0   0 ...   2   8  55]
 [  0   0   0 ...   8  55 162]
 [  0   0   0 ...  55 162 160]
 ...
 [  0   0   1 ...  40  65  57]
 [  0   1   3 ...  65  57  29]
 [  1   3   5 ...  57  29  57]]

Output Data For Test
[162 160 144 148 163 162 163 123  98 114 114 109  95 108 114 112  95 102
 112 112 281 298 203 194  69  79  94  91  68  90 118  71  66  85  83  57
  80  84  67  67  83  77  56 146 228 201 181  91 131  47  43  61  34  55
  60  29  60  48  42  62  93 155 143 133 209 223 212 271 334 250 239 271
 280 167  73  98  96  68  88  95  85  74  93  93  65  94  95  73  83  94
 121 226 266 287 279 276 114 110  90 114 114 106  95 124 112  97 102 113
 109  86 113 111 104  94 111 275 281 280 293 268 175  90

In [278]:
model = Sequential()
# model.add(Embedding(value_size, input_length=28))
model.add(LSTM(128, input_shape=(28, 1),return_sequences=True, activation='softmax'))
model.add(Dense(1,activation='softmax'))
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

In [279]:
print(len(train_X), train_X.shape, train_X)
train_X = train_X.reshape(train_X.shape[0], train_X.shape[1], 1)
print(len(train_X), train_X.shape, train_X)
print(len(train_y), train_y.shape, train_y)

1872 (1872, 28) [[  0   0   0 ...   2  20  73]
 [  0   0   0 ...  20  73 342]
 [  0   0   0 ...  73 342 338]
 ...
 [  0   0   1 ...  39  62  60]
 [  0   1   2 ...  62  60  31]
 [  1   2   6 ...  60  31  57]]
1872 (1872, 28, 1) [[[  0]
  [  0]
  [  0]
  ...
  [  2]
  [ 20]
  [ 73]]

 [[  0]
  [  0]
  [  0]
  ...
  [ 20]
  [ 73]
  [342]]

 [[  0]
  [  0]
  [  0]
  ...
  [ 73]
  [342]
  [338]]

 ...

 [[  0]
  [  0]
  [  1]
  ...
  [ 39]
  [ 62]
  [ 60]]

 [[  0]
  [  1]
  [  2]
  ...
  [ 62]
  [ 60]
  [ 31]]

 [[  1]
  [  2]
  [  6]
  ...
  [ 60]
  [ 31]
  [ 57]]]
1872 (1872,) [342 338 325 ...  31  57  59]


In [280]:
model.fit(train_X, train_y, epochs=200, batch_size=1 ,verbose=2)

Epoch 1/200
1872/1872 - 6s - loss: 32014.5098 - accuracy: 0.0000e+00
Epoch 2/200
1872/1872 - 6s - loss: 32014.5391 - accuracy: 0.0000e+00
Epoch 3/200
1872/1872 - 6s - loss: 32014.5078 - accuracy: 0.0000e+00
Epoch 4/200
1872/1872 - 6s - loss: 32014.5039 - accuracy: 0.0000e+00
Epoch 5/200
1872/1872 - 6s - loss: 32014.5156 - accuracy: 0.0000e+00
Epoch 6/200
1872/1872 - 6s - loss: 32014.5059 - accuracy: 0.0000e+00
Epoch 7/200
1872/1872 - 6s - loss: 32014.5195 - accuracy: 0.0000e+00
Epoch 8/200
1872/1872 - 6s - loss: 32014.5137 - accuracy: 0.0000e+00
Epoch 9/200
1872/1872 - 6s - loss: 32014.5078 - accuracy: 0.0000e+00
Epoch 10/200
1872/1872 - 6s - loss: 32014.5039 - accuracy: 0.0000e+00
Epoch 11/200
1872/1872 - 6s - loss: 32014.5195 - accuracy: 0.0000e+00
Epoch 12/200
1872/1872 - 6s - loss: 32014.5078 - accuracy: 0.0000e+00
Epoch 13/200
1872/1872 - 6s - loss: 32014.5020 - accuracy: 0.0000e+00
Epoch 14/200
1872/1872 - 6s - loss: 32014.5215 - accuracy: 0.0000e+00
Epoch 15/200
1872/1872 - 6s -

Epoch 118/200
1872/1872 - 6s - loss: 32014.5039 - accuracy: 0.0000e+00
Epoch 119/200
1872/1872 - 6s - loss: 32014.5215 - accuracy: 0.0000e+00
Epoch 120/200
1872/1872 - 6s - loss: 32014.5020 - accuracy: 0.0000e+00
Epoch 121/200
1872/1872 - 6s - loss: 32014.5137 - accuracy: 0.0000e+00
Epoch 122/200
1872/1872 - 6s - loss: 32014.5098 - accuracy: 0.0000e+00
Epoch 123/200
1872/1872 - 6s - loss: 32014.5078 - accuracy: 0.0000e+00
Epoch 124/200
1872/1872 - 6s - loss: 32014.5156 - accuracy: 0.0000e+00
Epoch 125/200
1872/1872 - 6s - loss: 32014.5059 - accuracy: 0.0000e+00
Epoch 126/200
1872/1872 - 6s - loss: 32014.5156 - accuracy: 0.0000e+00
Epoch 127/200
1872/1872 - 6s - loss: 32014.5039 - accuracy: 0.0000e+00
Epoch 128/200
1872/1872 - 6s - loss: 32014.5176 - accuracy: 0.0000e+00
Epoch 129/200
1872/1872 - 6s - loss: 32014.5156 - accuracy: 0.0000e+00
Epoch 130/200
1872/1872 - 6s - loss: 32014.5098 - accuracy: 0.0000e+00
Epoch 131/200
1872/1872 - 6s - loss: 32014.5156 - accuracy: 0.0000e+00
Epoch 

<tensorflow.python.keras.callbacks.History at 0x7fcaf94df2b0>

In [318]:
test = test_X[2]
test = test.reshape(28,1,1)
print(test)
result = model.predict(test, verbose=0)
print(result)

[[[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  0]]

 [[  1]]

 [[  3]]

 [[  2]]

 [[  8]]

 [[ 55]]

 [[162]]

 [[160]]]
[[[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]

 [[1.]]]


In [None]:
client.close()