In [1]:
# Config Data Structure
import IPython
import IPython.display

import pandas as pd
import numpy as np
import pprint as pp
import random as ran
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import random as ran
import math

from functools import reduce
from collections import Counter
from datetime import datetime as dt
from pymongo import MongoClient as mc
from functools import reduce

sns.set(rc={'figure.figsize': (15.7, 13.27)})
plt.rcParams['figure.figsize'] = 15.7,13.27
plt.rcParams['font.family'] = 'AppleGothic'

mongo_uri = "mongodb://localhost:27017"
client = mc(mongo_uri)
keti_db = client.keti_pattern_recognition

jungang_col = keti_db.jungang_pattern
cluster_col = keti_db.cluster_info
weather_col = keti_db.weather_info

In [2]:
jungang_db_cur = jungang_col.find()
db_datas = [_ for _ in jungang_db_cur]

jg_datas = pd.DataFrame(columns=['Date Time','energy (kw 15min)'])
jg_datas['Date Time'] = [_['ttime'] for _ in db_datas]
jg_datas['energy (kw 15min)'] = [_['energy'] for _ in db_datas]

date_time = pd.to_datetime(jg_datas.pop('Date Time'),
                          format="%Y-%m-%d %H:%M:%S")
jg_datas.index = date_time
jg_datas

Unnamed: 0_level_0,energy (kw 15min)
Date Time,Unnamed: 1_level_1
2017-01-01 00:00:00,15250
2017-01-01 00:15:00,15250
2017-01-01 00:30:00,13750
2017-01-01 00:45:00,14250
2017-01-01 01:00:00,14000
...,...
2020-04-08 22:45:00,0
2020-04-08 23:00:00,0
2020-04-08 23:15:00,0
2020-04-08 23:30:00,0


In [3]:
idx = jg_datas.index.get_loc(jg_datas[jg_datas['energy (kw 15min)'] == 0].index[7])

jg_datas = jg_datas.iloc[:idx].copy()

In [4]:
def calc_sin(ts, target_value):
    return np.sin(ts * (2 * np.pi / target_value)).values
def calc_cos(ts, target_value):
    return np.cos(ts * (2 * np.pi / target_value)).values
def get_season(month):
    if month in [3,4,5]:
        return "봄"
    elif month in [6,7,8]:
        return "여름"
    elif month in [9,10,11]:
        return "가을"
    else:
        return "겨울"

# ~ 2018 year data parsing
jg_datas = jg_datas[jg_datas.index.year <= 2018]
date_time = jg_datas.index
timestamp = date_time.map(dt.timestamp)

day = 24 * 60 * 60
week = 7 * day
year = (365) * day

jg_datas['week sin'] = calc_sin(timestamp, week)
jg_datas['week cos'] = calc_cos(timestamp, week)
jg_datas['year sin'] = calc_sin(timestamp, year)
jg_datas['year cos'] = calc_cos(timestamp, year)
jg_datas['season'] = [get_season(_.month) for _ in jg_datas.index]

jg_datas = jg_datas[::4]
jg_datas

Unnamed: 0_level_0,energy (kw 15min),week sin,week cos,year sin,year cos,season
Date Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-01 00:00:00,15250,0.707107,-0.707107,0.198782,0.980044,겨울
2017-01-01 01:00:00,14000,0.680173,-0.733052,0.199485,0.979901,겨울
2017-01-01 02:00:00,14750,0.652287,-0.757972,0.200188,0.979758,겨울
2017-01-01 03:00:00,29500,0.623490,-0.781831,0.200891,0.979614,겨울
2017-01-01 04:00:00,15750,0.593820,-0.804598,0.201593,0.979469,겨울
...,...,...,...,...,...,...
2018-09-19 19:00:00,22500,-0.500000,0.866025,-0.999984,-0.005738,가을
2018-09-19 20:00:00,19250,-0.467269,0.884115,-0.999987,-0.005021,가을
2018-09-19 21:00:00,20250,-0.433884,0.900969,-0.999991,-0.004304,가을
2018-09-19 22:00:00,19000,-0.399892,0.916562,-0.999994,-0.003586,가을


In [5]:
day_1_size = 24
year_half_size = day_1_size * int(365 / 4) 
year_1_size = day_1_size * 365

training_datas = jg_datas[:year_1_size]
validation_datas = jg_datas[year_1_size:
                        year_1_size + year_half_size]
testing_datas = jg_datas[year_1_size + year_half_size:]

In [6]:
all_dict = dict()
train_type = ['univariate', 'multivariate', 'season univariate', 'season multivariate']
uni_columns = ['energy (kw 15min)']
mul_columns = ['energy (kw 15min)', 'week sin', 'week cos', 'year sin', 'year cos']
seasons = ["봄", "여름", "가을", "겨울"]

for t_type in train_type:
    if 'season' in t_type:
        seasons_dict = dict()
        for season in seasons:
            season_dict = dict()
            in_col = mul_columns if "multivariate" in t_type else uni_columns
            
            season_dict['train'] = training_datas[in_col][training_datas['season'] == season].copy()
            season_dict['val'] = validation_datas[in_col].copy()
            season_dict['test'] = testing_datas[in_col].copy()
            
            seasons_dict[season] = season_dict
        all_dict[t_type] = seasons_dict
    else:
        in_dict = dict()
        in_col = mul_columns if "multivariate" in t_type else uni_columns
        
        in_dict['train'] = training_datas[in_col].copy()
        in_dict['val'] = validation_datas[in_col].copy()
        in_dict['test'] = testing_datas[in_col].copy()
        
        all_dict[t_type] = in_dict

In [7]:
norm_all_dict = dict()

mean = training_datas[mul_columns].mean()
std = training_datas[mul_columns].std()

for key in all_dict.keys():
    in_col = mul_columns if "multivariate" in key else uni_columns
    in_dict = all_dict[key].copy()
    m = mean[in_col]
    s = std[in_col]
    
    if "season" in key:
        for season in seasons:
            train = in_dict[season]['train'].copy()
            val = in_dict[season]['val'].copy()
            test = in_dict[season]['test'].copy()
            
            in_dict[season]['train'] = (train - m) / s
            in_dict[season]['val'] = (val - m) / s
            in_dict[season]['test'] = (test - m) / s
    else:
        train = in_dict['train'].copy()
        val = in_dict['val'].copy()
        test = in_dict['test'].copy()
        
        in_dict['train'] = (train - m) / s
        in_dict['val'] = (val - m) / s
        in_dict['test'] = (test - m) / s
        
    norm_all_dict[key] = in_dict

In [8]:
cur_cluster_result = cluster_col.find({
    "uid": "jungang_pattern"
})
cluster_result = dict()

for data in cur_cluster_result:
    in_dict = pd.DataFrame(columns=['Label'])
    in_dict.index.name = "Date Time"
    
    season = data['season']
    infos = data['info']
    
    dtime = [dt.strptime(_['date'], "%Y-%m-%d") for _ in infos]
    labels = [_['label'] for _ in infos]

    for idx, _ in enumerate(dtime):
        label = labels[idx]        
        in_dict.loc[_] = [label]
        
    cluster_result[season] = in_dict
    
cluster_pattern_dict = dict()

for season in seasons:
    result = cluster_result[season]
    in_dict = pd.DataFrame(columns=[_ for _ in range(0, 24)])
    in_dict.index.name = "Label"
    
    labels = list(set(result['Label']))
    for label in labels:
        cluster_pattern = np.array([])
        date_in_labels = result[result['Label'] == label].index
        for date in date_in_labels:
            idx = jg_datas.index.get_loc(date)
            pattern = jg_datas.iloc[idx: idx+24]['energy (kw 15min)'].values
            cluster_pattern = np.append(cluster_pattern, pattern)
        cluster_pattern = cluster_pattern.reshape(-1,24).mean(axis=0)
        in_dict.loc[label] = cluster_pattern
        
    cluster_pattern_dict[season] = in_dict

cluster_norm_dict = dict()

mean = all_dict['univariate']['train'].mean().values[0]
std = all_dict['univariate']['train'].std().values[0]

for key in cluster_pattern_dict.keys():
    cluster_norm_dict[key] = ((cluster_pattern_dict[key] - mean) / std).copy()

In [67]:
cluster_norm_dict['봄']

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-0.722177,-0.756048,-0.829437,-0.091318,-0.487898,-0.154827,-0.012284,0.183889,0.480266,0.607284,...,0.529662,0.375828,0.412523,0.343368,0.361715,0.203648,0.118969,0.085097,-0.359468,-0.626207
1,-0.444147,-0.480841,-0.492308,0.083333,-0.230862,0.317259,0.344779,0.558065,0.814924,1.016743,...,0.844739,0.723189,0.76447,0.730069,0.739243,0.652094,0.436515,0.516784,-0.182701,-0.233155
2,-0.780511,-0.847784,-0.841668,-0.395221,-0.737701,-0.034395,-0.101667,0.142961,0.668912,1.12759,...,1.017507,0.82792,0.99916,0.974697,0.681143,0.412052,0.136845,0.210234,-0.46861,-0.664313
3,-0.667135,-0.696773,-0.724999,-0.000993,-0.312895,0.123203,0.090742,0.392764,0.672205,1.071608,...,1.192981,1.06314,1.119593,1.161932,0.981284,0.73148,0.454862,0.374417,-0.166118,-0.360879
4,-0.317919,-0.406719,-0.444881,0.143695,-0.089681,0.349183,0.461467,0.741811,1.10435,1.446341,...,1.38763,1.226175,1.21003,1.233514,1.088939,0.85116,0.642003,0.569348,-0.016292,-0.235724
5,-0.238993,-0.33323,-0.359083,0.096259,-0.056356,0.442353,0.58079,1.027793,1.529003,2.038553,...,1.929304,1.741663,1.837568,1.741663,1.624074,1.376388,1.00194,0.884352,0.228025,-0.03634


In [26]:
cluster_dist_result = cluster_result.copy()

In [30]:
for key in cluster_dist_result.keys():
    cluster_dist_result[key]['weekday'] = [
        date.weekday()
        for date in cluster_dist_result[key].index
    ]

In [55]:
cluster_days_dist = dict()

for key in cluster_dist_result.keys():
    cluster_day = dict()
    week_list = set(cluster_dist_result[key]["weekday"].values)
    for week in week_list:
        week_top = cluster_dist_result[key]["weekday"].groupby(
            cluster_dist_result[key]["Label"]
        ).count().sort_values(ascending=False).index[0]
        cluster_day[week] = week_top
    cluster_days_dist[key] = cluster_day

In [56]:
cluster_days_dist

{'봄': {0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 4},
 '여름': {0: 4, 1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 4},
 '가을': {0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1},
 '겨울': {0: 3, 1: 3, 2: 3, 3: 3, 4: 3, 5: 3, 6: 3}}

In [68]:
test_df = norm_all_dict['univariate']['test'].copy()
for idx in range(0, len(test_df), 24):
    split_df = test_df.iloc[idx: idx+24].copy()
    date = split_df.index[0]
    
    season = get_season(date.month)
    weekday = date.weekday()
    label = cluster_days_dist[season][weekday]
    
    print("{} is [season: {}, weekday: {}, label: {}]".format(
        date,
        season,
        weekday,
        label
    ))
    print("==pattern==> {}".format(cluster_norm_dict[season].loc[label].values))
    

2018-04-02 00:00:00 is [season: 봄, weekday: 0, label: 4]
==pattern==> [-0.31791884 -0.40671893 -0.44488095  0.14369486 -0.08968059  0.34918268
  0.46146709  0.74181118  1.10435039  1.44634083  1.56963352  1.75163701
  1.79200069  1.7127411   1.38763002  1.22617531  1.21002984  1.23351416
  1.08893881  0.85116005  0.64200281  0.56934819 -0.01629208 -0.23572371]
2018-04-03 00:00:00 is [season: 봄, weekday: 1, label: 4]
==pattern==> [-0.31791884 -0.40671893 -0.44488095  0.14369486 -0.08968059  0.34918268
  0.46146709  0.74181118  1.10435039  1.44634083  1.56963352  1.75163701
  1.79200069  1.7127411   1.38763002  1.22617531  1.21002984  1.23351416
  1.08893881  0.85116005  0.64200281  0.56934819 -0.01629208 -0.23572371]
2018-04-04 00:00:00 is [season: 봄, weekday: 2, label: 4]
==pattern==> [-0.31791884 -0.40671893 -0.44488095  0.14369486 -0.08968059  0.34918268
  0.46146709  0.74181118  1.10435039  1.44634083  1.56963352  1.75163701
  1.79200069  1.7127411   1.38763002  1.22617531  1.210029

2018-05-01 00:00:00 is [season: 봄, weekday: 1, label: 4]
==pattern==> [-0.31791884 -0.40671893 -0.44488095  0.14369486 -0.08968059  0.34918268
  0.46146709  0.74181118  1.10435039  1.44634083  1.56963352  1.75163701
  1.79200069  1.7127411   1.38763002  1.22617531  1.21002984  1.23351416
  1.08893881  0.85116005  0.64200281  0.56934819 -0.01629208 -0.23572371]
2018-05-02 00:00:00 is [season: 봄, weekday: 2, label: 4]
==pattern==> [-0.31791884 -0.40671893 -0.44488095  0.14369486 -0.08968059  0.34918268
  0.46146709  0.74181118  1.10435039  1.44634083  1.56963352  1.75163701
  1.79200069  1.7127411   1.38763002  1.22617531  1.21002984  1.23351416
  1.08893881  0.85116005  0.64200281  0.56934819 -0.01629208 -0.23572371]
2018-05-03 00:00:00 is [season: 봄, weekday: 3, label: 4]
==pattern==> [-0.31791884 -0.40671893 -0.44488095  0.14369486 -0.08968059  0.34918268
  0.46146709  0.74181118  1.10435039  1.44634083  1.56963352  1.75163701
  1.79200069  1.7127411   1.38763002  1.22617531  1.210029

2018-05-25 00:00:00 is [season: 봄, weekday: 4, label: 4]
==pattern==> [-0.31791884 -0.40671893 -0.44488095  0.14369486 -0.08968059  0.34918268
  0.46146709  0.74181118  1.10435039  1.44634083  1.56963352  1.75163701
  1.79200069  1.7127411   1.38763002  1.22617531  1.21002984  1.23351416
  1.08893881  0.85116005  0.64200281  0.56934819 -0.01629208 -0.23572371]
2018-05-26 00:00:00 is [season: 봄, weekday: 5, label: 4]
==pattern==> [-0.31791884 -0.40671893 -0.44488095  0.14369486 -0.08968059  0.34918268
  0.46146709  0.74181118  1.10435039  1.44634083  1.56963352  1.75163701
  1.79200069  1.7127411   1.38763002  1.22617531  1.21002984  1.23351416
  1.08893881  0.85116005  0.64200281  0.56934819 -0.01629208 -0.23572371]
2018-05-27 00:00:00 is [season: 봄, weekday: 6, label: 4]
==pattern==> [-0.31791884 -0.40671893 -0.44488095  0.14369486 -0.08968059  0.34918268
  0.46146709  0.74181118  1.10435039  1.44634083  1.56963352  1.75163701
  1.79200069  1.7127411   1.38763002  1.22617531  1.210029

2018-06-22 00:00:00 is [season: 여름, weekday: 4, label: 4]
==pattern==> [-1.17165051 -1.20674936 -1.22509648 -1.25142062 -1.28253097 -1.2378597
 -1.18919993 -0.98339304 -0.67149189 -0.22797006 -0.18489419  0.04803454
  0.16848741  0.19162075 -0.01737695 -0.16335626 -0.21042063 -0.25509189
 -0.31970569 -0.66989649 -0.91798154 -0.96903442 -1.04401833 -1.06316315]
2018-06-23 00:00:00 is [season: 여름, weekday: 5, label: 4]
==pattern==> [-1.17165051 -1.20674936 -1.22509648 -1.25142062 -1.28253097 -1.2378597
 -1.18919993 -0.98339304 -0.67149189 -0.22797006 -0.18489419  0.04803454
  0.16848741  0.19162075 -0.01737695 -0.16335626 -0.21042063 -0.25509189
 -0.31970569 -0.66989649 -0.91798154 -0.96903442 -1.04401833 -1.06316315]
2018-06-24 00:00:00 is [season: 여름, weekday: 6, label: 4]
==pattern==> [-1.17165051 -1.20674936 -1.22509648 -1.25142062 -1.28253097 -1.2378597
 -1.18919993 -0.98339304 -0.67149189 -0.22797006 -0.18489419  0.04803454
  0.16848741  0.19162075 -0.01737695 -0.16335626 -0.210420

 -0.31970569 -0.66989649 -0.91798154 -0.96903442 -1.04401833 -1.06316315]
2018-07-17 00:00:00 is [season: 여름, weekday: 1, label: 4]
==pattern==> [-1.17165051 -1.20674936 -1.22509648 -1.25142062 -1.28253097 -1.2378597
 -1.18919993 -0.98339304 -0.67149189 -0.22797006 -0.18489419  0.04803454
  0.16848741  0.19162075 -0.01737695 -0.16335626 -0.21042063 -0.25509189
 -0.31970569 -0.66989649 -0.91798154 -0.96903442 -1.04401833 -1.06316315]
2018-07-18 00:00:00 is [season: 여름, weekday: 2, label: 4]
==pattern==> [-1.17165051 -1.20674936 -1.22509648 -1.25142062 -1.28253097 -1.2378597
 -1.18919993 -0.98339304 -0.67149189 -0.22797006 -0.18489419  0.04803454
  0.16848741  0.19162075 -0.01737695 -0.16335626 -0.21042063 -0.25509189
 -0.31970569 -0.66989649 -0.91798154 -0.96903442 -1.04401833 -1.06316315]
2018-07-19 00:00:00 is [season: 여름, weekday: 3, label: 4]
==pattern==> [-1.17165051 -1.20674936 -1.22509648 -1.25142062 -1.28253097 -1.2378597
 -1.18919993 -0.98339304 -0.67149189 -0.22797006 -0.18489

2018-08-15 00:00:00 is [season: 여름, weekday: 2, label: 4]
==pattern==> [-1.17165051 -1.20674936 -1.22509648 -1.25142062 -1.28253097 -1.2378597
 -1.18919993 -0.98339304 -0.67149189 -0.22797006 -0.18489419  0.04803454
  0.16848741  0.19162075 -0.01737695 -0.16335626 -0.21042063 -0.25509189
 -0.31970569 -0.66989649 -0.91798154 -0.96903442 -1.04401833 -1.06316315]
2018-08-16 00:00:00 is [season: 여름, weekday: 3, label: 4]
==pattern==> [-1.17165051 -1.20674936 -1.22509648 -1.25142062 -1.28253097 -1.2378597
 -1.18919993 -0.98339304 -0.67149189 -0.22797006 -0.18489419  0.04803454
  0.16848741  0.19162075 -0.01737695 -0.16335626 -0.21042063 -0.25509189
 -0.31970569 -0.66989649 -0.91798154 -0.96903442 -1.04401833 -1.06316315]
2018-08-17 00:00:00 is [season: 여름, weekday: 4, label: 4]
==pattern==> [-1.17165051 -1.20674936 -1.22509648 -1.25142062 -1.28253097 -1.2378597
 -1.18919993 -0.98339304 -0.67149189 -0.22797006 -0.18489419  0.04803454
  0.16848741  0.19162075 -0.01737695 -0.16335626 -0.210420

2018-09-08 00:00:00 is [season: 가을, weekday: 5, label: 1]
==pattern==> [-0.85302588 -0.87661504 -0.90806726 -0.90457257 -0.94126682 -0.91680399
 -0.8844781  -0.5708296  -0.30086475  0.35264242  0.48806168  0.66454357
  0.49941943  0.52650328  0.41117849  0.2478017   0.26876984  0.32031653
  0.23906497  0.02239415 -0.38823201 -0.50792517 -0.63460771 -0.73682741]
2018-09-09 00:00:00 is [season: 가을, weekday: 6, label: 1]
==pattern==> [-0.85302588 -0.87661504 -0.90806726 -0.90457257 -0.94126682 -0.91680399
 -0.8844781  -0.5708296  -0.30086475  0.35264242  0.48806168  0.66454357
  0.49941943  0.52650328  0.41117849  0.2478017   0.26876984  0.32031653
  0.23906497  0.02239415 -0.38823201 -0.50792517 -0.63460771 -0.73682741]
2018-09-10 00:00:00 is [season: 가을, weekday: 0, label: 1]
==pattern==> [-0.85302588 -0.87661504 -0.90806726 -0.90457257 -0.94126682 -0.91680399
 -0.8844781  -0.5708296  -0.30086475  0.35264242  0.48806168  0.66454357
  0.49941943  0.52650328  0.41117849  0.2478017   0.268