## KMeans - TimeDivisionKMeans 비교

In [29]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import math as mt
import warnings
import random as ran

# Korean text output: select a font that contains Hangul glyphs and keep the
# minus sign as an ASCII hyphen so negative tick labels do not render as boxes.
# NOTE(review): 'AppleGothic' is a macOS font; confirm availability elsewhere.
plt.rcParams.update({
    'font.family': 'AppleGothic',
    'axes.unicode_minus': False,
})
# Silence every warning category for the rest of the notebook session.
warnings.filterwarnings('ignore')

from src import crs, PublicPredictor, KMeans, TimeDivisionKMeans
from src.dbc import utils
import src.utils as ut
from IPython.display import clear_output

In [30]:
# 1. Load the workbook: no header row is used and the first two rows are skipped.
data_path = "data/apt_1.xlsx"
xlsx = pd.read_excel(
    data_path,
    header=None,
    skiprows=2,
    engine="openpyxl",
)

# 2. Preprocess the raw sheet. Only `m` is used below; it is re-indexed by its
#    "month" column so a single month can be selected with `.loc`.
p, m = crs.utils.data_preprocessing(xlsx)
m = m.set_index("month")

# 3. Build the per-household usage table for the selected month.
_month = 1

month_df = pd.DataFrame(m.loc[_month]).reset_index()
month_df.columns = ['name', 'usage (kWh)']

# presumably the share (%) of usage attributed to common/public areas --
# TODO confirm against crs.utils.get_APT
PUBLIC_PERCENTAGE = 30
APT = crs.utils.get_APT(month_df, PUBLIC_PERCENTAGE)

# Management-office model for this month under the "단일계약" contract type.
calc = crs.models.ManagementOffice(
    month=_month,
    households=month_df,
    APT=APT,
    contract="단일계약",
)
apt = calc.apart

In [31]:
# Preprocess the raw sheet with the dbc helpers.
# NOTE(review): the names suggest 15-minute data reduced to 60-minute data;
# confirm against utils.dimension_reduction.
m_15 = utils.data_preprocessing(xlsx)

df = utils.dimension_reduction(m_15)
m_60 = df.copy()  # work on a copy so `df` keeps the reducer's output untouched

# Not the cell's last expression, so Jupyter does not render this preview.
m_60.head()

# Keep only the rows whose timestamp index falls in January.
m_60_1 = m_60.loc[m_60.index.month == 1].copy()
m_60_1.head()

Unnamed: 0,아파트1-104-1206,아파트1-104-303,아파트1-104-1307,아파트1-104-1208,아파트1-104-408,아파트1-104-203,아파트1-103-1402,아파트1-103-402,아파트1-103-1201,아파트1-103-801,...,아파트1-102-901,아파트1-103-1905,아파트1-103-503,아파트1-103-1504,아파트1-103-606,아파트1-103-903,아파트1-103-1106,아파트1-103-705,아파트1-103-1505,아파트1-103-406
2019-01-01 00:00:00,0.033,0.0,0.034,0.037,0.062,0.322,0.24,0.295,0.373,0.243,...,0.534,0.188,0.4,0.318,0.42,0.626,0.484,0.289,0.305,0.652
2019-01-01 01:00:00,0.048,0.0,0.033,0.037,0.063,0.185,0.257,0.397,0.257,0.228,...,0.396,0.434,0.355,0.25,0.465,0.409,0.459,0.336,0.332,0.557
2019-01-01 02:00:00,0.032,0.0,0.039,0.036,0.062,0.175,0.384,0.353,0.123,0.141,...,0.194,0.337,0.379,0.274,0.326,0.307,0.405,0.382,0.311,0.491
2019-01-01 03:00:00,0.033,0.0,0.039,0.037,0.062,0.167,0.276,0.488,0.142,0.159,...,0.286,0.263,0.375,0.264,0.336,0.345,0.298,0.291,0.261,0.511
2019-01-01 04:00:00,0.032,0.001,0.033,0.037,0.063,0.197,0.266,0.278,0.19,0.227,...,0.275,0.245,0.31,0.423,0.36,0.364,0.212,0.347,0.294,0.507


In [61]:
# Repeated random-subset comparison of KMeans and TimeDivisionKMeans.
#
# For each trial a random subset of household columns is drawn from the January
# frame, both models are fitted on it, and two statistics are recorded per model:
#   - 'anomaly 발생 수'   : number of rows returned by ut.get_anomaly_df
#   - '최소 멤버 수 군집' : size of the smallest cluster (min of label counts)
#
# Rows are collected in plain lists and turned into DataFrames once at the end.
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every iteration.

N_CASES = 500         # number of random household subsets to evaluate
MIN_HOUSEHOLDS = 50   # smallest subset size that may be drawn
SEED = 42             # fixes which households are drawn in each trial
RESULT_COLUMNS = ['anomaly 발생 수', '최소 멤버 수 군집']


def _summarize_groups(group_df):
    """Return one result row (anomaly count, smallest cluster size) for a grouping.

    Parameters
    ----------
    group_df : DataFrame produced by ut.make_group_df; must have a 'label' column.

    Returns
    -------
    dict keyed by the two RESULT_COLUMNS names.
    """
    return {
        "anomaly 발생 수": len(ut.get_anomaly_df(group_df)),
        "최소 멤버 수 군집": group_df['label'].value_counts().min(),
    }


# Private generator: makes the subset drawing repeatable without touching the
# global `random` state.
# NOTE(review): the models may use their own randomness; this seed does not
# cover that -- confirm whether KMeans / TimeDivisionKMeans accept a seed.
rng = ran.Random(SEED)
n_households = len(m_60_1.columns)  # loop-invariant, computed once

kmeans_records = []
tdkmeans_records = []

for case in range(N_CASES):
    # randrange excludes its upper bound, so a trial never uses every household.
    test_m = m_60_1.sample(
        n=rng.randrange(MIN_HOUSEHOLDS, n_households),
        axis=1,
        random_state=rng.randrange(2 ** 32),
    ).copy()

    # Plain KMeans is fitted on the transposed value matrix.
    kmeans = KMeans(datas=test_m.T.values)
    kmeans.fit()
    kmeans_group_df = ut.make_group_df(test_m, kmeans)

    # TimeDivisionKMeans receives the DataFrame itself.
    tdkmeans = TimeDivisionKMeans(datas=test_m)
    tdkmeans.fit()
    tdkmeans_group_df = ut.make_group_df(test_m, tdkmeans, _type="tdkmeans")

    kmeans_records.append(_summarize_groups(kmeans_group_df))
    tdkmeans_records.append(_summarize_groups(tdkmeans_group_df))

    # Replace the previous trial's progress output instead of stacking it.
    clear_output(wait=True)

kmeans_test = pd.DataFrame(kmeans_records, columns=RESULT_COLUMNS)
tdkmeans_test = pd.DataFrame(tdkmeans_records, columns=RESULT_COLUMNS)

ECV : 40 %
1/248 - ECV:84%
11/248 - ECV:72%
21/248 - ECV:85%
31/248 - ECV:86%
41/248 - ECV:82%
51/248 - ECV:82%
61/248 - ECV:84%
71/248 - ECV:83%
81/248 - ECV:84%
91/248 - ECV:73%
101/248 - ECV:83%
111/248 - ECV:77%
121/248 - ECV:76%
131/248 - ECV:65%
141/248 - ECV:87%
151/248 - ECV:66%
161/248 - ECV:84%
171/248 - ECV:74%
181/248 - ECV:78%
191/248 - ECV:78%
201/248 - ECV:84%
211/248 - ECV:70%
221/248 - ECV:83%
231/248 - ECV:77%
241/248 - ECV:83%
248/248 - ECV:81%


In [62]:
# Column-wise mean of the plain-KMeans results (one row per sampled trial).
kmeans_test.mean()

anomaly 발생 수    9.388
최소 멤버 수 군집      1.014
dtype: float64

In [63]:
# Column-wise mean of the TimeDivisionKMeans results (one row per sampled trial).
tdkmeans_test.mean()

anomaly 발생 수    1.812
최소 멤버 수 군집      2.718
dtype: float64

In [64]:
# Largest anomaly count and largest smallest-cluster size seen for plain KMeans.
print(*(kmeans_test[column].max() for column in ('anomaly 발생 수', '최소 멤버 수 군집')))

35 6


In [65]:
# Largest anomaly count and largest smallest-cluster size seen for TimeDivisionKMeans.
print(*(tdkmeans_test[column].max() for column in ('anomaly 발생 수', '최소 멤버 수 군집')))

9 9


In [66]:
# Smallest anomaly count and smallest smallest-cluster size seen for plain KMeans.
print(*(kmeans_test[column].min() for column in ('anomaly 발생 수', '최소 멤버 수 군집')))

0 1


In [67]:
# Smallest anomaly count and smallest smallest-cluster size seen for TimeDivisionKMeans.
print(*(tdkmeans_test[column].min() for column in ('anomaly 발생 수', '최소 멤버 수 군집')))

0 1
