In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

In [2]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import math as mt
import warnings
import random as ran

# Korean (Hangul) text output in matplotlib figures.
# NOTE(review): 'AppleGothic' is presumably only available on macOS — confirm
# whether a fallback font is needed when this notebook runs elsewhere.
matplotlib.rc('font', family='AppleGothic')
# Render negative numbers with an ASCII hyphen instead of the Unicode minus sign
# (presumably because the selected font lacks that glyph — TODO confirm).
plt.rcParams['axes.unicode_minus'] = False
# Suppresses every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings from pandas/numpy.
warnings.filterwarnings(action='ignore') 

from src import crs, PublicPredictor, KMeans, TimeDivisionKMeans
from src.dbc import utils
import src.utils as ut
from IPython.display import clear_output

In [3]:
# --- Parameters for this cell ---------------------------------------------
data_path = "data/apt_1.xlsx"
_month = 1                # month key used to select one row of `m`
PUBLIC_PERCENTAGE = 30    # forwarded as-is to crs.utils.get_APT

# --- 1. Read the raw workbook (no header row, first two rows skipped) -------
xlsx = pd.read_excel(
    data_path,
    header=None,
    skiprows=2,
    engine="openpyxl",
)

# --- 2. Preprocess; `m` is then indexed by its "month" column --------------
p, m = crs.utils.data_preprocessing(xlsx)
m.set_index("month", inplace=True)

# --- 3. Build the two-column frame for the selected month ------------------
# The former index of the selection becomes the first column after reset_index().
month_df = pd.DataFrame(m.loc[_month]).reset_index()
month_df.columns = ['name', 'usage (kWh)']

APT = crs.utils.get_APT(month_df, PUBLIC_PERCENTAGE)

# --- 4. Instantiate the management-office model and keep its `apart` -------
calc = crs.models.ManagementOffice(
    month=_month,
    households=month_df,
    APT=APT,
    contract="단일계약",
)
apt = calc.apart

In [4]:
# Preprocess the raw sheet with the `src.dbc.utils` helpers (a different module
# from the `crs.utils` used when building `m`).
# NOTE(review): the `m_15` / `m_60` names suggest 15-minute and 60-minute
# resolution; the displayed rows below are hourly — confirm against the helpers.
m_15 = utils.data_preprocessing(xlsx)

df = utils.dimension_reduction(m_15)
m_60 = df.copy()

# Keep only the rows whose index timestamp falls in January (month == 1).
# The copy decouples the slice from `m_60`.
m_60_1 = m_60[m_60.index.month == 1].copy()

# Only the last expression of a cell is rendered, so a single preview is kept.
m_60_1.head()

Unnamed: 0,아파트1-104-1206,아파트1-104-303,아파트1-104-1307,아파트1-104-1208,아파트1-104-408,아파트1-104-203,아파트1-103-1402,아파트1-103-402,아파트1-103-1201,아파트1-103-801,...,아파트1-102-901,아파트1-103-1905,아파트1-103-503,아파트1-103-1504,아파트1-103-606,아파트1-103-903,아파트1-103-1106,아파트1-103-705,아파트1-103-1505,아파트1-103-406
2019-01-01 00:00:00,0.033,0.0,0.034,0.037,0.062,0.322,0.24,0.295,0.373,0.243,...,0.534,0.188,0.4,0.318,0.42,0.626,0.484,0.289,0.305,0.652
2019-01-01 01:00:00,0.048,0.0,0.033,0.037,0.063,0.185,0.257,0.397,0.257,0.228,...,0.396,0.434,0.355,0.25,0.465,0.409,0.459,0.336,0.332,0.557
2019-01-01 02:00:00,0.032,0.0,0.039,0.036,0.062,0.175,0.384,0.353,0.123,0.141,...,0.194,0.337,0.379,0.274,0.326,0.307,0.405,0.382,0.311,0.491
2019-01-01 03:00:00,0.033,0.0,0.039,0.037,0.062,0.167,0.276,0.488,0.142,0.159,...,0.286,0.263,0.375,0.264,0.336,0.345,0.298,0.291,0.261,0.511
2019-01-01 04:00:00,0.032,0.001,0.033,0.037,0.063,0.197,0.266,0.278,0.19,0.227,...,0.275,0.245,0.31,0.423,0.36,0.364,0.212,0.347,0.294,0.507


In [6]:
# Cluster households with the project's KMeans implementation.
# `m_60_1` has one column per household (see the table above), so the transpose
# passes one row per household to the model.
# NOTE(review): no random seed is set in the visible cells; if this KMeans
# initialises randomly, the printed ECV and the labels may differ between runs.
kmeans = KMeans(datas=m_60_1.T.values)
# The recorded run printed an "ECV" percentage while executing this cell.
kmeans.fit()
# NOTE(review): presumably reorders the clusters/labels into a canonical order —
# confirm in the KMeans source.
kmeans.sorting()

ECV : 47 %


In [28]:
# Build the per-household table (name, usage, cluster label — see the rendered
# output) from the January data and the fitted `kmeans` model.
group_df = ut.make_group_df(m_60_1, kmeans)
# Derive the anomaly table from the grouping.
# NOTE(review): the anomaly criterion lives in ut.get_anomaly_df and is not visible here.
anomaly_df = ut.get_anomaly_df(group_df)

# Rebind `group_df` to the adjusted result; the unadjusted grouping is no
# longer reachable after this line.
group_df = ut.adjust_anomaly_df(m_60_1, anomaly_df, group_df)
group_df

Unnamed: 0,가구명,usage (kWh),label
0,아파트1-104-1206,99,0
1,아파트1-104-303,10,0
2,아파트1-104-1307,23,0
3,아파트1-104-1208,40,0
4,아파트1-104-408,39,0
...,...,...,...
103,아파트1-103-903,273,0
104,아파트1-103-1106,255,0
105,아파트1-103-705,228,0
106,아파트1-103-1505,384,2


In [29]:
# NOTE(review): `get_anomaly_data` is neither imported nor defined in any visible
# cell, and this cell has no recorded output. On a fresh kernel this likely
# raises NameError unless the name is defined elsewhere — confirm where it lives
# (e.g. a helper in `src.utils`) and qualify or import it.
get_anomaly_data(group_df)

In [41]:
# Fit the time-division variant of the clustering on the same January data.
# Unlike the plain KMeans cell, the DataFrame is passed as-is (not transposed
# and not converted to a NumPy array).
tdkmeans = TimeDivisionKMeans(datas=m_60_1)
# The recorded run printed "<step>/248 - ECV:<pct>%" progress lines while fitting.
# NOTE(review): what each of the 248 steps represents is not visible here.
tdkmeans.fit()

1/248 - ECV:85%
11/248 - ECV:78%
21/248 - ECV:85%
31/248 - ECV:86%
41/248 - ECV:88%
51/248 - ECV:82%
61/248 - ECV:87%
71/248 - ECV:82%
81/248 - ECV:87%
91/248 - ECV:81%
101/248 - ECV:85%
111/248 - ECV:79%
121/248 - ECV:88%
131/248 - ECV:78%
141/248 - ECV:84%
151/248 - ECV:87%
161/248 - ECV:86%
171/248 - ECV:79%
181/248 - ECV:82%
191/248 - ECV:80%
201/248 - ECV:88%
211/248 - ECV:78%
221/248 - ECV:83%
231/248 - ECV:80%
241/248 - ECV:87%
248/248 - ECV:84%


In [43]:
# Same grouping/anomaly pipeline as for the plain KMeans model, this time driven
# by the fitted `tdkmeans` model; `_type="tdKMeans"` selects that code path in
# ut.make_group_df.
# NOTE: this overwrites the `group_df` / `anomaly_df` produced for the plain
# KMeans model earlier in the notebook.
group_df = ut.make_group_df(m_60_1, tdkmeans, _type="tdKMeans")
anomaly_df = ut.get_anomaly_df(group_df)

# Rebind `group_df` to the adjusted result and display it.
group_df = ut.adjust_anomaly_df(m_60_1, anomaly_df, group_df)
group_df

Unnamed: 0,가구명,usage (kWh),label
0,아파트1-104-1206,99,0
1,아파트1-104-303,10,0
2,아파트1-104-1307,23,0
3,아파트1-104-1208,40,0
4,아파트1-104-408,39,0
...,...,...,...
103,아파트1-103-903,273,1
104,아파트1-103-1106,255,1
105,아파트1-103-705,228,1
106,아파트1-103-1505,384,2


In [45]:
# NOTE(review): `get_anomaly_data` is neither imported nor defined in any visible
# cell, and this cell has no recorded output. On a fresh kernel this likely
# raises NameError unless the name is defined elsewhere — confirm where it lives
# (e.g. a helper in `src.utils`) and qualify or import it.
get_anomaly_data(group_df)