In [1]:
import requests
import pandas as pd
import numpy as np

In [2]:
host = "http://localhost"
port = "7770"
baseUrl = f"{host}:{port}"
url = baseUrl + "/api/energy/elec"
params = { "start": "2023-10-23 12:00:00", "end": "2024-10-22 12:00:00", "datetimeType": 0}
response = requests.get(url, params=params)

if (str(response.status_code) == "200"):
    json_data = response.json()

In [3]:
timestamp = []
usage = []

for row in json_data['datas']:
    timestamp.append(row['timestamp'])
    usage.append(row['usage'])

data = {
    "timestamp" : pd.to_datetime(timestamp),
    "usage" : usage
}
data

{'timestamp': DatetimeIndex(['2023-10-23 12:00:00', '2023-10-23 13:00:00',
                '2023-10-23 14:00:00', '2023-10-23 15:00:00',
                '2023-10-23 16:00:00', '2023-10-23 17:00:00',
                '2023-10-23 18:00:00', '2023-10-23 19:00:00',
                '2023-10-23 20:00:00', '2023-10-23 21:00:00',
                ...
                '2024-10-22 02:00:00', '2024-10-22 03:00:00',
                '2024-10-22 04:00:00', '2024-10-22 05:00:00',
                '2024-10-22 06:00:00', '2024-10-22 07:00:00',
                '2024-10-22 08:00:00', '2024-10-22 09:00:00',
                '2024-10-22 10:00:00', '2024-10-22 11:00:00'],
               dtype='datetime64[ns]', length=8760, freq=None),
 'usage': [45.348827,
  42.205307,
  43.206253,
  35.20842,
  46.082664,
  39.040768,
  39.895996,
  33.25264,
  27.934923,
  0.380068,
  1.1617243,
  0.35667,
  0.0,
  0.360479,
  0.75649905,
  0.0484094,
  0.378406,
  0.383815,
  1.5474902,
  0.362361,
  31.449776,
  32.78323,
  

In [4]:
df = pd.DataFrame(data)
df.set_index('timestamp')
df

Unnamed: 0,timestamp,usage
0,2023-10-23 12:00:00,45.348827
1,2023-10-23 13:00:00,42.205307
2,2023-10-23 14:00:00,43.206253
3,2023-10-23 15:00:00,35.208420
4,2023-10-23 16:00:00,46.082664
...,...,...
8755,2024-10-22 07:00:00,0.827402
8756,2024-10-22 08:00:00,26.508482
8757,2024-10-22 09:00:00,44.953810
8758,2024-10-22 10:00:00,45.771590


In [5]:
# 시계열 데이터 학습을 위한 슬라이딩 윈도우 기법
window_size = 168  # 과거 데이터를 보고
horizon = 1  # 다음의 데이터를 예측

In [6]:
# 피처(X)와 타겟(y)을 생성하는 함수                                                                                                                                                                                                                                                                                                                                             
def create_sequences(data, window_size, horizon):             
    X, y = [], []                                                                                                                                                                                                                                                                                                       
    # 전체 데이터에서 윈도우와 타겟을 만들 수 있는 만큼 반복                                                                                                                                                                                                                                                                                                                       
    for i in range(len(data) - window_size - horizon + 1):                                                                                                                                                                                                                                                                                                                         
        window = data[i:(i + window_size)]                                                                                                                                                                                                                                                                                                                                         
        target = data[i + window_size + horizon - 1]                                                                                                                                                                                                                                                                                                                               
        X.append(window)                                                                                                                                                                                                                                                                                                                                                           
        y.append(target)                                                                                                                                                                                                                                                                                                                                                           
    return np.array(X), np.array(y)

In [7]:
usage_data = df['usage'].values
X, y = create_sequences(usage_data, window_size, horizon)

In [8]:
import lightgbm as lgb
import xgboost as xgb
from sklearn.metrics import mean_squared_error

In [9]:
split_ratio = 0.8
split_index = int(len(X) * split_ratio)
if split_index == 0 and len(X) > 0:
    split_index = 1

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

In [10]:
# 베이스 라인 모델(Naive Forecast: 직전 값이 다음 값이 될 것이다)
naive_preds = X_test[:, -1]  # 예측값 = 입력 데이터의 가장 마지막 값
naive_rmse = np.sqrt(mean_squared_error(y_test, naive_preds))
naive_rmse

np.float64(9.238373058040235)

In [11]:
lgbm = lgb.LGBMRegressor(random_state=42)
lgbm.fit(X_train, y_train)
lgbm_preds = lgbm.predict(X_test)
lgbm_rmse = np.sqrt(mean_squared_error(y_test, lgbm_preds))
lgbm_rmse

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004065 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 42840
[LightGBM] [Info] Number of data points in the train set: 6873, number of used features: 168
[LightGBM] [Info] Start training from score 12.004131




np.float64(2.6188861539008137)

In [12]:
xgbr = xgb.XGBRegressor(random_state=42)
xgbr.fit(X_train, y_train)
xgb_preds = xgbr.predict(X_test)
xgb_rmse = np.sqrt(mean_squared_error(y_test, xgb_preds))
xgb_rmse

np.float64(2.8277740569472933)

In [13]:
# 192, 1, nv, 9.238373058040235
# 192, 1, lg, 2.6372862230718703
# 192, 1, xg, 2.770072870963477

# 168, 1, nv, 9.238373058040235
# 168, 1, lg, 2.6188861539008137
# 168, 1, xg, 2.8277740569472933

In [14]:
df.describe()
# std = 16.396
# mean = 12.904

# => 성능이 그닥

Unnamed: 0,timestamp,usage
count,8760,8760.0
mean,2024-04-22 23:30:00,12.903665
min,2023-10-23 12:00:00,0.0
25%,2024-01-22 17:45:00,0.405338
50%,2024-04-22 23:30:00,0.859469
75%,2024-07-23 05:15:00,27.724058
max,2024-10-22 11:00:00,50.848896
std,,16.396312
