In [1]:
import requests
import pandas as pd
import numpy as np

In [None]:
host = "http://localhost"
port = "7770"
baseUrl = f"{host}:{port}"
url = baseUrl + "/api/energy/gas"
params = { "start": "2023-01-01 00:00:00", "end": "2025-01-01 00:00:00", "datetimeType": 0}
response = requests.get(url, params=params)

if (str(response.status_code) == "200"):
    json_data = response.json()

In [3]:
timestamp = []
usage = []

for row in json_data['datas']:
    timestamp.append(row['timestamp'])
    usage.append(row['usage'])

data = {
    "timestamp" : pd.to_datetime(timestamp),
    "usage" : usage
}
data

{'timestamp': DatetimeIndex(['2023-01-01 00:00:00', '2023-01-01 01:00:00',
                '2023-01-01 02:00:00', '2023-01-01 03:00:00',
                '2023-01-01 04:00:00', '2023-01-01 05:00:00',
                '2023-01-01 06:00:00', '2023-01-01 07:00:00',
                '2023-01-01 08:00:00', '2023-01-01 09:00:00',
                ...
                '2024-12-31 14:00:00', '2024-12-31 15:00:00',
                '2024-12-31 16:00:00', '2024-12-31 17:00:00',
                '2024-12-31 18:00:00', '2024-12-31 19:00:00',
                '2024-12-31 20:00:00', '2024-12-31 21:00:00',
                '2024-12-31 22:00:00', '2024-12-31 23:00:00'],
               dtype='datetime64[ns]', length=17544, freq=None),
 'usage': [0.41054672,
  0.40432462,
  0.4164819,
  2.184166,
  0.379151,
  1.2881639,
  0.4045097,
  0.4364953,
  0.4243924,
  0.8154679,
  0.723063,
  1.5496062,
  1.145427,
  1.1580802,
  0.0,
  1.1697541,
  1.512481,
  0.80787253,
  0.0521115,
  0.0,
  0.0513346,
  0.5033181,


In [4]:
df = pd.DataFrame(data)
df.set_index('timestamp')
df

Unnamed: 0,timestamp,usage
0,2023-01-01 00:00:00,0.410547
1,2023-01-01 01:00:00,0.404325
2,2023-01-01 02:00:00,0.416482
3,2023-01-01 03:00:00,2.184166
4,2023-01-01 04:00:00,0.379151
...,...,...
17539,2024-12-31 19:00:00,23.452340
17540,2024-12-31 20:00:00,21.924550
17541,2024-12-31 21:00:00,1.091118
17542,2024-12-31 22:00:00,0.404238


In [5]:
# 시계열 데이터 학습을 위한 슬라이딩 윈도우 기법
window_size = 168  # 과거 데이터를 보고
horizon = 1  # 다음의 데이터를 예측

In [6]:
# 피처(X)와 타겟(y)을 생성하는 함수                                                                                                                                                                                                                                                                                                                                             
def create_sequences(data, window_size, horizon):             
    X, y = [], []                                                                                                                                                                                                                                                                                                       
    # 전체 데이터에서 윈도우와 타겟을 만들 수 있는 만큼 반복                                                                                                                                                                                                                                                                                                                       
    for i in range(len(data) - window_size - horizon + 1):                                                                                                                                                                                                                                                                                                                         
        window = data[i:(i + window_size)]                                                                                                                                                                                                                                                                                                                                         
        target = data[i + window_size + horizon - 1]                                                                                                                                                                                                                                                                                                                               
        X.append(window)                                                                                                                                                                                                                                                                                                                                                           
        y.append(target)                                                                                                                                                                                                                                                                                                                                                           
    return np.array(X), np.array(y)

In [7]:
usage_data = df['usage'].values
X, y = create_sequences(usage_data, window_size, horizon)

In [8]:
import lightgbm as lgb
import xgboost as xgb
from sklearn.metrics import mean_squared_error

In [9]:
split_ratio = 0.8
split_index = int(len(X) * split_ratio)
if split_index == 0 and len(X) > 0:
    split_index = 1

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

In [10]:
# 베이스 라인 모델(Naive Forecast: 직전 값이 다음 값이 될 것이다)
naive_preds = X_test[:, -1]  # 예측값 = 입력 데이터의 가장 마지막 값
naive_rmse = np.sqrt(mean_squared_error(y_test, naive_preds))
naive_rmse

np.float64(8.30132696679449)

In [11]:
lgbm = lgb.LGBMRegressor(random_state=42)
lgbm.fit(X_train, y_train)
lgbm_preds = lgbm.predict(X_test)
lgbm_rmse = np.sqrt(mean_squared_error(y_test, lgbm_preds))
lgbm_rmse

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004989 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 42840
[LightGBM] [Info] Number of data points in the train set: 13900, number of used features: 168
[LightGBM] [Info] Start training from score 13.702464




np.float64(2.22353411601034)

In [12]:
xgbr = xgb.XGBRegressor(random_state=42)
xgbr.fit(X_train, y_train)
xgb_preds = xgbr.predict(X_test)
xgb_rmse = np.sqrt(mean_squared_error(y_test, xgb_preds))
xgb_rmse

np.float64(2.3994309960019744)

In [13]:
# 192, 1, nv, 9.238373058040235
# 192, 1, lg, 2.6372862230718703
# 192, 1, xg, 2.770072870963477

# 168, 1, nv, 9.238373058040235
# 168, 1, lg, 2.6188861539008137
# 168, 1, xg, 2.8277740569472933

In [14]:
df.describe()
# std = 16.396
# mean = 12.904

# => 성능이 그닥

Unnamed: 0,timestamp,usage
count,17544,17544.0
mean,2024-01-01 11:30:00,13.885192
min,2023-01-01 00:00:00,0.0
25%,2023-07-02 17:45:00,0.422127
50%,2024-01-01 11:30:00,1.116259
75%,2024-07-02 05:15:00,30.381291
max,2024-12-31 23:00:00,53.622894
std,,17.505698
