#### 라이브러리 불러오기

In [1]:
import os
import folium
import numpy as np
import pandas as pd
import geopandas as gpd
import tensorflow as tf
import requests

import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

from xgboost import XGBRegressor
from model_evaluation import regression_evaluation, f_importances, user_mape

2023-08-28 17:54:25.398066: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


#### 설정

##### VWorld

In [2]:
VW_KEY_PATH = os.path.join(os.path.expanduser('~'), 'projects', 'vworld_key.txt')
with open(VW_KEY_PATH, 'r') as _key_file:
    _vw_lines = _key_file.readlines()
VW_KEY = _vw_lines[0].replace('\n', '')

In [3]:
VW_TILE_TYPE = 'png'
VW_ATTR = 'Vworld'

#### 지도에 터빈위치 표시

In [4]:
turbin_info = {
    'a': { 'lat': 35.486483143070764, 'lon': 126.34213114581526, 'name': 'A'},
    'b': { 'lat': 35.48131840227845,  'lon': 126.33507818977857, 'name': 'B'},
    'c': { 'lat': 35.476172884715325, 'lon': 126.32890690366085, 'name': 'C'},
    'd': { 'lat': 35.471026994051186, 'lon': 126.32273651737738, 'name': 'D'},
}

In [5]:
VW_LATS = [info['lat'] for info in turbin_info.values()]
VW_LONS = [info['lon'] for info in turbin_info.values()]
VW_MAP_CENTER = [(min(VW_LATS)+max(VW_LATS))/2, (min(VW_LONS)+max(VW_LONS))/2]


In [6]:
VW_LAYER = 'Base'
VW_TILES_URL = \
    f'http://api.vworld.kr/req/wmts/1.0.0/' \
    f'{VW_KEY}/{VW_LAYER}/{{z}}/{{y}}/{{x}}.{VW_TILE_TYPE}'

In [7]:
base_map = folium.Map(
    location=VW_MAP_CENTER, zoom_start=12,
    tiles=VW_TILES_URL, attr=VW_ATTR
)

for _, _value in turbin_info.items():
    folium.Marker(
        location=[_value['lat'], _value['lon']],
        popup=f'{_value["name"]}-Turbin',
    ).add_to(base_map)

base_map

#### 데이터 불러오기

In [8]:
_PATH_BASE = os.path.join(os.getcwd(), 'data', '41-turbin')
get_path = lambda turbin: os.path.join(_PATH_BASE, f'{turbin}_turbin.csv')

In [9]:
df_a = pd.read_csv(get_path('a'))
df_b = pd.read_csv(get_path('b'))
df_c = pd.read_csv(get_path('c'))
df_d = pd.read_csv(get_path('d'))

#### 데이터 확인

In [10]:
# 결측치 확인
df_a.isna().sum()       

# 없음: 결측치는 -1로 들어온것 같음

TURBINE_TIME    0
WIND_SPEED      0
WIND_DIR        0
ACTIVE_POWER    0
month           0
hour            0
dtype: int64

In [11]:
# 음수인 row 갯 수 확인
non_value_counts = (df_a.drop('TURBINE_TIME', axis=1) < 0).sum()

# 풍속이 음수인 갯 수: 17540, 이건 모두 NaN(-1)으로 볼 수 있음
# 풍향이 음수인 갯 수: 20088, 거의 모든 데이터, 풍향은 음수일 수도 있음
# 발전량이 음수인 갯 수: 17540, 풍속과 동일한걸 보면 같이 NaN(-1) 처리하면 될듯

In [12]:
target_col = 'ACTIVE_POWER'

In [13]:
df_a_nn = df_a[df_a[target_col] != -1]

In [14]:
df_b_nn = df_b[df_b[target_col] != -1]
df_c_nn = df_c[df_c[target_col] != -1]
df_d_nn = df_d[df_d[target_col] != -1]

#### 1차 모델링

In [15]:
SEED = 1234

In [16]:
X_data_b = df_b_nn.drop([target_col, 'TURBINE_TIME'], axis=1)
y_data_b = df_b_nn[target_col]

In [17]:
X_train_b, X_test_b, y_train_b, y_test_b = \
    train_test_split(X_data_b, y_data_b, test_size=0.2)

In [18]:
model_rfr = RandomForestRegressor(n_estimators=100, random_state=SEED)
model_rfr.fit(X_train_b, y_train_b)

RandomForestRegressor(random_state=1234)

In [19]:
pred_rfr = model_rfr.predict(X_test_b)
_ = regression_evaluation(y_test_b, pred_rfr)

R2_SCORE: 0.992441, MAPE2: 15.630452, MSE: 6535.870651, RMSE: 80.844732, MAPE: 3.926908, MAE: 40.516391


In [23]:
for t, p in zip(y_test_b, pred_rfr):
    print(f'{t}, {p}, {np.abs(t-p)/t*100}')

46.77272727272727, 22.687820721630857, 51.49348339398651
2969.893333333333, 3030.78216201396, 2.0502025442202245
2298.752542372881, 2032.951500000001, 11.562838429686204
152.72, 140.72539705471002, 7.853983070514657
56.35714285714285, 53.079659875537835, 5.8155591562066125
0.1, 4.660100689223061, 4560.100689223061
3050.116666666667, 3050.0548927917393, 0.002025295478121803
200.475, 198.7373459906622, 0.8667684296484861
132.06333333333333, 122.73360957520646, 7.064582971397719
2659.3283333333334, 2850.183607841063, 7.17682251249142
99.92291666666668, 95.96280075810317, 3.9631708527625125
181.95833333333331, 180.00108936427438, 1.0756550322284057
17.866666666666667, 15.67113984055161, 12.288396414823078
62.36666666666667, 46.75625281005852, 25.03005963111942
156.52916666666667, 131.51880622625697, 15.978083173259316
3051.673333333333, 3039.8645658192104, 0.38696040579232355
80.80434782608695, 80.81675567580933, 0.015355423385241929
1883.4816666666663, 2006.5584868442284, 6.53453773167752