# XGBoost Regressor

In [4]:
# !pip install xgboost
# [venv] Anaconda->py310

In [9]:
import os
import numpy as np

from scipy.spatial.distance import euclidean
from scipy.signal import correlate
#from fastdtw import fastdtw

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor

In [6]:
%matplotlib inline
import matplotlib.pyplot as plt
import pdb


from matplotlib import font_manager, rc
# Register the Windows "malgun.ttf" font as matplotlib's default family
# (presumably so Korean text in figures renders correctly -- confirm).
# The font file only exists on Windows hosts, so guard the lookup: without the
# guard, FontProperties(...).get_name() raises when the file is missing and the
# whole cell fails on other machines.
font_path = "c:/Windows/Fonts/malgun.ttf"
if os.path.isfile(font_path):
    font_name = font_manager.FontProperties(fname=font_path).get_name()
    rc('font', family=font_name)
else:
    # Keep matplotlib's default font; font_name stays defined for later cells.
    font_name = None
    print(f"Font file not found: {font_path}; keeping matplotlib's default font.")
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False

In [7]:
import pandas as pd
from pandas import concat
from pandas import DataFrame
import numpy as np
import scipy.ndimage as ndi
import random

### Read data

In [20]:
# Read data
# The .npy file holds a single dict (arrays plus descriptive metadata); .item()
# unwraps it from the 0-d object array that np.load returns.
fname = os.path.join('train_data', 'fuel_predict_data2.npy')
# SECURITY NOTE: allow_pickle=True unpickles the file contents, which can execute
# arbitrary code. Only load files that come from a trusted source.
data_dict = np.load(fname, allow_pickle=True).item()
train_ox = data_dict['train_x']
train_oy = data_dict['train_y']

# Print the metadata stored alongside the arrays, then the sample shapes.
print(f"경로: {data_dict['path']}, 데이터추출 파일명: {data_dict['ipynb']}, 만든날짜: {data_dict['Date']}")
print(f"전체 keys: {[vv for vv in data_dict.keys()]}")
print(f"학습샘플 크기-> 입력: {data_dict['train_x'].shape}, 출력: {data_dict['train_y'].shape}")
print(f"특징값의 인덱스: {data_dict['feature_index']}")
print(f"특징값의 이름: {data_dict['feature_name']} x 2")
print(f"특징값 추출 위치: {data_dict['tic_position']}->이 위치에서 4개의 특징을 뽑아서 8개(입력 9개특징중 마지막 1개는 index값)")

경로: F:/nox/2024/nox_05sec, 데이터추출 파일명: extract_tempmatch_data.ipynb, 만든날짜: 20241108
전체 keys: ['train_x', 'train_y', 'feature_index', 'ipynb', 'path', 'Date', 'tic_position', 'feature_name']
학습샘플 크기-> 입력: (111, 9), 출력: (111, 2)
특징값의 인덱스: [7, 19, 3, 16]
특징값의 이름: ['metal temp', 'scr후단온도', '외기온도', 'MW', 'index'] x 2
특징값 추출 위치: [60, 90]->이 위치에서 4개의 특징을 뽑아서 8개(입력 9개특징중 마지막 1개는 index값)


In [24]:
# Work on copies so the arrays loaded from disk are never modified.
train_x = train_ox.copy()
train_y = train_oy.copy()
print(train_x.shape, train_y.shape)
# Optional outlier removal (rows 75 and 91), currently disabled:
# train_x = np.delete(train_x, [75,91], axis=0)
# train_y = np.delete(train_y, [75,91], axis=0)

# There are two rising tics; each one is trained and tested separately.
# Alternative split for the first rise (target column 0); test_size=0.2 with depth=3 was noted as best:
#X_train, X_test, y_train, y_test = train_test_split(train_x, train_y[:,0], test_size=0.2, random_state=42)
# Active split: second rise (target column 1); noted settings depth=4, test_size=0.1.
X_train, X_test, y_train, y_test = train_test_split(
    train_x,
    train_y[:, 1],
    test_size=0.1,
    random_state=42,
)
print(f"2nd Rise 위치 예측을 위한 입출력-> 학습: {X_train.shape}, {y_train.shape}, 검증: {X_test.shape}, {y_test.shape}")

(111, 9) (111, 2)
2nd Rise 위치 예측을 위한 입출력-> 학습: (99, 9), (99,), 검증: (12, 9), (12,)


## XGBRegressor Class

In [27]:
class FuelRising:
    """Two XGBoost regressors, one per target column, trained on a shared split.

    Constructing an instance splits the data, fits both models and, unless
    ``save_dir`` is None, writes them to ``xgbreg_1.json`` and ``xgbreg_2.json``.

    Attributes set by ``__init__``: ``X_train``, ``X_test``, ``y_train``,
    ``y_test``, ``model1`` (target column 0) and ``model2`` (target column 1).
    """

    def __init__(self, train_x, train_y, ratio=0.2, save_dir='train_data'):
        """Split the data, train both regressors and optionally save them.

        Args:
            train_x: 2-D feature array. Its last column is dropped before the
                split (presumably the sample-index column -- confirm against
                the data file).
            train_y: 2-D target array; column 0 trains ``model1`` and column 1
                trains ``model2``.
            ratio: Fraction of the rows held out as the test split.
            save_dir: Directory receiving the two model JSON files; it is
                created when missing. Pass None to skip saving.
        """
        # Fixed random_state keeps the split reproducible between runs.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            train_x[:, :-1], train_y, test_size=ratio, random_state=42)

        # Training: the two models differ only in max_depth (3 vs. 4).
        self.model1 = XGBRegressor(objective='reg:squarederror', n_estimators=100,
                                   learning_rate=0.1, max_depth=3, random_state=42)
        self.model2 = XGBRegressor(objective='reg:squarederror', n_estimators=100,
                                   learning_rate=0.1, max_depth=4, random_state=42)
        self.model1.fit(self.X_train, self.y_train[:, 0])
        self.model2.fit(self.X_train, self.y_train[:, 1])

        if save_dir is not None:
            if save_dir:
                # An empty string means the current directory: nothing to create.
                os.makedirs(save_dir, exist_ok=True)
            self.model1.save_model(os.path.join(save_dir, 'xgbreg_1.json'))
            self.model2.save_model(os.path.join(save_dir, 'xgbreg_2.json'))

    def predict(self, X_test):
        """Return ``(model1 prediction, model2 prediction)`` for ``X_test``.

        ``X_test`` must have the same columns the models were fitted on, i.e.
        without the last column that ``__init__`` drops from ``train_x``.
        """
        return self.model1.predict(X_test), self.model2.predict(X_test)

    def evaluate(self, ground, pred, samples_per_minute=12.0):
        """Print MSE, R-squared and a per-sample error listing.

        Args:
            ground: Iterable of true target values.
            pred: Iterable of predicted values, aligned with ``ground``.
            samples_per_minute: Divisor converting the absolute error into
                minutes for the printout. The default of 12.0 presumably
                corresponds to 5-second samples -- confirm.

        Returns:
            Tuple ``(mse, r2)``.
        """
        mse = mean_squared_error(ground, pred)
        # R-squared is the share of target variance explained by the prediction.
        # NOTE(review): an earlier note here quoted 38.1% / 89.8%; those figures
        # may be stale for the current split.
        r2 = r2_score(ground, pred)
        print("Mean Squared Error:", mse)
        print("R-squared:", r2)

        for a, b in zip(ground, pred):
            abs_err = np.abs(a - b)
            print(f"True Rise:{a:.1f}, Pred Rise: {b:.1f}, Diff: {abs_err/samples_per_minute:.1f}분")

        return mse, r2
    
    
# Train both regressors with 10% of the rows held out, then predict on that hold-out.
fuel_rising = FuelRising(train_x, train_y, ratio=0.1)
y_pred1, y_pred2 = fuel_rising.predict(fuel_rising.X_test)

# Score each target column against its own model's prediction, in column order.
# mse / r2 are overwritten on every pass, so they end up holding the metrics of
# the last target (column 1).
for target_col, target_pred in enumerate((y_pred1, y_pred2)):
    mse, r2 = fuel_rising.evaluate(fuel_rising.y_test[:, target_col], target_pred)
y_pred1.shape, y_pred2.shape

Mean Squared Error: 1002.5321655273438
R-squared: -1.4120268821716309
True Rise:221.0, Pred Rise: 258.7, Diff: 3.1분
True Rise:186.0, Pred Rise: 242.6, Diff: 4.7분
True Rise:227.0, Pred Rise: 233.6, Diff: 0.5분
True Rise:184.0, Pred Rise: 249.0, Diff: 5.4분
True Rise:220.0, Pred Rise: 239.3, Diff: 1.6분
True Rise:233.0, Pred Rise: 235.0, Diff: 0.2분
True Rise:219.0, Pred Rise: 232.2, Diff: 1.1분
True Rise:205.0, Pred Rise: 234.8, Diff: 2.5분
True Rise:219.0, Pred Rise: 246.9, Diff: 2.3분
True Rise:241.0, Pred Rise: 241.5, Diff: 0.0분
True Rise:260.0, Pred Rise: 229.7, Diff: 2.5분
True Rise:227.0, Pred Rise: 227.4, Diff: 0.0분
Mean Squared Error: 24895.931640625
R-squared: 0.8855707049369812
True Rise:1844.0, Pred Rise: 1796.7, Diff: 3.9분
True Rise:2246.0, Pred Rise: 2681.0, Diff: 36.2분
True Rise:884.0, Pred Rise: 890.8, Diff: 0.6분
True Rise:883.0, Pred Rise: 872.6, Diff: 0.9분
True Rise:709.0, Pred Rise: 791.2, Diff: 6.8분
True Rise:910.0, Pred Rise: 906.9, Diff: 0.3분
True Rise:758.0, Pred Rise: 921

((12,), (12,))