In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('test2.modify2.csv')
df.columns

Index(['unit', 'Ps30', 'Nc', 'T50', 'phi', 'NRc', 'W32', 'P30', 'BPR', 'T30',
       'T24', 'W31', 'htBleed', 'NRf', 'Nf', 'RUL'],
      dtype='object')

In [3]:
features = ['Ps30', 'Nc', 'T50', 'phi', 'NRc', 'W32', 'P30', 'BPR', 'T30',
       'T24', 'W31', 'htBleed', 'NRf', 'Nf']
label = 'RUL'

In [4]:
train, test = df[0::2], df[1::2]
train, test = train.reset_index(), test.reset_index()
X_train, y_train = train[features], train[label]
X_test, y_test = test[features], test[label]

#### 모델 기획 및 수립
- XGBoost(속도를 개선함), CatBoost, LightGBM -> 트리 기반의 알고리즘 
- XGBoost : 내부적을 병렬 계산을 극대화함 -> 속도가 개선함 regularizer가 내부적으로 작동함 이후의 cat, light에 대비해 reference가 많다. 

In [5]:
from sklearn.ensemble import RandomForestRegressor as rf
from xgboost import XGBRegressor as xg
from lightgbm import LGBMRegressor as lg
from catboost import CatBoostRegressor as cb

In [6]:
m1, m2, m3, m4 = rf(), xg(), lg(), cb(silent= True)

In [7]:
m1.fit(X_train, y_train)

In [8]:
m2.fit(X_train, y_train)

In [9]:
m3.fit(X_train, y_train)

In [10]:
m4.fit(X_train, y_train)

<catboost.core.CatBoostRegressor at 0x23c4ea638e0>

In [11]:
print(m1.score(X_train, y_train), m2.score(X_train, y_train), m3.score(X_train, y_train), m4.score(X_train, y_train))
print(m1.score(X_test, y_test), m2.score(X_test, y_test), m3.score(X_test, y_test), m4.score(X_test, y_test))

# 학습한것에 비해 성능이 너무 좋지않다.
# rul이 0에 가까울수록 기울기의 변동폭이 엄청 커진다. 
# 수명이 죽을 때가 오면 기울기가 훅훅 떨어진다. 
# 7일차의 그래프를 참고하자. 
# 로그함수나 루트를 사용하자...

0.9487398253143053 0.8906117568834724 0.7678763179899359 0.806670475771444
0.6283134728294472 0.589376213162057 0.6274747881162159 0.6287516693582487


In [12]:
# 로그 오차....

In [13]:
df['RUL_Log'] = df['RUL'].apply(lambda x : np.log(x))
df.head()

Unnamed: 0,unit,Ps30,Nc,T50,phi,NRc,W32,P30,BPR,T30,T24,W31,htBleed,NRf,Nf,RUL,RUL_Log
0,1,47.47,9046.19,1400.6,521.66,8138.62,23.419,554.36,8.4195,1589.7,641.82,39.06,392,2388.02,2388.06,192,5.257495
1,1,47.49,9044.07,1403.14,522.28,8131.49,23.4236,553.75,8.4318,1591.82,642.15,39.0,392,2388.07,2388.04,191,5.252273
2,1,47.27,9052.94,1404.2,522.42,8133.23,23.3442,554.26,8.4178,1587.99,642.35,38.95,390,2388.03,2388.08,190,5.247024
3,1,47.13,9049.48,1401.87,522.86,8133.83,23.3739,554.45,8.3682,1582.79,642.35,38.88,392,2388.08,2388.11,189,5.241747
4,1,47.28,9055.15,1406.22,522.19,8133.8,23.4044,554.0,8.4294,1582.85,642.37,38.9,393,2388.04,2388.06,188,5.236442


In [14]:
features = ['Ps30', 'Nc', 'T50', 'phi', 'NRc', 'W32', 'P30', 'BPR', 'T30',
       'T24', 'W31', 'htBleed', 'NRf', 'Nf']
label = 'RUL_Log'

In [15]:
train, test = df[0::2], df[1::2]
train, test = train.reset_index(), test.reset_index()
X_train, y_train = train[features], train[label]
X_test, y_test = test[features], test[label]

In [16]:
m1, m2, m3, m4 = rf(), xg(), lg(), cb(silent= True)

In [17]:
m1.fit(X_train, y_train)

In [18]:
m2.fit(X_train, y_train)

In [19]:
m3.fit(X_train, y_train)

In [20]:
m4.fit(X_train, y_train)

<catboost.core.CatBoostRegressor at 0x23c5555a940>

In [21]:
print(m1.score(X_train, y_train), m2.score(X_train, y_train), m3.score(X_train, y_train), m4.score(X_train, y_train))
print(m1.score(X_test, y_test), m2.score(X_test, y_test), m3.score(X_test, y_test), m4.score(X_test, y_test))

0.9793777144343612 0.9505269071781702 0.9059628093954009 0.9190637667017059
0.8504263562007587 0.8349611612604918 0.8535134200337309 0.8553732607091954


#### Hyperparameter tuning

In [22]:
sample_model = xg(reg_alpha = 0.9)
sample_model.fit(X_train, y_train)

In [23]:
sample_model.score(X_test, y_test)

0.8364505083228531

### Optimizer

In [24]:
def alpha_search(alpha):
    sample_model = xg(reg_alpha = alpha)
    sample_model.fit(X_train, y_train)
    return sample_model.score(X_test, y_test)

In [25]:
alpha_search(0.2)

0.8368009499247369

In [26]:
def hyper_param(ra, rl, md, lr):
    sample_model = xg(reg_alpha = ra, reg_lambda = rl, max_depth = int(md), learning_rate = lr)
    sample_model.fit(X_train, y_train)
    return sample_model.score(X_test, y_test)

In [27]:
hyper_param(0.2, 0.3, 10, 0.6)

0.7825444137486678

In [28]:
from bayes_opt import BayesianOptimization as bo

In [29]:
epsilon = 0.1 ** 10
float_range = (epsilon, 1 - epsilon)
pbound = { 'ra' : float_range, 'rl' : float_range, 'lr' : float_range, 'md' : (5, 20)}

optimizer = bo(f = hyper_param, pbounds = pbound)

In [30]:
optimizer.maximize(n_iter = 25)

|   iter    |  target   |    lr     |    md     |    ra     |    rl     |
-------------------------------------------------------------------------
| [0m1        [0m | [0m0.7915   [0m | [0m0.0294   [0m | [0m17.66    [0m | [0m0.2366   [0m | [0m0.3002   [0m |
| [95m2        [0m | [95m0.8451   [0m | [95m0.07952  [0m | [95m15.22    [0m | [95m0.9534   [0m | [95m0.6362   [0m |
| [0m3        [0m | [0m0.7724   [0m | [0m0.6757   [0m | [0m11.67    [0m | [0m0.4622   [0m | [0m0.06811  [0m |
| [0m4        [0m | [0m0.7958   [0m | [0m0.5901   [0m | [0m12.78    [0m | [0m0.6232   [0m | [0m0.8068   [0m |
| [0m5        [0m | [0m0.729    [0m | [0m0.9732   [0m | [0m6.262    [0m | [0m0.02305  [0m | [0m0.5878   [0m |
| [0m6        [0m | [0m0.6832   [0m | [0m1.0      [0m | [0m15.12    [0m | [0m1e-10    [0m | [0m1e-10    [0m |
| [0m7        [0m | [0m0.7893   [0m | [0m0.6072   [0m | [0m13.51    [0m | [0m0.7319   [0m | [0m0.9331  

In [31]:
param_set = optimizer.max['params']

In [32]:
tuned_model = xg(reg_alpha = param_set['ra'],
  reg_lambda = param_set['rl'],
  learning_rate = param_set['lr'],
  max_depth = int(param_set['md']))
                 
tuned_model.fit(X_train, y_train)
tuned_model.score(X_test, y_test)

0.8467114142598033

In [33]:
print(tuned_model.score(X_train, y_train))

0.9470074347796928


In [34]:
# LightGBM

In [35]:
def hyper_param2(ra, rl, md, lr):
    sample_model = lg(reg_alpha = ra, reg_lambda = rl, max_depth = int(md), learning_rate = lr)
    sample_model.fit(X_train, y_train)
    return sample_model.score(X_test, y_test)

In [36]:
epsilon = 0.1 ** 10
float_range = (epsilon, 1 - epsilon)
pbound = { 'ra' : float_range, 'rl' : float_range, 'lr' : float_range, 'md' : (5, 20)}

optimizer = bo(f = hyper_param2, pbounds = pbound)

In [37]:
optimizer.maximize(n_iter = 25)

|   iter    |  target   |    lr     |    md     |    ra     |    rl     |
-------------------------------------------------------------------------
| [0m1        [0m | [0m0.8319   [0m | [0m0.3723   [0m | [0m19.43    [0m | [0m0.8221   [0m | [0m0.4562   [0m |
| [0m2        [0m | [0m0.8293   [0m | [0m0.4107   [0m | [0m7.375    [0m | [0m0.9074   [0m | [0m0.4004   [0m |
| [0m3        [0m | [0m0.8038   [0m | [0m0.6264   [0m | [0m13.79    [0m | [0m0.07845  [0m | [0m0.3959   [0m |
| [0m4        [0m | [0m0.8283   [0m | [0m0.3863   [0m | [0m12.13    [0m | [0m0.01184  [0m | [0m0.528    [0m |
| [95m5        [0m | [95m0.8543   [0m | [95m0.06522  [0m | [95m10.03    [0m | [95m0.8253   [0m | [95m0.6914   [0m |
| [0m6        [0m | [0m-2.899e-0[0m | [0m1e-10    [0m | [0m9.841    [0m | [0m1e-10    [0m | [0m0.5321   [0m |
| [0m7        [0m | [0m0.8298   [0m | [0m0.3906   [0m | [0m15.96    [0m | [0m0.2598   [0m | [0m0.8223  

In [38]:
param_set2 = optimizer.max['params']

In [39]:
tuned_model2 = xg(reg_alpha = param_set['ra'],
  reg_lambda = param_set['rl'],
  learning_rate = param_set['lr'],
  max_depth = int(param_set['md']))
                 
tuned_model2.fit(X_train, y_train)
tuned_model2.score(X_test, y_test)

0.8467114142598033

In [40]:
print(tuned_model2.score(X_train, y_train))

0.9470074347796928


In [41]:
# CatBoost

In [42]:
def hyper_param(depth, llr):
    sample_model = cb(depth = int(depth), l2_leaf_reg = llr, silent = True)
    sample_model.fit(X_train, y_train)
    return sample_model.score(X_test, y_test)

In [43]:
pbounds = { 'depth' : (4, 10), 'llr' : float_range}
optimizer = bo(f = hyper_param, pbounds = pbounds)

In [44]:
optimizer.maximize()

|   iter    |  target   |   depth   |    llr    |
-------------------------------------------------
| [0m1        [0m | [0m0.857    [0m | [0m4.874    [0m | [0m0.7666   [0m |
| [95m2        [0m | [95m0.8576   [0m | [95m7.397    [0m | [95m0.5681   [0m |
| [0m3        [0m | [0m0.857    [0m | [0m8.738    [0m | [0m0.754    [0m |
| [0m4        [0m | [0m0.8575   [0m | [0m6.199    [0m | [0m0.3464   [0m |
| [0m5        [0m | [0m0.8567   [0m | [0m9.057    [0m | [0m0.827    [0m |
| [0m6        [0m | [0m0.8568   [0m | [0m6.855    [0m | [0m0.4691   [0m |
| [95m7        [0m | [95m0.8577   [0m | [95m7.567    [0m | [95m0.6204   [0m |
| [0m8        [0m | [0m0.8568   [0m | [0m4.255    [0m | [0m0.8572   [0m |
| [0m9        [0m | [0m0.8574   [0m | [0m7.72     [0m | [0m0.3043   [0m |
| [95m10       [0m | [95m0.8577   [0m | [95m7.569    [0m | [95m1.0      [0m |
| [0m11       [0m | [0m0.8571   [0m | [0m8.65     [0m | [0m0.

In [45]:
optimizer.max['params']

{'depth': 7.776964552893718, 'llr': 0.9942512022177036}