<a href="https://colab.research.google.com/github/giirrr/first_ryun_project/blob/main/Optuna_LGBM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib as mlp
import warnings
import sklearn
import os
import optuna
warnings.filterwarnings('ignore')

from sklearn.metrics import mean_squared_error
from catboost import CatBoostRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


"""data 불러오기"""
# Load the training table and the evaluation table.
df_train = pd.read_csv('./train_data/master_data.csv')
df_test = pd.read_csv('./train_data/test_data.csv')

# Accepted (lower, upper) range per column. A row holding a value outside
# its range is discarded from the table it belongs to.
_ACCEPTED_RANGES = (
    ('AIR_PRESSURE', 900, 1100),
    ('AIR_TEMPERATURE', -50, 70),
    ('HUMIDITY', 0, 100),
    ('WIND_SPEED', 0, 80),
)

for _frame in (df_train, df_test):
    for _column, _lower, _upper in _ACCEPTED_RANGES:
        # Assigning through a boolean row mask blanks the WHOLE row (every
        # column becomes NaN), which marks it for removal by dropna() below.
        _frame[_frame[_column] < _lower] = np.nan
        _frame[_frame[_column] > _upper] = np.nan

# Drop every row containing a NaN: the rows blanked above as well as rows
# that already had a missing value in any column of the CSV.
df_train = df_train.dropna()
df_test = df_test.dropna()

# Print the column/dtype/row-count summary of both cleaned tables.
for _frame in (df_train, df_test):
    _frame.info()



# Min-max style preprocessing with hard-coded constants (no fitted sklearn
# scaler is involved): every column is mapped with the same affine formula.
def _to_unit_range(values, lower, half_span):
    """Map ``[lower, lower + 2 * half_span]`` linearly onto ``[-1, 1]``."""
    return ((values - lower) / half_span) - 1


# column -> (lower bound, half of the value span) used for the rescaling.
_SCALING_CONSTANTS = {
    'AIR_PRESSURE': (900, 100),
    'AIR_TEMPERATURE': (-50, 60),
    'day_min': (0, 719.5),
    'HUMIDITY': (0, 50),
    'WIND_SPEED': (0, 40),
}

for _column, (_lower, _half_span) in _SCALING_CONSTANTS.items():
    # The same constants are applied to both tables so they share one scale.
    for _frame in (df_train, df_test):
        _frame[_column] = _to_unit_range(_frame[_column], _lower, _half_span)
# "StandardScaler preprocessing" heading from the original notebook; no code
# follows it.


# Model inputs and the single target column. AIR_TEMPERATURE appears in both
# lists: its current value is a feature, its next-row value is the label.
feature_cols = ['AIR_PRESSURE', 'AIR_TEMPERATURE', 'day_min', 'HUMIDITY', 'WIND_SPEED']
label_cols = ['AIR_TEMPERATURE']

# Shift by one row: the features of row i are paired with the label of
# row i + 1, so the last feature row and the first label row are dropped.
# NOTE(review): if rows were removed earlier in the pipeline, neighbouring
# rows are not guaranteed to be neighbouring measurements - confirm.
X_train = df_train[feature_cols].values[:-1]
y_train = df_train[label_cols].values[1:]
X_test = df_test[feature_cols].values[:-1]
y_test = df_test[label_cols].values[1:]
# Section marker kept from the notebook (bare string, no effect): "load data".
"""data 불러오기"""
# Binds the LGBMRegressor class itself (not an instance) to lgb_model.
lgb_model = lgb.LGBMRegressor

# Show the shifted training feature matrix.
print(X_train)

"""최적의 학습모델 찾기"""
# Earlier fixed-hyperparameter baseline, kept commented out for reference.
#num_train = 2
#for j in range(num_train): # repeating the training several times produced identical results.
#    lgb_model = lgb.LGBMRegressor( n_estimators=600, learning_rate=0.1, subsample=0.5,
#                                     max_depth=5) # create the model to train
#    lgb_model.fit(X_train, y_train)
#    #print(lgb_model.best_iteration_)
#    lgb_model.booster_.save_model("model1.txt")
#    lgb_model.save_model("./learn_data/{0}epoch3.json".format(j))
#



from optuna.samplers import TPESampler

# TPE sampler created with a fixed seed (10); it is handed to the Optuna
# study that drives the hyperparameter search.
sampler = TPESampler(seed=10)


def objective(trial):
    """Fit one LightGBM regressor with trial-suggested settings and score it.

    Reads the module-level arrays ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Root-mean-squared error of the fitted model's predictions on
        ``X_test`` against ``y_test`` (lower is better).
    """
    # NOTE(review): the score is computed on the same X_test/y_test split
    # that is passed as eval_set, so the selected hyperparameters are tuned
    # directly against that split - confirm a separate hold-out exists.
    param = {
        'objective': 'regression',
        'verbose': -1,
        'metric': 'rmse',
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'n_estimators': trial.suggest_int('n_estimators', 100, 5000),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        # suggest_float(..., log=True) is the supported replacement for the
        # deprecated suggest_loguniform and samples the same distribution.
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.2, 0.6, log=True),
        'subsample': trial.suggest_float('subsample', 0.4, 1, log=True),
        # LightGBM only performs row subsampling when the bagging frequency
        # is non-zero; without this the tuned 'subsample' value is ignored.
        'subsample_freq': 1,
    }

    model = lgb.LGBMRegressor(**param)
    # fit() no longer accepts verbose= in LightGBM 4; a log_evaluation
    # callback with period=0 keeps per-iteration evaluation output silent.
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_test, y_test)],
        callbacks=[lgb.log_evaluation(period=0)],
    )

    predictions = model.predict(X_test)
    # mean_squared_error yields the MSE; take the square root so the value
    # reported to Optuna really is the RMSE.
    return float(np.sqrt(mean_squared_error(y_test, predictions)))


# Run 500 trials, minimising the value returned by objective().
study_lgb = optuna.create_study(sampler=sampler, direction='minimize')
study_lgb.optimize(objective, n_trials=500)


# Report the best trial's score and the hyperparameters that produced it.
trial = study_lgb.best_trial
trial_params = trial.params
best_trial_report = 'Best Trial: score {},\nparams {}'.format(trial.value, trial_params)
print(best_trial_report)
# Output recorded from an earlier run:
##Best Trial: score 7.557070379435982e-05,
##params {'max_depth': 7, 'n_estimators': 101, 'min_child_samples': 72, 'colsample_bytree': 0.3176587236198577, 'subsample': 0.5193547845967387}
#optuna.visualization.plot_optimization_history(study_lgb).show()
# The figure is only displayed here; it has to be saved as an image separately.
parallel_coordinate_fig = optuna.visualization.plot_parallel_coordinate(study_lgb)
parallel_coordinate_fig.show()
#optuna.visualization.plot_contour(study_lgb).show()