# 공모전 과제 : 계절별 지면온도 예측 모델 생성
* 전처리가 완료된 데이터를 받아 각 계절별로 모델을 학습하고 예측
* 최종 학습 모델은 VotingRegressor를 사용했으며, 각 모델의 가중치를 계절별로 상이하게 부여

In [1]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2-cp310-cp310-manylinux2014_x86_64.whl (98.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 98.6/98.6 MB 8.0 MB/s eta 0:00:00
Installing collected packages: catboost
Successfully installed catboost-1.2


In [2]:
import os, random
import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import VotingRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
# Lift the row limit so printed DataFrames are shown in full instead of truncated.
pd.set_option('display.max_rows', None)

In [3]:
# Load the preprocessed per-season train/test tables and the submission
# template. All inputs live in one Drive folder, so the directory is named once.
DATA_DIR = '/content/drive/MyDrive/competition/temp_prediction'

train_spr = pd.read_csv(f'{DATA_DIR}/train_spring_prepro.csv')
train_sum = pd.read_csv(f'{DATA_DIR}/train_summer_prepro.csv')
train_aut = pd.read_csv(f'{DATA_DIR}/train_autumn_prepro.csv')
train_win = pd.read_csv(f'{DATA_DIR}/train_winter_prepro.csv')

test_spr = pd.read_csv(f'{DATA_DIR}/test_spring_prepro.csv')
test_sum = pd.read_csv(f'{DATA_DIR}/test_summer_prepro.csv')
test_aut = pd.read_csv(f'{DATA_DIR}/test_autumn_prepro.csv')
test_win = pd.read_csv(f'{DATA_DIR}/test_winter_prepro.csv')

# Passing a list of sheet names makes read_excel return a dict keyed by sheet
# name, so the workbook is opened and parsed once instead of once per season.
# NOTE: 'subminssionUser.xlsx' (sic) is the file name as it exists on disk.
_submission_sheets = pd.read_excel(
    f'{DATA_DIR}/subminssionUser.xlsx',
    sheet_name=['SPRING', 'SUMMER', 'AUTUMN', 'WINTER'],
)
sub_spr = _submission_sheets['SPRING']
sub_sum = _submission_sheets['SUMMER']
sub_aut = _submission_sheets['AUTUMN']
sub_win = _submission_sheets['WINTER']

In [4]:
def seed_everything(seed):
    """Seed the random number sources this notebook relies on.

    Seeds Python's ``random`` module and NumPy's global generator, and
    exports ``PYTHONHASHSEED`` into the process environment.

    Args:
        seed: Integer seed applied to every generator.
    """
    np.random.seed(seed)
    random.seed(seed)
    # NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so this
    # presumably only affects child processes - confirm if hash order matters.
    os.environ['PYTHONHASHSEED'] = str(seed)


seed_num = 42
seed_everything(seed_num)

In [5]:
# Scaling
# Each season gets its own RobustScaler. The scaler is fitted on that season's
# training frame only and then applied to both the training and the test frame.

# Columns to scale, per season.
spr_cols = ['mmddhh', 'ta', 'td', 'hm', 'ws', 'rn', 're', 'ww', 'si', 'ss', 'sn', 'day', 'hour', 'rs_yn', 'sense_ta']
sum_cols = ['mmddhh', 'ta', 'td', 'hm', 'ws', 'rn', 're', 'ww', 'si', 'ss', 'day', 'hour', 'rs_yn', 'h_idx', 'u_idx']
aut_cols = ['mmddhh', 'ta', 'td', 'hm', 'ws', 'rn', 're', 'ww', 'si', 'ss', 'mmddhh_str', 'month', 'day', 'hour', 'rs_yn', 'f_ta', 'h_idx', 'u_idx']
win_cols = ['mmddhh', 'ta', 'td', 'hm', 'ws', 'rn', 're', 'ww', 'si', 'ss', 'sn', 'day', 'hour', 'sense_ta']

train_spr_scaler = RobustScaler()
train_sum_scaler = RobustScaler()
train_aut_scaler = RobustScaler()
train_win_scaler = RobustScaler()

# (scaler, columns, train frame, test frame) for every season.
for season_scaler, season_cols, season_train, season_test in (
    (train_spr_scaler, spr_cols, train_spr, test_spr),
    (train_sum_scaler, sum_cols, train_sum, test_sum),
    (train_aut_scaler, aut_cols, train_aut, test_aut),
    (train_win_scaler, win_cols, train_win, test_win),
):
  # Fit on the still-unscaled training columns, then overwrite both frames
  # with their scaled values.
  season_scaler.fit(season_train[season_cols])
  season_train[season_cols] = season_scaler.transform(season_train[season_cols])
  season_test[season_cols] = season_scaler.transform(season_test[season_cols])

In [6]:
# Remove the columns that are not used for training or prediction, per season.
spring_drop_cols = ['Unnamed: 0', 'stn', 'year', 'month', 'f_ta', 'mmddhh_str']
summer_drop_cols = ['Unnamed: 0', 'stn', 'year', 'sn', 'month', 'f_ta', 'mmddhh_str']
autumn_drop_cols = ['Unnamed: 0', 'stn', 'year', 'sn', 'month', 'rs_yn', 'f_ta']
winter_drop_cols = ['Unnamed: 0.1', 'Unnamed: 0', 'stn', 'year', 'month', 'rs_yn', 'f_ta', 'mmddhh_str']

# The same column list is dropped in place from a season's train and test frame.
for season_train, season_test, unused_cols in (
    (train_spr, test_spr, spring_drop_cols),
    (train_sum, test_sum, summer_drop_cols),
    (train_aut, test_aut, autumn_drop_cols),
    (train_win, test_win, winter_drop_cols),
):
  season_train.drop(columns=unused_cols, inplace=True)
  season_test.drop(columns=unused_cols, inplace=True)

# The spring/summer/autumn test frames additionally carry an 'Unnamed: 0.1'
# column (presumably a leftover index column - confirm against the CSVs);
# the winter list above already removes it.
test_dset = [test_spr, test_sum, test_aut]
for test_df in test_dset:
  test_df.drop(columns=['Unnamed: 0.1'], inplace=True)

In [7]:
def model_train(df=None, *weights):
  '''
  Train a VotingRegressor ensemble for one season.

  The ensemble averages the predictions of three untuned, default-configured
  models: LightGBM, CatBoost and XGBoost. The weights do not change how the
  individual models are trained; they set how much each model's prediction
  contributes to the weighted average.

  Args:
    df: Training DataFrame holding the feature columns and the target column
      'ts'. Required, despite the None default kept for call compatibility.
    *weights: Optional averaging weights in estimator order
      (LGBM, CatBoost, XGBoost). When omitted, the models are weighted equally.

  Returns:
    The fitted VotingRegressor.

  Raises:
    ValueError: If df is None.
  '''
  if df is None:
    raise ValueError("model_train requires a training DataFrame with a 'ts' column")

  X_train = df.drop(columns='ts')
  y_train = df['ts']

  # With no weights given, *weights is an empty tuple, which VotingRegressor
  # rejects at fit time (length mismatch with the estimators). None selects
  # uniform weighting instead.
  ensemble_weights = weights if weights else None

  model = VotingRegressor(
                      estimators = [
                                    ('lgbm', LGBMRegressor(random_state=42, n_jobs=-1)),
                                    ('cb', CatBoostRegressor(random_state=42)),
                                    ('xgb', XGBRegressor(random_state=42, n_jobs=-1))
                      ],
                      n_jobs=-1, weights=ensemble_weights
                    )
  model.fit(X_train, y_train)

  return model

# Per-season ensemble weights, in the order the estimators are registered in
# model_train: (LGBM, CatBoost, XGBoost).
spr_weights = [0.4, 0.4, 0.2]
sum_weights = [0.3, 0.6, 0.1]
aut_weights = [0.7, 0.2, 0.1]
win_weights = [0.6, 0.3, 0.1]

# Fit one ensemble per season: spring, summer, autumn, winter.
spr_model = model_train(train_spr, *spr_weights)
sum_model = model_train(train_sum, *sum_weights)
aut_model = model_train(train_aut, *aut_weights)
win_model = model_train(train_win, *win_weights)

In [8]:
# Produce the predictions of each trained seasonal model on its own test set,
# evaluated in the order spring, summer, autumn, winter.
spr_preds, sum_preds, aut_preds, win_preds = [
    season_model.predict(season_test)
    for season_model, season_test in (
        (spr_model, test_spr),
        (sum_model, test_sum),
        (aut_model, test_aut),
        (win_model, test_win),
    )
]

In [9]:
# Write the model predictions into the 'TS' column of each submission sheet.
sub_spr['TS'] = spr_preds
sub_sum['TS'] = sum_preds
sub_aut['TS'] = aut_preds
sub_win['TS'] = win_preds

# Use ExcelWriter as a context manager: the workbook is saved and closed when
# the block exits, even if a sheet fails to write. The former explicit
# writer.save() call was deprecated in pandas 1.5 and removed in pandas 2.0.
with pd.ExcelWriter('230043.xlsx') as writer:
    sub_spr.to_excel(writer, sheet_name='SPRING', index=False)
    sub_sum.to_excel(writer, sheet_name='SUMMER', index=False)
    sub_aut.to_excel(writer, sheet_name='AUTUMN', index=False)
    sub_win.to_excel(writer, sheet_name='WINTER', index=False)
