In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor, VotingRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.preprocessing import OneHotEncoder
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from statsmodels.tsa.arima.model import ARIMA
import optuna
import time
from datetime import timedelta

# train.csv 파일 불러오기
train_data = pd.read_csv('train.csv')

# 결측치를 평균값으로 대체
train_data = train_data.fillna(train_data.mean())

# '측정 시간대'를 원핫 인코딩하여 숫자 형태로 변환
encoder = OneHotEncoder(sparse=False)
time_encoded = encoder.fit_transform(train_data[['측정 시간대']])
time_encoded_df = pd.DataFrame(time_encoded, columns=encoder.get_feature_names(['측정 시간대']))
train_data = pd.concat([train_data, time_encoded_df], axis=1).drop(['측정 시간대'], axis=1)

# 풍속을 예측할 특성(입력 변수)과 풍속(출력 변수)을 분리합니다.
X_train = train_data.drop(['ID', '풍속 (m/s)'], axis=1)  # 입력 변수들
y_train = train_data['풍속 (m/s)']  # 출력 변수 (풍속)

# ARIMA 모델 생성과 예측
def arima_forecast(train_data, test_data):
    train_data.set_index('일시', inplace=True)
    test_data.set_index('일시', inplace=True)

    # ARIMA 모델 생성
    model = ARIMA(train_data['풍속 (m/s)'], order=(1, 1, 1))
    model_fit = model.fit()

    # 테스트 데이터로 예측 수행
    forecast = model_fit.forecast(steps=len(test_data))

    # 예측 결과만 추출하여 반환
    return forecast[0]


# 최적의 하이퍼파라미터 탐색 함수
def optimize_hyperparameters(X, y, model, params):
    def objective(trial):
        # trial.suggest_categorical 함수의 리턴값을 임시 변수에 저장하여 사용
        suggested_params = {}
        for param_name, param_range in params.items():
            suggested_params[param_name] = trial.suggest_categorical(param_name, param_range)
        model.set_params(**suggested_params)

        cv_scores = cross_val_score(model, X, y, cv=5, n_jobs=-1)
        return np.mean(cv_scores)

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=100)

    return study.best_params

# 부스팅 모델 생성
rf_model = RandomForestRegressor(random_state=42)
xgb_model = XGBRegressor(objective='reg:squarederror', random_state=42)
lgb_model = LGBMRegressor(random_state=42)
cat_model = CatBoostRegressor(random_state=42)
gb_model = GradientBoostingRegressor(random_state=42)

# 모델 학습 (Linear Regression, SVR)
lr_model = LinearRegression()
svr_model = SVR()

# 하이퍼파라미터 탐색 범위
xgb_params = {
    'n_estimators': [50, 100, 200, 300, 400, 500],
    'learning_rate': [0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3],
    'max_depth': [3, 4, 5, 6, 7, 8, 9, 10],
    'min_child_weight': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'subsample': [0.6, 0.7, 0.8, 0.9],
    'colsample_bytree': [0.6, 0.7, 0.8, 0.9],
    'random_state': [42],
}

lgb_params = {
    'n_estimators': [50, 100, 200, 300, 400, 500],
    'learning_rate': [0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3],
    'max_depth': [3, 4, 5, 6, 7, 8, 9, 10],
    'num_leaves': [10, 20, 30, 40, 50, 100, 200],
    'feature_fraction': [0.6, 0.7, 0.8, 0.9],
    'bagging_fraction': [0.6, 0.7, 0.8, 0.9],
    'random_state': [42],
}

cat_params = {
    'iterations': [50, 100, 200, 300, 400, 500],
    'learning_rate': [0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3],
    'depth': [3, 4, 5, 6, 7, 8, 9, 10],
    'random_strength': [0.01, 0.1, 1, 10],
    'bagging_temperature': [0.01, 0.1, 1, 10, 100],
    'random_state': [42],
}

# XGBoost 모델의 최적 하이퍼파라미터 탐색
xgb_best_params = optimize_hyperparameters(X_train, y_train, xgb_model, xgb_params)



[I 2023-07-24 14:06:21,451] A new study created in memory with name: no-name-1c645af7-5e3b-46ac-8080-4196589b87be
[I 2023-07-24 14:06:29,169] Trial 0 finished with value: 0.737958583262305 and parameters: {'n_estimators': 100, 'learning_rate': 0.3, 'max_depth': 5, 'min_child_weight': 10, 'subsample': 0.7, 'colsample_bytree': 0.7, 'random_state': 42}. Best is trial 0 with value: 0.737958583262305.
[I 2023-07-24 14:06:35,805] Trial 1 finished with value: 0.0013268305011300495 and parameters: {'n_estimators': 50, 'learning_rate': 0.01, 'max_depth': 8, 'min_child_weight': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'random_state': 42}. Best is trial 0 with value: 0.737958583262305.
[I 2023-07-24 14:06:59,569] Trial 2 finished with value: 0.8310591966497588 and parameters: {'n_estimators': 300, 'learning_rate': 0.25, 'max_depth': 8, 'min_child_weight': 5, 'subsample': 0.9, 'colsample_bytree': 0.8, 'random_state': 42}. Best is trial 2 with value: 0.8310591966497588.
[I 2023-07-24 14:07:20

[I 2023-07-24 14:27:22,570] Trial 26 finished with value: 0.8368111555134087 and parameters: {'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 10, 'min_child_weight': 2, 'subsample': 0.8, 'colsample_bytree': 0.9, 'random_state': 42}. Best is trial 25 with value: 0.8522855362829478.
[I 2023-07-24 14:29:13,656] Trial 27 finished with value: 0.8538789662783184 and parameters: {'n_estimators': 500, 'learning_rate': 0.05, 'max_depth': 10, 'min_child_weight': 2, 'subsample': 0.8, 'colsample_bytree': 0.9, 'random_state': 42}. Best is trial 27 with value: 0.8538789662783184.
[I 2023-07-24 14:31:17,071] Trial 28 finished with value: 0.8538789662783184 and parameters: {'n_estimators': 500, 'learning_rate': 0.05, 'max_depth': 10, 'min_child_weight': 2, 'subsample': 0.8, 'colsample_bytree': 0.9, 'random_state': 42}. Best is trial 27 with value: 0.8538789662783184.
[I 2023-07-24 14:31:37,997] Trial 29 finished with value: 0.8187478265616001 and parameters: {'n_estimators': 100, 'learning_rat

[I 2023-07-24 15:07:38,220] Trial 55 finished with value: 0.8337101317959563 and parameters: {'n_estimators': 500, 'learning_rate': 0.3, 'max_depth': 10, 'min_child_weight': 2, 'subsample': 0.8, 'colsample_bytree': 0.9, 'random_state': 42}. Best is trial 42 with value: 0.8546626431600206.
[I 2023-07-24 15:09:18,316] Trial 56 finished with value: 0.8335315642506271 and parameters: {'n_estimators': 500, 'learning_rate': 0.05, 'max_depth': 8, 'min_child_weight': 1, 'subsample': 0.7, 'colsample_bytree': 0.9, 'random_state': 42}. Best is trial 42 with value: 0.8546626431600206.
[I 2023-07-24 15:11:15,563] Trial 57 finished with value: 0.8462308380078047 and parameters: {'n_estimators': 500, 'learning_rate': 0.2, 'max_depth': 10, 'min_child_weight': 3, 'subsample': 0.8, 'colsample_bytree': 0.9, 'random_state': 42}. Best is trial 42 with value: 0.8546626431600206.
[I 2023-07-24 15:13:03,680] Trial 58 finished with value: 0.8531378897066478 and parameters: {'n_estimators': 500, 'learning_rate'

[I 2023-07-24 15:52:24,759] Trial 84 finished with value: 0.8330489773002684 and parameters: {'n_estimators': 500, 'learning_rate': 0.25, 'max_depth': 10, 'min_child_weight': 2, 'subsample': 0.6, 'colsample_bytree': 0.7, 'random_state': 42}. Best is trial 42 with value: 0.8546626431600206.
[I 2023-07-24 15:54:14,915] Trial 85 finished with value: 0.8509507379618716 and parameters: {'n_estimators': 500, 'learning_rate': 0.1, 'max_depth': 10, 'min_child_weight': 8, 'subsample': 0.9, 'colsample_bytree': 0.8, 'random_state': 42}. Best is trial 42 with value: 0.8546626431600206.
[I 2023-07-24 15:55:12,451] Trial 86 finished with value: 0.8437053686665875 and parameters: {'n_estimators': 300, 'learning_rate': 0.05, 'max_depth': 10, 'min_child_weight': 2, 'subsample': 0.6, 'colsample_bytree': 0.6, 'random_state': 42}. Best is trial 42 with value: 0.8546626431600206.
[I 2023-07-24 15:57:00,441] Trial 87 finished with value: 0.8412625926883699 and parameters: {'n_estimators': 500, 'learning_rat

In [37]:
# train.csv 파일 불러오기
train_data = pd.read_csv('train.csv')

# 결측치를 평균값으로 대체
train_data = train_data.fillna(train_data.mean())

# '측정 시간대'를 원핫 인코딩하여 숫자 형태로 변환
encoder = OneHotEncoder(sparse=False)
time_encoded = encoder.fit_transform(train_data[['측정 시간대']])
time_encoded_df = pd.DataFrame(time_encoded, columns=encoder.get_feature_names(['측정 시간대']))
train_data = pd.concat([train_data, time_encoded_df], axis=1).drop(['측정 시간대'], axis=1)

# 풍속을 예측할 특성(입력 변수)과 풍속(출력 변수)을 분리합니다.
X_train = train_data.drop(['ID', '풍속 (m/s)'], axis=1)  # 입력 변수들
y_train = train_data['풍속 (m/s)']  # 출력 변수 (풍속)

  """


In [8]:
# LightGBM 모델의 최적 하이퍼파라미터 탐색
lgb_best_params = optimize_hyperparameters(X_train, y_train, lgb_model, lgb_params)



[I 2023-07-24 16:12:49,813] A new study created in memory with name: no-name-847b6b9c-5f23-464b-b2ac-ed9b8014f6cf
[I 2023-07-24 16:12:52,214] Trial 0 finished with value: 0.603455417181317 and parameters: {'n_estimators': 50, 'learning_rate': 0.25, 'max_depth': 4, 'num_leaves': 10, 'feature_fraction': 0.7, 'bagging_fraction': 0.7, 'random_state': 42}. Best is trial 0 with value: 0.603455417181317.
[I 2023-07-24 16:12:54,178] Trial 1 finished with value: 0.1728181340355746 and parameters: {'n_estimators': 50, 'learning_rate': 0.01, 'max_depth': 3, 'num_leaves': 200, 'feature_fraction': 0.6, 'bagging_fraction': 0.7, 'random_state': 42}. Best is trial 0 with value: 0.603455417181317.
[I 2023-07-24 16:12:55,108] Trial 2 finished with value: 0.4904244767814677 and parameters: {'n_estimators': 50, 'learning_rate': 0.05, 'max_depth': 4, 'num_leaves': 40, 'feature_fraction': 0.9, 'bagging_fraction': 0.6, 'random_state': 42}. Best is trial 0 with value: 0.603455417181317.
[I 2023-07-24 16:13:08

[I 2023-07-24 16:17:30,592] Trial 28 finished with value: 0.8029478263134251 and parameters: {'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 10, 'num_leaves': 100, 'feature_fraction': 0.9, 'bagging_fraction': 0.7, 'random_state': 42}. Best is trial 17 with value: 0.8471537199070645.
[I 2023-07-24 16:17:33,242] Trial 29 finished with value: 0.7081127236765157 and parameters: {'n_estimators': 300, 'learning_rate': 0.1, 'max_depth': 10, 'num_leaves': 10, 'feature_fraction': 0.7, 'bagging_fraction': 0.7, 'random_state': 42}. Best is trial 17 with value: 0.8471537199070645.
[I 2023-07-24 16:17:40,396] Trial 30 finished with value: 0.8271132373697332 and parameters: {'n_estimators': 200, 'learning_rate': 0.1, 'max_depth': 10, 'num_leaves': 100, 'feature_fraction': 0.9, 'bagging_fraction': 0.7, 'random_state': 42}. Best is trial 17 with value: 0.8471537199070645.
[I 2023-07-24 16:17:58,554] Trial 31 finished with value: 0.8471537199070645 and parameters: {'n_estimators': 500, 'learni

[I 2023-07-24 16:21:58,301] Trial 57 finished with value: 0.8376439354627022 and parameters: {'n_estimators': 500, 'learning_rate': 0.2, 'max_depth': 7, 'num_leaves': 200, 'feature_fraction': 0.8, 'bagging_fraction': 0.6, 'random_state': 42}. Best is trial 48 with value: 0.8482802833434364.
[I 2023-07-24 16:22:10,356] Trial 58 finished with value: 0.8433819375988225 and parameters: {'n_estimators': 500, 'learning_rate': 0.2, 'max_depth': 8, 'num_leaves': 200, 'feature_fraction': 0.9, 'bagging_fraction': 0.6, 'random_state': 42}. Best is trial 48 with value: 0.8482802833434364.
[I 2023-07-24 16:22:21,551] Trial 59 finished with value: 0.8472361922852656 and parameters: {'n_estimators': 400, 'learning_rate': 0.2, 'max_depth': 10, 'num_leaves': 200, 'feature_fraction': 0.6, 'bagging_fraction': 0.6, 'random_state': 42}. Best is trial 48 with value: 0.8482802833434364.
[I 2023-07-24 16:22:24,247] Trial 60 finished with value: 0.6942806635236402 and parameters: {'n_estimators': 400, 'learnin

[I 2023-07-24 16:27:10,376] Trial 86 finished with value: 0.8482802833434364 and parameters: {'n_estimators': 500, 'learning_rate': 0.2, 'max_depth': 10, 'num_leaves': 200, 'feature_fraction': 0.9, 'bagging_fraction': 0.9, 'random_state': 42}. Best is trial 48 with value: 0.8482802833434364.
[I 2023-07-24 16:27:29,830] Trial 87 finished with value: 0.8482802833434364 and parameters: {'n_estimators': 500, 'learning_rate': 0.2, 'max_depth': 10, 'num_leaves': 200, 'feature_fraction': 0.9, 'bagging_fraction': 0.9, 'random_state': 42}. Best is trial 48 with value: 0.8482802833434364.
[I 2023-07-24 16:27:33,720] Trial 88 finished with value: 0.7140320892295486 and parameters: {'n_estimators': 500, 'learning_rate': 0.2, 'max_depth': 3, 'num_leaves': 200, 'feature_fraction': 0.9, 'bagging_fraction': 0.9, 'random_state': 42}. Best is trial 48 with value: 0.8482802833434364.
[I 2023-07-24 16:27:39,001] Trial 89 finished with value: 0.82436894583506 and parameters: {'n_estimators': 500, 'learning

In [9]:
# CatBoost 모델의 최적 하이퍼파라미터 탐색
cat_best_params = optimize_hyperparameters(X_train, y_train, cat_model, cat_params)




[I 2023-07-24 16:29:57,491] A new study created in memory with name: no-name-69ed59ab-4ea6-4bbe-8ecc-3ac0590a9fc4
4 fits failed out of a total of 5.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
4 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\user\anaconda3\envs\tf\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\envs\tf\lib\site-packages\catboost\core.py", line 5737, in fit
    save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
  File "C:\Users\user\anaconda3\envs\tf\lib\site-packages\catboost\core.py", line 2362, in _fit
    train_p

[I 2023-07-24 16:44:52,218] Trial 25 finished with value: 0.7213281387270882 and parameters: {'iterations': 500, 'learning_rate': 0.15, 'depth': 4, 'random_strength': 0.1, 'bagging_temperature': 10, 'random_state': 42}. Best is trial 6 with value: 0.8393020374906548.
[I 2023-07-24 16:45:33,941] Trial 26 finished with value: 0.8222635222830297 and parameters: {'iterations': 300, 'learning_rate': 0.15, 'depth': 9, 'random_strength': 0.1, 'bagging_temperature': 100, 'random_state': 42}. Best is trial 6 with value: 0.8393020374906548.
[I 2023-07-24 16:46:35,011] Trial 27 finished with value: 0.8319409075161228 and parameters: {'iterations': 400, 'learning_rate': 0.15, 'depth': 9, 'random_strength': 0.01, 'bagging_temperature': 0.01, 'random_state': 42}. Best is trial 6 with value: 0.8393020374906548.
[I 2023-07-24 16:46:49,417] Trial 28 finished with value: 0.7093468345184508 and parameters: {'iterations': 100, 'learning_rate': 0.15, 'depth': 9, 'random_strength': 10, 'bagging_temperature'

[I 2023-07-24 17:12:10,243] Trial 56 finished with value: 0.8424065085370337 and parameters: {'iterations': 300, 'learning_rate': 0.3, 'depth': 10, 'random_strength': 1, 'bagging_temperature': 100, 'random_state': 42}. Best is trial 46 with value: 0.8437268573067364.
[I 2023-07-24 17:13:11,136] Trial 57 finished with value: 0.8424065085370337 and parameters: {'iterations': 300, 'learning_rate': 0.3, 'depth': 10, 'random_strength': 1, 'bagging_temperature': 100, 'random_state': 42}. Best is trial 46 with value: 0.8437268573067364.
[I 2023-07-24 17:14:13,767] Trial 58 finished with value: 0.8424065085370337 and parameters: {'iterations': 300, 'learning_rate': 0.3, 'depth': 10, 'random_strength': 1, 'bagging_temperature': 100, 'random_state': 42}. Best is trial 46 with value: 0.8437268573067364.
[I 2023-07-24 17:15:10,056] Trial 59 finished with value: 0.8424065085370337 and parameters: {'iterations': 300, 'learning_rate': 0.3, 'depth': 10, 'random_strength': 1, 'bagging_temperature': 100

[I 2023-07-24 17:38:30,984] Trial 87 finished with value: 0.7341067557984502 and parameters: {'iterations': 300, 'learning_rate': 0.05, 'depth': 8, 'random_strength': 1, 'bagging_temperature': 0.1, 'random_state': 42}. Best is trial 46 with value: 0.8437268573067364.
[I 2023-07-24 17:40:21,954] Trial 88 finished with value: 0.8475583263119593 and parameters: {'iterations': 400, 'learning_rate': 0.3, 'depth': 10, 'random_strength': 1, 'bagging_temperature': 100, 'random_state': 42}. Best is trial 88 with value: 0.8475583263119593.
[I 2023-07-24 17:40:47,901] Trial 89 finished with value: 0.8085951738449145 and parameters: {'iterations': 400, 'learning_rate': 0.2, 'depth': 7, 'random_strength': 1, 'bagging_temperature': 100, 'random_state': 42}. Best is trial 88 with value: 0.8475583263119593.
[I 2023-07-24 17:40:58,933] Trial 90 finished with value: 0.69482147074628 and parameters: {'iterations': 400, 'learning_rate': 0.3, 'depth': 3, 'random_strength': 1, 'bagging_temperature': 1, 'ran

In [10]:
# 최적의 하이퍼파라미터로 모델 생성
xgb_model = XGBRegressor(**xgb_best_params)
lgb_model = LGBMRegressor(**lgb_best_params)
cat_model = CatBoostRegressor(**cat_best_params)



In [32]:
# 앙상블 모델 생성
ensemble_model = VotingRegressor(estimators=[
    ('rf', rf_model),
    ('xgb', xgb_model),
    ('lgb', lgb_model),
    ('cat', cat_model),
    ('gb', gb_model)
], n_jobs=-1)

# 스태킹을 위한 모델 생성
stacking_model = StackingRegressor(
    estimators=[
        ('rf', rf_model),
        ('xgb', xgb_model),
        ('lgb', lgb_model),
        ('cat', cat_model),
        ('gb', gb_model)
    ],
    final_estimator=LinearRegression()
)

# 앙상블 모델 학습
ensemble_model.fit(X_train, y_train)

# 스태킹 모델 학습
stacking_model.fit(X_train, y_train)

# test.csv 파일 불러오기
test_data = pd.read_csv('test.csv')

# 결측치를 평균값으로 대체
test_data = test_data.fillna(test_data.mean())

# '측정 시간대'를 원핫 인코딩하여 숫자 형태로 변환
time_encoded = encoder.transform(test_data[['측정 시간대']])
time_encoded_df = pd.DataFrame(time_encoded, columns=encoder.get_feature_names(['측정 시간대']))
test_data = pd.concat([test_data, time_encoded_df], axis=1).drop(['측정 시간대'], axis=1)

TypeError: The DType <class 'numpy.dtype[datetime64]'> could not be promoted by <class 'numpy.dtype[float64]'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtype[int64]'>, <class 'numpy.dtype[int64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[datetime64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>)

In [46]:
# test.csv 파일 불러오기
test_data = pd.read_csv('test.csv')

# 결측치를 평균값으로 대체
test_data = test_data.fillna(test_data.mean())

# '측정 시간대'를 원핫 인코딩하여 숫자 형태로 변환
time_encoded = encoder.transform(test_data[['측정 시간대']])
time_encoded_df = pd.DataFrame(time_encoded, columns=encoder.get_feature_names(['측정 시간대']))
test_data = pd.concat([test_data, time_encoded_df], axis=1).drop(['측정 시간대'], axis=1)

  """


In [54]:
# 테스트 데이터로 예측을 수행합니다.
X_test = test_data.drop('ID', axis=1)  # 테스트 입력 변수들

Unnamed: 0,월,일,섭씨 온도(°⁣C),절대 온도(K),이슬점 온도(°C),상대 습도 (%),대기압(mbar),포화 증기압(mbar),실제 증기압(mbar),증기압 부족량(mbar),수증기 함량 (g/kg),공기 밀도 (g/m**3),풍향 (deg),일시,측정 시간대_새벽,측정 시간대_오전,측정 시간대_오후,측정 시간대_저녁
0,3,24,4.28,278.68,-0.21,72.5,984.48,8.30,6.02,2.28,3.81,1233.29,251.80,2024-03-24 00:00:00,1.0,0.0,0.0,0.0
1,9,24,13.40,286.81,10.36,81.8,996.98,15.40,12.59,2.80,7.89,1206.20,225.60,2024-09-24 18:00:00,0.0,0.0,0.0,1.0
2,5,28,19.89,294.33,14.95,73.2,984.83,23.26,17.03,6.23,10.82,1163.06,10.39,2024-05-28 18:00:00,0.0,0.0,0.0,1.0
3,1,17,-2.88,270.44,-4.47,88.7,998.02,4.94,4.38,0.56,2.73,1284.19,260.20,2024-01-17 18:00:00,0.0,0.0,0.0,1.0
4,10,22,6.97,281.18,4.36,83.4,987.00,10.01,8.35,1.66,5.28,1223.47,262.50,2024-10-22 12:00:00,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15673,3,16,1.61,274.67,-0.49,85.9,1001.26,6.86,5.89,0.97,3.67,1266.62,56.18,2024-03-16 00:00:00,1.0,0.0,0.0,0.0
15674,9,5,18.27,292.19,13.35,73.0,990.98,21.03,15.35,5.68,9.69,1177.63,265.30,2024-09-05 12:00:00,0.0,0.0,1.0,0.0
15675,8,8,16.08,289.66,12.25,78.0,994.97,18.30,14.28,4.03,8.97,1191.84,189.80,2024-08-08 00:00:00,1.0,0.0,0.0,0.0
15676,2,4,3.07,276.68,-0.95,74.8,994.31,7.62,5.70,1.92,3.57,1251.24,269.70,2024-02-04 00:00:00,1.0,0.0,0.0,0.0


In [14]:

# 모델 학습
rf_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lgb_model.fit(X_train, y_train)
cat_model.fit(X_train, y_train)
gb_model.fit(X_train, y_train)
lr_model.fit(X_train, y_train)
svr_model.fit(X_train, y_train)

# 각 모델의 예측 결과를 가져옵니다.
rf_pred = rf_model.predict(X_test)
xgb_pred = xgb_model.predict(X_test)
lgb_pred = lgb_model.predict(X_test)
cat_pred = cat_model.predict(X_test)
gb_pred = gb_model.predict(X_test)
lr_pred = lr_model.predict(X_test)
svr_pred = svr_model.predict(X_test)

# 앙상블 모델 예측
ensemble_pred = ensemble_model.predict(X_test)

# 스태킹 모델 예측
stacking_pred = stacking_model.predict(X_test)



You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2858
[LightGBM] [Info] Number of data points in the train set: 36581, number of used features: 17
[LightGBM] [Info] Start training from score 2.036446






0:	learn: 1.3579635	total: 48.5ms	remaining: 19.4s
1:	learn: 1.2342349	total: 97.7ms	remaining: 19.4s
2:	learn: 1.1633435	total: 146ms	remaining: 19.4s
3:	learn: 1.1034396	total: 191ms	remaining: 18.9s
4:	learn: 1.0603597	total: 238ms	remaining: 18.8s
5:	learn: 1.0242043	total: 282ms	remaining: 18.5s
6:	learn: 0.9935376	total: 333ms	remaining: 18.7s
7:	learn: 0.9680994	total: 382ms	remaining: 18.7s
8:	learn: 0.9477325	total: 434ms	remaining: 18.9s
9:	learn: 0.9295203	total: 486ms	remaining: 18.9s
10:	learn: 0.9169004	total: 529ms	remaining: 18.7s
11:	learn: 0.9021130	total: 580ms	remaining: 18.8s
12:	learn: 0.8823245	total: 632ms	remaining: 18.8s
13:	learn: 0.8716624	total: 683ms	remaining: 18.8s
14:	learn: 0.8648449	total: 730ms	remaining: 18.7s
15:	learn: 0.8543324	total: 780ms	remaining: 18.7s
16:	learn: 0.8463541	total: 829ms	remaining: 18.7s
17:	learn: 0.8396610	total: 880ms	remaining: 18.7s
18:	learn: 0.8350903	total: 933ms	remaining: 18.7s
19:	learn: 0.8257762	total: 997ms	remai

41:	learn: 0.7066704	total: 2.19s	remaining: 18.7s
42:	learn: 0.7017103	total: 2.26s	remaining: 18.8s
43:	learn: 0.6975606	total: 2.32s	remaining: 18.7s
44:	learn: 0.6935913	total: 2.37s	remaining: 18.7s
45:	learn: 0.6899025	total: 2.43s	remaining: 18.7s
46:	learn: 0.6868145	total: 2.49s	remaining: 18.7s
47:	learn: 0.6823574	total: 2.55s	remaining: 18.7s
48:	learn: 0.6781622	total: 2.6s	remaining: 18.6s
49:	learn: 0.6741693	total: 2.66s	remaining: 18.6s
50:	learn: 0.6716267	total: 2.72s	remaining: 18.6s
51:	learn: 0.6682115	total: 2.78s	remaining: 18.6s
52:	learn: 0.6649706	total: 2.84s	remaining: 18.6s
53:	learn: 0.6620427	total: 2.9s	remaining: 18.6s
54:	learn: 0.6599763	total: 2.96s	remaining: 18.6s
55:	learn: 0.6565916	total: 3.01s	remaining: 18.5s
56:	learn: 0.6533533	total: 3.07s	remaining: 18.5s
57:	learn: 0.6512974	total: 3.12s	remaining: 18.4s
58:	learn: 0.6475936	total: 3.18s	remaining: 18.4s
59:	learn: 0.6448109	total: 3.24s	remaining: 18.3s
60:	learn: 0.6425553	total: 3.29s

202:	learn: 0.4724227	total: 10.9s	remaining: 10.6s
203:	learn: 0.4716399	total: 11s	remaining: 10.6s
204:	learn: 0.4703846	total: 11.1s	remaining: 10.5s
205:	learn: 0.4697873	total: 11.1s	remaining: 10.5s
206:	learn: 0.4693274	total: 11.2s	remaining: 10.4s
207:	learn: 0.4689889	total: 11.2s	remaining: 10.3s
208:	learn: 0.4686339	total: 11.3s	remaining: 10.3s
209:	learn: 0.4680081	total: 11.3s	remaining: 10.2s
210:	learn: 0.4674283	total: 11.4s	remaining: 10.2s
211:	learn: 0.4667262	total: 11.4s	remaining: 10.1s
212:	learn: 0.4659543	total: 11.5s	remaining: 10.1s
213:	learn: 0.4656052	total: 11.5s	remaining: 10s
214:	learn: 0.4650380	total: 11.6s	remaining: 9.98s
215:	learn: 0.4644012	total: 11.7s	remaining: 9.94s
216:	learn: 0.4636941	total: 11.7s	remaining: 9.88s
217:	learn: 0.4628440	total: 11.8s	remaining: 9.83s
218:	learn: 0.4618202	total: 11.8s	remaining: 9.78s
219:	learn: 0.4611339	total: 11.9s	remaining: 9.72s
220:	learn: 0.4605796	total: 11.9s	remaining: 9.66s
221:	learn: 0.45

361:	learn: 0.3939894	total: 19.4s	remaining: 2.04s
362:	learn: 0.3935715	total: 19.5s	remaining: 1.98s
363:	learn: 0.3930744	total: 19.5s	remaining: 1.93s
364:	learn: 0.3927336	total: 19.6s	remaining: 1.88s
365:	learn: 0.3924006	total: 19.6s	remaining: 1.82s
366:	learn: 0.3920940	total: 19.7s	remaining: 1.77s
367:	learn: 0.3916102	total: 19.7s	remaining: 1.71s
368:	learn: 0.3911981	total: 19.8s	remaining: 1.66s
369:	learn: 0.3907652	total: 19.8s	remaining: 1.61s
370:	learn: 0.3903434	total: 19.9s	remaining: 1.55s
371:	learn: 0.3900391	total: 19.9s	remaining: 1.5s
372:	learn: 0.3896032	total: 20s	remaining: 1.45s
373:	learn: 0.3893299	total: 20s	remaining: 1.39s
374:	learn: 0.3888758	total: 20.1s	remaining: 1.34s
375:	learn: 0.3887067	total: 20.1s	remaining: 1.28s
376:	learn: 0.3885596	total: 20.2s	remaining: 1.23s
377:	learn: 0.3882201	total: 20.2s	remaining: 1.18s
378:	learn: 0.3877925	total: 20.3s	remaining: 1.12s
379:	learn: 0.3873624	total: 20.3s	remaining: 1.07s
380:	learn: 0.386

KeyError: "['일시'] not in index"

In [60]:
# 앙상블 결과와 스태킹 결과를 평균하여 최종 예측값 도출
final_pred = (ensemble_pred + stacking_pred + rf_pred + xgb_pred + lgb_pred + cat_pred + gb_pred + lr_pred + svr_pred) / 9

# Submit / 제출
submission = pd.read_csv('./sample_submission.csv')
submission['풍속 (m/s)'] = final_pred

# 예측 결과를 submission.csv 양식에 맞게 저장합니다.
submission.to_csv('submission.csv', index=False)

print("풍속 예측이 완료되었습니다. 결과가 submission.csv에 저장되었습니다.")

풍속 예측이 완료되었습니다. 결과가 submission.csv에 저장되었습니다.
