In [2]:
import pandas as pd

df = pd.read_csv('Preprocessed_Modeling.csv')
df.shape

(154400, 16)

In [3]:
# 데이터 분리
from sklearn.model_selection import train_test_split

X = df.drop(columns=['풍속(m/s)', '유의파고(m)'])
y = df[['풍속(m/s)', '유의파고(m)']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# # 변수 중요도
# from sklearn.ensemble import RandomForestRegressor
# import numpy as np
# import matplotlib.pyplot as plt
# import matplotlib.font_manager as fm

# rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
# rf_model.fit(X_train, y_train)

# importances = rf_model.feature_importances_
# indices = np.argsort(importances)[::-1]

# font_path = 'C:/Windows/Fonts/malgun.ttf'
# font_prop = fm.FontProperties(fname=font_path, size=12)
# plt.rcParams['font.family'] = font_prop.get_name()
# plt.rcParams['axes.unicode_minus'] = False

# plt.title('변수 중요도', pad=10, fontweight='bold')
# plt.bar(range(X.shape[1]), importances[indices], color=(52/255, 73/255, 94/255, 1.0), align='center')
# tick_positions = np.arange(0, X.shape[1], 1)
# plt.xticks(tick_positions, X.columns[indices], rotation=45)
# plt.xlim([-1, X.shape[1]])
# plt.show()

In [5]:
# 데이터 스케일링
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# 평가 함수 정의
from sklearn.metrics import mean_squared_error, r2_score

def evaluate_model(model, X_train, X_test, y_train, y_test, pred):
    train_r2 = model.score(X_train, y_train)  # R² 값
    test_r2 = model.score(X_test, y_test)  # R² 값
    mse = mean_squared_error(y_test, pred)
    
    print(f"훈련 R²: {train_r2:.4f}")
    print(f"테스트 R²: {test_r2:.4f}")
    print(f"MSE: {mse:.4f}")

In [7]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV

lasso = Lasso(alpha=0.01)
lasso.fit(X_train_scaled, y_train)
lasso_pred = lasso.predict(X_test_scaled)
evaluate_model(lasso, X_train_scaled, X_test_scaled, y_train, y_test, lasso_pred)

# 하이퍼파라미터 튜닝
param_grid_lasso = {
    'alpha': [0.001, 0.01, 0.1, 1, 10, 100],
    'max_iter': [1000, 10000, 100000]
}
lasso_search = GridSearchCV(Lasso(), param_grid_lasso, cv=5, scoring='neg_mean_squared_error')
lasso_search.fit(X_train_scaled, y_train)

lasso_params = lasso_search.best_params_
lasso_score = -lasso_search.best_score_  # MSE가 음수로 반환되므로 부호를 반전시킴
print(f"최적 하이퍼파라미터: {lasso_params}")
print(f"최적 MSE: {lasso_score:.4f}")

# 최적 하이퍼파라미터 사용하여 재학습
lasso_best = Lasso(**lasso_params)
lasso_best.fit(X_train_scaled, y_train)

lasso_pred_best = lasso_best.predict(X_test_scaled)
evaluate_model(lasso_best, X_train_scaled, X_test_scaled, y_train, y_test, lasso_pred_best)

훈련 R²: 0.9429
테스트 R²: 0.9428
MSE: 0.6363
최적 하이퍼파라미터: {'alpha': 0.001, 'max_iter': 1000}
최적 MSE: 0.6340
훈련 R²: 0.9436
테스트 R²: 0.9435
MSE: 0.6333


In [8]:
from sklearn.linear_model import LinearRegression

linear = LinearRegression()
linear.fit(X_train_scaled, y_train)
linear_pred = linear.predict(X_test_scaled)
evaluate_model(linear, X_train_scaled, X_test_scaled, y_train, y_test, linear_pred)

# 하이퍼파라미터가 없음

훈련 R²: 0.9436
테스트 R²: 0.9435
MSE: 0.6333


In [9]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

ridge = Ridge(alpha=1.0)
ridge.fit(X_train_scaled, y_train)
ridge_pred = ridge.predict(X_test_scaled)
evaluate_model(ridge, X_train_scaled, X_test_scaled, y_train, y_test, ridge_pred)

# 하이퍼파라미터 튜닝
ridge_param_grid = {
    'alpha': [0.001, 0.01, 0.1, 1, 10, 100]
}
ridge_grid_search = GridSearchCV(Ridge(), ridge_param_grid, cv=5, scoring='neg_mean_squared_error')
ridge_grid_search.fit(X_train_scaled, y_train)

ridge_best_params = ridge_grid_search.best_params_
ridge_best_score = -ridge_grid_search.best_score_  # MSE는 음수로 반환되므로 부호를 반전시킴
print(f"최적 Ridge 하이퍼파라미터: {ridge_best_params}")
print(f"최적 Ridge MSE: {ridge_best_score:.4f}")

# 최적 하이퍼파라미터 사용하여 재학습
ridge_best_model = Ridge(**ridge_best_params)
ridge_best_model.fit(X_train_scaled, y_train)
ridge_best_pred = ridge_best_model.predict(X_test_scaled)
evaluate_model(ridge_best_model, X_train_scaled, X_test_scaled, y_train, y_test, ridge_best_pred)


훈련 R²: 0.9436
테스트 R²: 0.9435
MSE: 0.6333
최적 Ridge 하이퍼파라미터: {'alpha': 10}
최적 Ridge MSE: 0.6339
훈련 R²: 0.9436
테스트 R²: 0.9435
MSE: 0.6333


In [10]:
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV

elastic_net = ElasticNet(alpha=1.0, l1_ratio=0.5)  # 기본적으로 alpha=1.0, l1_ratio=0.5로 설정
elastic_net.fit(X_train_scaled, y_train)
elastic_net_pred = elastic_net.predict(X_test_scaled)
evaluate_model(elastic_net, X_train_scaled, X_test_scaled, y_train, y_test, elastic_net_pred)

# 하이퍼파라미터 튜닝
param_grid_en = {
    'alpha': [0.001, 0.01, 0.1, 1, 10, 100],
    'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]  # L1과 L2의 비율을 조정
}
elastic_net_grid_search = GridSearchCV(ElasticNet(), param_grid_en, cv=5, scoring='neg_mean_squared_error')
elastic_net_grid_search.fit(X_train_scaled, y_train)

elastic_net_best_params = elastic_net_grid_search.best_params_
elastic_net_best_score = -elastic_net_grid_search.best_score_  # MSE는 음수로 반환되므로 부호를 반전시킴
print(f"최적 ElasticNet 하이퍼파라미터: {elastic_net_best_params}")
print(f"최적 ElasticNet MSE: {elastic_net_best_score:.4f}")

# 최적 하이퍼파라미터 사용하여 재학습
elastic_net_best = ElasticNet(**elastic_net_best_params)
elastic_net_best.fit(X_train_scaled, y_train)
elastic_net_best_pred = elastic_net_best.predict(X_test_scaled)
evaluate_model(elastic_net_best, X_train_scaled, X_test_scaled, y_train, y_test, elastic_net_best_pred)

훈련 R²: 0.7592
테스트 R²: 0.7593
MSE: 1.8136
최적 ElasticNet 하이퍼파라미터: {'alpha': 0.001, 'l1_ratio': 0.1}
최적 ElasticNet MSE: 0.6339
훈련 R²: 0.9436
테스트 R²: 0.9435
MSE: 0.6333


In [11]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV

dt_regressor = DecisionTreeRegressor(random_state=42)
dt_regressor.fit(X_train_scaled, y_train)
dt_pred = dt_regressor.predict(X_test_scaled)
evaluate_model(dt_regressor, X_train_scaled, X_test_scaled, y_train, y_test, dt_pred)

# 하이퍼파라미터 튜닝
param_grid_dt = {
    'max_depth': [3, 5, 10, None],  # 트리의 최대 깊이
    'min_samples_split': [2, 5, 10],  # 분할을 위한 최소 샘플 수
    'min_samples_leaf': [1, 2, 4],  # 리프 노드의 최소 샘플 수
    'max_features': [None, 'sqrt', 'log2'],  # 분할을 위한 최대 특성 수
}

dt_grid_search = GridSearchCV(DecisionTreeRegressor(random_state=42), param_grid_dt, cv=5, scoring='neg_mean_squared_error')
dt_grid_search.fit(X_train_scaled, y_train)

dt_best_params = dt_grid_search.best_params_
dt_best_score = -dt_grid_search.best_score_  # MSE는 음수로 반환되므로 부호를 반전시킴
print(f"최적 DecisionTreeRegressor 하이퍼파라미터: {dt_best_params}")
print(f"최적 DecisionTreeRegressor MSE: {dt_best_score:.4f}")

# 최적 하이퍼파라미터 사용하여 재학습
dt_best_model = DecisionTreeRegressor(**dt_best_params, random_state=42)
dt_best_model.fit(X_train_scaled, y_train)
dt_best_pred = dt_best_model.predict(X_test_scaled)
evaluate_model(dt_best_model, X_train_scaled, X_test_scaled, y_train, y_test, dt_best_pred)


훈련 R²: 1.0000
테스트 R²: 0.9710
MSE: 0.2995
최적 DecisionTreeRegressor 하이퍼파라미터: {'max_depth': 10, 'max_features': None, 'min_samples_leaf': 4, 'min_samples_split': 2}
최적 DecisionTreeRegressor MSE: 0.2578
훈련 R²: 0.9738
테스트 R²: 0.9707
MSE: 0.2538


In [12]:
# from sklearn.ensemble import RandomForestRegressor
# from sklearn.model_selection import GridSearchCV

# rf_regressor = RandomForestRegressor(random_state=42)
# rf_regressor.fit(X_train_scaled, y_train)
# rf_pred = rf_regressor.predict(X_test_scaled)
# evaluate_model(rf_regressor, X_train_scaled, X_test_scaled, y_train, y_test, rf_pred)

# # 하이퍼파라미터 튜닝
# param_grid_rf = {
#     'n_estimators': [50, 100, 200],  # 트리의 개수
#     'max_depth': [None, 10, 20, 30],  # 트리의 최대 깊이
#     'min_samples_split': [2, 5, 10],  # 분할을 위한 최소 샘플 수
#     'min_samples_leaf': [1, 2, 4],  # 리프 노드의 최소 샘플 수
#     'max_features': ['auto', 'sqrt', 'log2'],  # 분할을 위한 최대 특성 수
#     'bootstrap': [True, False]  # 부트스트랩 샘플링 여부
# }
# rf_grid_search = GridSearchCV(RandomForestRegressor(random_state=42), param_grid_rf, cv=5, scoring='neg_mean_squared_error')
# rf_grid_search.fit(X_train_scaled, y_train)

# rf_best_params = rf_grid_search.best_params_
# rf_best_score = -rf_grid_search.best_score_  # MSE는 음수로 반환되므로 부호를 반전시킴
# print(f"최적 RandomForestRegressor 하이퍼파라미터: {rf_best_params}")
# print(f"최적 RandomForestRegressor MSE: {rf_best_score:.4f}")

# # 최적 하이퍼파라미터 사용하여 재학습
# rf_best_model = RandomForestRegressor(**rf_best_params, random_state=42)
# rf_best_model.fit(X_train_scaled, y_train)
# rf_best_pred = rf_best_model.predict(X_test_scaled)
# evaluate_model(rf_best_model, X_train_scaled, X_test_scaled, y_train, y_test, rf_best_pred)

In [13]:
# from sklearn.ensemble import GradientBoostingRegressor
# from sklearn.model_selection import GridSearchCV

# y_train_GBR = y_train.values.ravel()  # pandas DataFrame을 numpy array로 변환하고 ravel() 호출

# gb_regressor = GradientBoostingRegressor(random_state=42)
# gb_regressor.fit(X_train_scaled, y_train_GBR)
# gb_pred = gb_regressor.predict(X_test_scaled)
# evaluate_model(gb_regressor, X_train_scaled, X_test_scaled, y_train_GBR, y_test, gb_pred)

# # 하이퍼파라미터 튜닝
# param_grid_gb = {
#     'n_estimators': [50, 100, 200],  # 트리의 개수
#     'learning_rate': [0.01, 0.1, 0.2],  # 학습률
#     'max_depth': [3, 5, 10],  # 트리의 최대 깊이
#     'min_samples_split': [2, 5, 10],  # 분할을 위한 최소 샘플 수
#     'min_samples_leaf': [1, 2, 4],  # 리프 노드의 최소 샘플 수
#     'subsample': [0.8, 0.9, 1.0],  # 부트스트랩 샘플 비율
# }
# gb_grid_search = GridSearchCV(GradientBoostingRegressor(random_state=42), param_grid_gb, cv=5, scoring='neg_mean_squared_error')
# gb_grid_search.fit(X_train_scaled, y_train_GBR)

# gb_best_params = gb_grid_search.best_params_
# gb_best_score = -gb_grid_search.best_score_  # MSE는 음수로 반환되므로 부호를 반전시킴
# print(f"최적 GradientBoostingRegressor 하이퍼파라미터: {gb_best_params}")
# print(f"최적 GradientBoostingRegressor MSE: {gb_best_score:.4f}")

# # 최적 하이퍼파라미터 사용하여 재학습
# gb_best_model = GradientBoostingRegressor(**gb_best_params, random_state=42)
# gb_best_model.fit(X_train_scaled, y_train_GBR)
# gb_best_pred = gb_best_model.predict(X_test_scaled)
# evaluate_model(gb_best_model, X_train_scaled, X_test_scaled, y_train_GBR, y_test, gb_best_pred)

In [14]:
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor

# KFold로 3개의 폴드로 설정
cv = KFold(n_splits=3, shuffle=True, random_state=42)

mlp_regressor = MLPRegressor(random_state=42, max_iter=500, solver='adam')
mlp_regressor.fit(X_train_scaled, y_train)
mlp_pred = mlp_regressor.predict(X_test_scaled)
evaluate_model(mlp_regressor, X_train_scaled, X_test_scaled, y_train, y_test, mlp_pred)

# 하이퍼파라미터 튜닝
param_grid_mlp = {
    'hidden_layer_sizes': [(50,), (100,), (100, 100), (200,)],  # 은닉층의 크기
    'activation': ['relu', 'tanh', 'logistic'],  # 활성화 함수
    'solver': ['adam', 'lbfgs', 'sgd'],  # 최적화 알고리즘
    'alpha': [0.0001, 0.001, 0.01, 0.1],  # L2 정규화의 강도
    'learning_rate': ['constant', 'invscaling', 'adaptive'],  # 학습률 일정
    'max_iter': [1000, 2000],  # 최대 반복 횟수
    'batch_size': ['auto', 32, 64]  # 미니배치 크기
}
mlp_grid_search = GridSearchCV(MLPRegressor(random_state=42), param_grid_mlp, cv=cv, scoring='neg_mean_squared_error', verbose=2)
mlp_grid_search.fit(X_train_scaled, y_train)

mlp_best_params = mlp_grid_search.best_params_
mlp_best_score = -mlp_grid_search.best_score_  # MSE는 음수로 반환되므로 부호를 반전시킴
print(f"최적 MLPRegressor 하이퍼파라미터: {mlp_best_params}")
print(f"최적 MLPRegressor MSE: {mlp_best_score:.4f}")

# 최적 하이퍼파라미터 사용하여 재학습
mlp_best_model = MLPRegressor(**mlp_best_params, random_state=42)
mlp_best_model.fit(X_train_scaled, y_train)
mlp_best_pred = mlp_best_model.predict(X_test_scaled)
evaluate_model(mlp_best_model, X_train_scaled, X_test_scaled, y_train, y_test, mlp_best_pred)

훈련 R²: 0.9848
테스트 R²: 0.9846
MSE: 0.1713
Fitting 3 folds for each of 2592 candidates, totalling 7776 fits
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=  51.7s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=  55.6s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=  41.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs; total time= 3.6min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs; total time= 2.9min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs; total time= 2.9min
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=  45.1s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=  47.1s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=  52.7s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=adam; total time=  44.3s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=adam; total time=  47.9s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(5

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=lbfgs; total time= 5.5min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=lbfgs; total time= 5.6min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=lbfgs; total time= 5.5min
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=sgd; total time=  44.5s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=sgd; total time=  45.9s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=sgd; total time=  51.5s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=1000, solver=adam; total time=  43.9s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=1000, solver=adam; total time=  47.2s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_size

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=1000, solver=lbfgs; total time= 2.8min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=1000, solver=lbfgs; total time= 2.8min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=1000, solver=lbfgs; total time= 2.9min
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=1000, solver=sgd; total time=   6.5s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=1000, solver=sgd; total time=   6.5s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=1000, solver=sgd; total time=   6.7s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=2000, solver=adam; total time=  45.1s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=2000, solver=adam; total time=  48.1s
[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_la

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=relu, alpha=0.0001, batch_size=auto, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=2000, solver=lbfgs; total time= 6.2min


KeyboardInterrupt: 

In [None]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# '풍속(m/s)'와 '유의파고(m)'에 대해 각각 별도의 SVR 모델을 학습
for target_column in y_train.columns:
    target = y_train[target_column]
    
    # SVR 모델 생성
    svr_regressor = SVR()
    svr_regressor.fit(X_train_scaled, target)
    svr_pred = svr_regressor.predict(X_test_scaled)
    evaluate_model(svr_regressor, X_train_scaled, X_test_scaled, target, y_test[target_column], svr_pred)
    
    # 하이퍼파라미터 튜닝
    param_grid_svr = {
        'C': [0.1, 1, 10, 100],
        'epsilon': [0.01, 0.1, 0.5, 1],
        'kernel': ['linear', 'rbf', 'poly'],
        'degree': [3, 4, 5],
        'gamma': ['scale', 'auto']
    }
    
    svr_grid_search = GridSearchCV(SVR(), param_grid_svr, cv=5, scoring='neg_mean_squared_error')
    svr_grid_search.fit(X_train_scaled, target)

    svr_best_params = svr_grid_search.best_params_
    svr_best_score = -svr_grid_search.best_score_  # MSE는 음수로 반환되므로 부호를 반전시킴
    print(f"최적 SVR 하이퍼파라미터 ({target_column}): {svr_best_params}")
    print(f"최적 SVR MSE ({target_column}): {svr_best_score:.4f}")

    # 최적 하이퍼파라미터 사용하여 재학습
    svr_best_model = SVR(**svr_best_params)
    svr_best_model.fit(X_train_scaled, target)
    svr_best_pred = svr_best_model.predict(X_test_scaled)
    evaluate_model(svr_best_model, X_train_scaled, X_test_scaled, target, y_test[target_column], svr_best_pred)

In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV

knn_regressor = KNeighborsRegressor()
knn_regressor.fit(X_train_scaled, y_train)
knn_pred = knn_regressor.predict(X_test_scaled)
evaluate_model(knn_regressor, X_train_scaled, X_test_scaled, y_train, y_test, knn_pred)

# 하이퍼파라미터 튜닝
param_grid_knn = {
    'n_neighbors': [3, 5, 7, 9, 11],  # 이웃의 개수
    'weights': ['uniform', 'distance'],  # 가중치 설정
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],  # 알고리즘 선택
    'leaf_size': [20, 30, 40],  # ball_tree, kd_tree의 리프 크기
    'p': [1, 2]  # 거리 계산 방식 (1=맨해튼, 2=유클리드)
}
knn_grid_search = GridSearchCV(KNeighborsRegressor(), param_grid_knn, cv=5, scoring='neg_mean_squared_error')
knn_grid_search.fit(X_train_scaled, y_train)

knn_best_params = knn_grid_search.best_params_
knn_best_score = -knn_grid_search.best_score_  # MSE는 음수로 반환되므로 부호를 반전시킴
print(f"최적 KNeighborsRegressor 하이퍼파라미터: {knn_best_params}")
print(f"최적 KNeighborsRegressor MSE: {knn_best_score:.4f}")

# 최적 하이퍼파라미터 사용하여 재학습
knn_best_model = KNeighborsRegressor(**knn_best_params)
knn_best_model.fit(X_train_scaled, y_train)
knn_best_pred = knn_best_model.predict(X_test_scaled)
evaluate_model(knn_best_model, X_train_scaled, X_test_scaled, y_train, y_test, knn_best_pred)