In [26]:
from sklearn.ensemble import ExtraTreesRegressor
import optuna
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split


In [27]:
# Load train.csv into a DataFrame; the path is relative to the kernel's working directory.
data = pd.read_csv("train.csv")

In [28]:
def month_to_season(month):
    """Return the Korean season label for a month number.

    Months 3-5 map to '봄' (spring), 6-8 to '여름' (summer) and 9-11 to
    '가을' (autumn). Every other value falls through to '겨울' (winter):
    that covers 12, 1 and 2, but also anything outside 1-12 or between
    the listed ranges, because no range validation is performed.
    """
    season_ranges = (
        (3, 5, '봄'),
        (6, 8, '여름'),
        (9, 11, '가을'),
    )
    for first_month, last_month, season in season_ranges:
        if first_month <= month <= last_month:
            return season
    # Nothing matched: treated as winter.
    return '겨울'
# Add a season column by applying month_to_season to every value of the month column.
data['계절'] = data['월'].apply(month_to_season)


In [29]:
# ignore_features lists the columns that are dropped from the frame before modelling.
# The commented-out assignment directly below is an alternative drop list that is
# currently disabled; only the active assignment that follows it takes effect.
# NOTE(review): the Celsius-temperature entry in the active list appears to contain an
# invisible character between the degree sign and "C" - presumably it mirrors the CSV
# header exactly; confirm before retyping that string by hand.
# ignore_features=['절대 온도(K)', '이슬점 온도(°C)', '포화 증기압(mbar)', '실제 증기압(mbar)',
#        '증기압 부족량(mbar)', '수증기 함량 (g/kg)', '공기 밀도 (g/m**3)','ID']
ignore_features=['포화 증기압(mbar)', '이슬점 온도(°C)','수증기 함량 (g/kg)','섭씨 온도(°⁣C)',
       'ID','월']
# Columns that are one-hot encoded with pd.get_dummies further down:
# season, day and measurement time slot.
categorical_features = ['계절','일','측정 시간대']


In [30]:
# Drop the ignored columns and one-hot encode the categorical ones in one expression.
# `data` is rebound only when both steps succeed; an in-place drop followed by a
# separate get_dummies call would leave the frame half-transformed if the encoding
# step failed, and re-running the cell would then raise KeyError on the drop.
# The cell still consumes the raw frame, so re-run the loading cells before re-running it.
data = pd.get_dummies(
    data.drop(columns=ignore_features),
    columns=categorical_features,
)

In [34]:
# Separate the encoded frame into the feature matrix (every column except wind speed)
# and the wind-speed target vector, both as NumPy arrays.
datas = data.drop(columns=["풍속 (m/s)"]).to_numpy()
target = data["풍속 (m/s)"].to_numpy()

In [36]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    datas,
    target,
    test_size=0.2,
    random_state=42,
)


In [37]:
def objective(trial):
    """Optuna objective: hold-out MAE of an ExtraTreesRegressor for one trial.

    Asks the trial for four integer hyperparameters (n_estimators in 10-400,
    max_depth in 2-32 sampled on a log scale, min_samples_split in 2-10,
    min_samples_leaf in 1-10), fits the regressor on the module-level
    X_train / y_train and scores it on the module-level X_test / y_test.

    Args:
        trial: the Optuna trial used to suggest hyperparameter values.

    Returns:
        The mean absolute error of the predictions on X_test (lower is better).
    """
    # Dict entries are evaluated top to bottom, so the suggest calls keep their order.
    suggested = {
        "n_estimators": trial.suggest_int("n_estimators", 10, 400),
        "max_depth": trial.suggest_int("max_depth", 2, 32, log=True),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
    }

    # Fixed random_state so score differences between trials come from the
    # hyperparameters rather than from the regressor's own randomness.
    regressor = ExtraTreesRegressor(random_state=42, **suggested)
    regressor.fit(X_train, y_train)

    return mean_absolute_error(y_test, regressor.predict(X_test))

In [38]:
# Create a minimisation study and run 100 trials of `objective`.
# The sampler is seeded so the sequence of suggested hyperparameters, and therefore the
# reported best trial, is repeatable after a kernel restart; this matches the fixed
# random_state=42 used for the data split and the regressor. TPESampler is Optuna's
# default sampler for single-objective studies, so the search algorithm is unchanged.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

[I 2023-07-19 23:26:55,857] A new study created in memory with name: no-name-637908db-1c90-4779-a791-23849672a260


In [None]:
# Report the best trial of the study: its objective value (the MAE returned by
# `objective`) followed by each hyperparameter name and value.
print("Best trial:")
trial = study.best_trial
print("MAE: {}".format(trial.value))
print("Params: ")
for key, value in trial.params.items():
    print("{}: {}".format(key, value))