In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

# Plot appearance settings applied to every figure in this notebook.
plt.rc('font', family='Malgun Gothic')  # Set the font family used for plot text
plt.rc('axes', unicode_minus=False)  # Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph
%config InlineBackend.figure_format = 'retina'  # Render inline figures at retina resolution for sharper text




In [2]:
# Load the training and test tables from CSV files under ../data.
df_train = pd.read_csv("../data/df_train_005.csv")
df_test = pd.read_csv("../data/df_test_005.csv")

In [3]:
# Separate the regression target ('ECLO') from the predictor columns.
# `columns=` already names the axis being dropped from, so no `axis` argument is needed.
y = df_train['ECLO']
X = df_train.drop(columns='ECLO')

In [4]:
# Hold out 20% of the rows, with a fixed random_state so the split is repeatable.
# NOTE(review): the AutoML cell visible in this notebook fits on the full X / y,
# not on x_train / y_train - confirm whether this hold-out split is still used.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
import os
import random

def seed_everything(seed):
    """Seed the random sources used by this notebook.

    Seeds Python's ``random`` module and NumPy's global generator, and stores
    the seed (as a string) in the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Seed value passed unchanged to ``random.seed`` and
            ``np.random.seed``.

    Returns:
        None.

    Note:
        Python reads ``PYTHONHASHSEED`` at interpreter start-up, so the value
        written here affects processes started afterwards rather than the
        hashing of the already-running kernel.
    """
    hash_seed = str(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = hash_seed
    np.random.seed(seed)


# Fix the seeds once, before any model is trained.
seed_everything(42)

---

In [6]:
from supervised.automl import AutoML

# Configure an mljar-supervised AutoML regressor, scored by RMSE, restricted to
# four tree-based algorithm families.
automl = AutoML(
    mode="Compete",
    algorithms=['Random Forest', 'LightGBM', 'Xgboost', 'CatBoost'],
    n_jobs=-1,  # -1 asks the library to use all available CPU cores
    total_time_limit=43200,  # training budget in seconds (43200 s = 12 h)
    eval_metric="rmse",
    ml_task="regression",
)
# 3.209155
# NOTE(review): the number above is kept from the original cell without explanation;
# presumably a score obtained with this configuration - confirm and label it.

In [7]:
# Run the AutoML search on the full X and y (not on the x_train / y_train hold-out split).
automl.fit(X, y)

AutoML directory: AutoML_1
The task is regression with evaluation metric rmse
AutoML will use algorithms: ['Random Forest', 'LightGBM', 'Xgboost', 'CatBoost']
AutoML will stack models
AutoML will ensemble available models
AutoML steps: ['adjust_validation', 'simple_algorithms', 'default_algorithms', 'not_so_random', 'golden_features', 'kmeans_features', 'insert_random_feature', 'features_selection', 'hill_climbing_1', 'hill_climbing_2', 'boost_on_errors', 'ensemble', 'stack', 'ensemble_stacked']
* Step adjust_validation will try to check up to 1 model
1_DecisionTree rmse 3.046606 trained in 4.65 seconds
Adjust validation. Remove: 1_DecisionTree
Validation strategy: 10-fold CV Shuffle
Skip simple_algorithms because no parameters were generated.
* Step default_algorithms will try to check up to 4 models
1_Default_LightGBM rmse 3.243376 trained in 65.71 seconds
2_Default_Xgboost rmse 3.245334 trained in 54.45 seconds
3_Default_CatBoost rmse 3.240316 trained in 59.09 seconds
4_Default_Rand



3_Default_CatBoost_KMeansFeatures rmse 3.242449 trained in 87.89 seconds




25_CatBoost_KMeansFeatures rmse 3.242003 trained in 93.83 seconds




31_CatBoost_KMeansFeatures rmse 3.242452 trained in 148.1 seconds
* Step insert_random_feature will try to check up to 1 model
3_Default_CatBoost_GoldenFeatures_RandomFeature rmse 3.241475 trained in 138.47 seconds
Drop features ['차대차_sum_달서구 호림동', '토요일', '달서구 상인동', '북구 노원동1가_diff_차대차', '차량단독', '단일로 - 터널', '동구 덕곡동', '젖음/습기', '금요일', '화요일', '북구 금호동', '동구 용수동', '목요일', '차대차', '비', '중구 북성로1가', '동구 불로동', '동구 중대동', '북구 칠성동2가', '북구 조야동', '차대차_sum_중구 수창동', '중구 달성동', '차대차_sum_달서구 파호동', '중구 동인동2가', '맑음', '수성구 수성동4가', '동구 용계동', '북구 고성동3가', '북구 서변동', '동구 진인동', '달성군 다사읍', '동구 신평동', '북구 검단동', '달서구 송현동', '서리/결빙', '달성군 논공읍', '달서구 용산동', '단일로 - 기타', '동구 방촌동', '달성군 가창면', '동구 신암동', '달서구 이곡동', '달서구 장기동', '동구 신서동', '북구 노곡동', '동구 율암동', '달서구 신당동', '수성구 범어동', '달서구 월암동', '동구 대림동', '달성군 화원읍', '북구 동천동', '동구 상매동', '동구 부동', '동구 동호동', '수성구 가천동', '중구 대안동', '중구 포정동', '중구 화전동', '수성구 욱수동', '중구 인교동', '중구 태평로1가', '남구 이천동', '수성구 두산동', '중구 남산동', '동구 도동', '수성구 연호동', '동구 백안동', '수성구 상동', '동구 괴전동', '서구 원대동3가', '중구 동인동4가', '달서구 호



25_CatBoost_KMeansFeatures_Stacked rmse 3.23775 trained in 96.67 seconds
50_Xgboost_Stacked rmse 3.242631 trained in 67.93 seconds
22_LightGBM_Stacked rmse 3.2391 trained in 56.18 seconds
55_RandomForest_Stacked rmse 3.238426 trained in 342.16 seconds
23_CatBoost_Stacked rmse 3.23815 trained in 98.73 seconds
51_Xgboost_Stacked rmse 3.242991 trained in 60.9 seconds
21_LightGBM_Stacked rmse 3.241118 trained in 58.5 seconds
4_Default_RandomForest_Stacked rmse 3.238811 trained in 360.85 seconds




3_Default_CatBoost_KMeansFeatures_Stacked rmse 3.238077 trained in 90.9 seconds
8_Xgboost_Stacked rmse 3.247068 trained in 62.19 seconds
18_LightGBM_Stacked rmse 3.246241 trained in 61.89 seconds
38_RandomForest_Stacked rmse 3.238367 trained in 243.32 seconds
7_Xgboost_Stacked rmse 3.245233 trained in 62.06 seconds
20_LightGBM_Stacked rmse 3.240546 trained in 65.99 seconds
37_RandomForest_SelectedFeatures_Stacked rmse 3.237665 trained in 322.58 seconds
* Step ensemble_stacked will try to check up to 1 model
Ensemble_Stacked rmse 3.230885 trained in 44.57 seconds
AutoML fit time: 11390.42 seconds
AutoML best model: 3_Default_CatBoost_GoldenFeatures_BoostOnErrors


---

In [20]:
# Load the sample submission file, used as the template for the predictions written out later.
sub_file = pd.read_csv("../data/sample_submission.csv")

In [21]:
# Generate predictions for the test table with the fitted AutoML object.
pred = automl.predict(df_test)

In [22]:
# Store the predictions in the 'ECLO' column.
# Use [] assignment rather than `sub_file.ECLO = pred`: attribute assignment only
# updates a column that already exists; if 'ECLO' were missing, pandas would set a
# plain Python attribute and the saved CSV would not contain the predictions.
sub_file['ECLO'] = pred

In [25]:
# Write the submission CSV without the index column.
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists first; otherwise a fresh checkout fails here after the long fit.
submission_path = "../data/sub_file/021automl.csv"
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
sub_file.to_csv(submission_path, index=False)