# AutoML

In [1]:
#!pip install sklearn
#!pip install pycaret[full]
#!pip install plotly
#!pip install pycaret-ts-alpha
import numpy as np
import pandas as pd
from pycaret.time_series import *
import os

In [2]:
def create_save_model(file_name, fh=14, target='Close',
                      exog_vars=('Open', 'High', 'Low', 'Volume'),
                      model_dir='model_data', session_id=123):
    """Train and save forecasting models for one CSV of price data.

    Reads ``<file_name>.csv``, drops its ``Date`` column and every row that
    contains a missing value, then:

    1. sets up an experiment on ``target`` plus the ``exog_vars`` columns,
       creates an ARIMA model, finalizes it and saves it as
       ``<model_dir>/<file_name>_total``;
    2. for each column in ``exog_vars``, sets up a separate experiment on
       that column alone, runs ``compare_models(turbo=True)``, finalizes the
       returned model and saves it as ``<model_dir>/<file_name>_<column>``.

    Args:
        file_name: CSV path without the ``.csv`` suffix. It is also used
            verbatim as the prefix of every saved model name.
        fh: Forecast horizon passed to every ``setup`` call.
        target: Name of the column forecast by the ARIMA model.
        exog_vars: Column name, or iterable of column names, used as
            exogenous variables for the ARIMA model; each one also gets its
            own model.
        model_dir: Directory the models are saved into; created if missing.
        session_id: Seed passed to every ``setup`` call.

    Returns:
        None. The results are the model files written under ``model_dir``.

    Raises:
        KeyError: If the CSV has no ``Date`` column, or lacks ``target`` or
            any of ``exog_vars``.
    """
    # A bare string would otherwise be split into single characters below.
    if isinstance(exog_vars, str):
        exog_vars = [exog_vars]
    exog_vars = list(exog_vars)

    # Load the data and preprocess it: no Date column, no incomplete rows.
    stock_data = (
        pd.read_csv(file_name + '.csv')
        .drop(columns=["Date"])
        .dropna()
    )

    # Target first, followed by the exogenous columns.
    data = stock_data[[target] + exog_vars]

    # exist_ok avoids the check-then-create race of exists() + makedirs();
    # creating the directory up front also fails fast on a bad path before
    # any expensive training starts.
    os.makedirs(model_dir, exist_ok=True)

    # Main experiment: ARIMA on the target with the exogenous columns present.
    exp_auto = TSForecastingExperiment()
    exp_auto.setup(data=data, target=target, fh=fh, session_id=session_id)
    best = exp_auto.create_model('arima')
    final_auto_model = exp_auto.finalize_model(best)
    model_path = os.path.join(model_dir, f'{file_name}_total')
    exp_auto.save_model(final_auto_model, model_path)

    # Find and save the best model for each exogenous variable on its own.
    for exog_var in exog_vars:
        exog_exp = TSForecastingExperiment()
        exog_exp.setup(data=data[exog_var], target=exog_var, fh=fh,
                       session_id=session_id)
        exog_best = exog_exp.compare_models(turbo=True)
        final_exog_model = exog_exp.finalize_model(exog_best)
        model_path = os.path.join(model_dir, f'{file_name}_{exog_var}')
        exog_exp.save_model(final_exog_model, model_path)

In [None]:
# Train and save the ARIMA model and the per-column models from '^KS11.csv'.
create_save_model(file_name='^KS11')

Unnamed: 0,Description,Value
0,session_id,123
1,Target,Close
2,Approach,Univariate
3,Exogenous Variables,Present
4,Original data shape,"(6664, 5)"
5,Transformed data shape,"(6664, 5)"
6,Transformed train set shape,"(6650, 5)"
7,Transformed test set shape,"(14, 5)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,6607.0,0.1088,0.1163,10.5984,15.0067,0.0044,0.0044,0.8477
1,6621.0,0.0749,0.0753,7.3066,9.7175,0.003,0.003,0.9788
2,6635.0,0.0708,0.0649,6.9125,8.3909,0.0028,0.0028,0.7465
Mean,,0.0848,0.0855,8.2725,11.0384,0.0034,0.0034,0.8577
SD,,0.017,0.0222,1.6525,2.8578,0.0007,0.0007,0.0951


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,session_id,123
1,Target,Open
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(6664, 1)"
5,Transformed data shape,"(6664, 1)"
6,Transformed train set shape,"(6650, 1)"
7,Transformed test set shape,"(14, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
arima,ARIMA,0.9943,0.845,44.5826,51.5325,0.0185,0.0184,-1.8005,0.3967
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,1.0815,0.9257,48.5252,56.4754,0.02,0.02,-4.5419,0.31
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,1.0833,0.9278,48.6087,56.6048,0.02,0.02,-4.565,0.9867
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,1.0833,0.9278,48.6087,56.6048,0.02,0.02,-4.565,0.68
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,1.0833,0.9278,48.6078,56.6035,0.02,0.02,-4.5644,0.3
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.0835,0.9198,48.614,56.1153,0.0201,0.02,-4.4657,4.28
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,1.0857,0.9263,48.7177,56.5146,0.0201,0.0201,-4.6646,0.26
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.0913,0.9478,48.9597,57.8137,0.0202,0.0202,-4.0639,0.4033
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,1.0917,0.9343,48.9834,57.0029,0.0202,0.0202,-4.7028,0.2633
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,1.0924,0.9348,49.0167,57.0339,0.0202,0.0202,-4.7201,0.8667


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,session_id,123
1,Target,High
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(6664, 1)"
5,Transformed data shape,"(6664, 1)"
6,Transformed train set shape,"(6650, 1)"
7,Transformed test set shape,"(14, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.0032,0.8238,43.7254,48.7111,0.018,0.018,-2.4395,0.2867
catboost_cds_dt,CatBoost Regressor w/ Cond. Deseasonalize & Detrending,1.0619,0.8885,46.3045,52.5569,0.0189,0.019,-5.3008,2.7267
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,1.099,0.8832,47.9116,52.2365,0.0196,0.0197,-4.2,0.2667
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,1.099,0.8832,47.9116,52.2365,0.0196,0.0197,-4.2,0.2867
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,1.0991,0.8833,47.914,52.2414,0.0196,0.0197,-4.2013,0.2267
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,1.1005,0.9169,47.9781,54.2248,0.0195,0.0197,-4.5069,1.2433
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,1.1067,0.8891,48.2459,52.5862,0.0197,0.0198,-4.3606,0.2333
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,1.1073,0.888,48.273,52.5174,0.0198,0.0198,-4.3707,0.3233
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,1.1098,0.8905,48.3817,52.6691,0.0198,0.0198,-4.4287,0.2433
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,1.1516,0.9241,50.2091,54.6595,0.0205,0.0206,-5.2806,0.25


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,session_id,123
1,Target,Low
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(6664, 1)"
5,Transformed data shape,"(6664, 1)"
6,Transformed train set shape,"(6650, 1)"
7,Transformed test set shape,"(14, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.3848,0.3442,37.7068,44.7498,0.0156,0.0157,-0.8082,1.4767
catboost_cds_dt,CatBoost Regressor w/ Cond. Deseasonalize & Detrending,0.4084,0.3726,40.0235,48.4468,0.0166,0.0167,-1.1565,9.3133
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.4273,0.3662,41.8914,47.6166,0.0174,0.0174,-2.3093,1.03
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.4278,0.3664,41.9346,47.6485,0.0174,0.0174,-2.3247,0.54
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.4326,0.3799,42.4049,49.3985,0.0177,0.0176,-2.1937,1.9067
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.4334,0.3714,42.4864,48.292,0.0177,0.0176,-2.4763,0.8267
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.4334,0.3714,42.4864,48.292,0.0177,0.0176,-2.4763,1.2167
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,0.4335,0.3711,42.4983,48.2582,0.0177,0.0177,-2.4754,0.46
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.4336,0.3713,42.5013,48.2879,0.0177,0.0177,-2.4846,0.4467
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.4474,0.3746,43.8549,48.7054,0.0182,0.0183,-1.9611,15.2133


Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Description,Value
0,session_id,123
1,Target,Volume
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(6664, 1)"
5,Transformed data shape,"(6664, 1)"
6,Transformed train set shape,"(6650, 1)"
7,Transformed test set shape,"(14, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
grand_means,Grand Means Forecaster,0.7171,0.6037,92297.2907,124260.3229,0.1886,0.1922,-0.1155,0.0233
naive,Naive Forecaster,0.8789,0.7228,113111.9048,148745.1817,0.2252,0.2503,-0.7825,0.0267
arima,ARIMA,1.0075,0.8499,129662.9488,174901.8053,0.2813,0.2573,-1.7262,0.86
snaive,Seasonal Naive Forecaster,1.0403,0.8705,133890.4762,179150.6954,0.2916,0.2645,-1.9079,0.0533
polytrend,Polynomial Trend Forecaster,1.4299,0.9758,184044.7372,200822.6713,0.4554,0.3545,-2.9661,0.0167


Processing:   0%|          | 0/117 [00:00<?, ?it/s]

In [None]:
# Train and save the ARIMA model and the per-column models from '055550.KS.csv'.
create_save_model(file_name="055550.KS")

In [None]:
# Train and save the ARIMA model and the per-column models from '^VNINDEX.VN.csv'.
create_save_model(file_name='^VNINDEX.VN')

In [None]:
# Train and save the ARIMA model and the per-column models from 'VCB.VN.csv'.
create_save_model(file_name='VCB.VN')