In [1]:
# Mount Google Drive at /content/drive so later cells can read files stored there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Install the AutoTS package into the runtime via a shell escape.
# NOTE(review): no version is pinned, so a fresh run may pull a different release.
!pip install AutoTS



In [3]:
import pandas as pd
import numpy as np
import datetime as dt
import itertools
import random
import os
from autots import AutoTS
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

def seed_everything(seed):
    """Apply one seed to every random source this notebook configures.

    Stores the seed (as text) in the ``PYTHONHASHSEED`` environment variable
    and seeds both Python's ``random`` module and NumPy's global generator.

    Args:
        seed: Seed value handed to ``random.seed`` and ``np.random.seed``.
    """
    # NOTE(review): changing PYTHONHASHSEED at runtime presumably only affects
    # interpreters started afterwards — confirm if hash stability matters here.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed):
        apply_seed(seed)

# Fix the random seeds.
seed_everything(4382)
# NOTE(review): seed_everything() already seeded NumPy's global generator with 4382;
# this call re-seeds it with 777, so NumPy and `random` end up on different seeds.
# Confirm which value is intended.
np.random.seed(777)

# Show the installed scikit-learn version.
print(sklearn.__version__)

1.2.2


In [5]:

# Function definitions
def make_month(df):
    """Return the month number of every value in the '일자' column.

    The column is cast to strings and parsed with ``pd.to_datetime``; the
    input frame is not modified.

    Args:
        df: DataFrame with a '일자' (date) column.

    Returns:
        Series of month numbers sharing ``df``'s index.
    """
    return pd.to_datetime(df['일자'].astype('str')).dt.month

def make_year(df):
    """Return the calendar year of every value in the '일자' column.

    The column is cast to strings and parsed with ``pd.to_datetime``; the
    input frame is not modified.

    Args:
        df: DataFrame with a '일자' (date) column.

    Returns:
        Series of years sharing ``df``'s index.
    """
    date_text = df['일자'].astype('str')
    return pd.to_datetime(date_text).dt.year

def make_day(df):
    """Return the day of month of every value in the '일자' column.

    The column is cast to strings and parsed with ``pd.to_datetime``; the
    input frame is not modified.

    Args:
        df: DataFrame with a '일자' (date) column.

    Returns:
        Series of day-of-month numbers sharing ``df``'s index.
    """
    timestamps = pd.to_datetime(df['일자'].astype('str'))
    return timestamps.dt.day

def make_weekday(df):
    """Return the weekday number of every value in the '일자' column.

    The column is cast to strings and parsed with ``pd.to_datetime``; the
    value comes from the pandas ``.dt.weekday`` accessor (Monday is 0).
    The input frame is not modified.

    Args:
        df: DataFrame with a '일자' (date) column.

    Returns:
        Series of weekday numbers sharing ``df``'s index.
    """
    return pd.to_datetime(df['일자'].astype('str')).dt.weekday

def make_gap(df):
    """Add two price-difference columns to ``df`` in place.

    * ``gap_minmax`` = '고가' (high) minus '저가' (low)
    * ``gap_day``    = '시가' (open) minus '종가' (close)

    Args:
        df: DataFrame holding the four price columns; it is modified directly.

    Returns:
        None.
    """
    gap_specs = (
        ('gap_minmax', '고가', '저가'),
        ('gap_day', '시가', '종가'),
    )
    for target, minuend, subtrahend in gap_specs:
        df[target] = df[minuend] - df[subtrahend]


def make_shift(df, n_days, columns=('시가', '종가')):
    """Add lagged copies of price columns to ``df`` in place.

    For every column in ``columns`` and every lag ``s`` from 1 to ``n_days``
    a new column named ``shift_<column>_<s>`` is created holding that column
    shifted down by ``s`` rows (so the first ``s`` rows are NaN).

    Args:
        df: DataFrame to extend; it is modified directly.
        n_days: Largest lag to generate. Values below 1 add no columns.
        columns: Names of the columns to lag. Defaults to the open ('시가')
            and close ('종가') prices, which is what this function always
            used before the parameter existed.

    Returns:
        None.
    """
    for col in columns:
        for lag in range(1, n_days + 1):
            df['shift_{}_{}'.format(col, lag)] = df[col].shift(lag)

def Add_predicted_data_to_train(df):
    """Add calendar features and a next-row close target, then drop '일자'.

    Columns written onto ``df`` (the passed-in frame itself is modified, as
    it was before this rewrite):

    * ``month``, ``year``, ``weekday`` — derived from the '일자' column.
    * ``target4`` — the '종가' (close) value of the following row.

    Args:
        df: DataFrame with '일자' (date) and '종가' (close) columns.

    Returns:
        A new DataFrame without the '일자' column and with a fresh 0..n-1
        index.
    """
    # Parse the date column once and reuse it for every calendar feature.
    dates = pd.to_datetime(df['일자'].astype('str'))
    df['month'] = dates.dt.month
    df['year'] = dates.dt.year
    df['weekday'] = dates.dt.weekday

    # shift(-1) leaves the only NaN on the LAST row, where a backward fill has
    # nothing to copy from, so the gap is closed with a forward fill instead.
    # A single-row frame still ends up with NaN because there is no earlier
    # target to carry forward.
    df['target4'] = df['종가'].shift(-1).ffill()

    return df.drop(columns=['일자']).reset_index(drop=True)

def Feature_Engineering(df,code_name):
    """Build calendar features and next-row targets for a single ticker.

    Rows whose '종목코드' equals ``code_name`` are selected, then these
    columns are added to the selection:

    * ``month``, ``year``, ``day``, ``weekday`` — derived from '일자'.
    * ``target4`` — the '종가' (close) value of the following row.
    * ``target1`` — the '시가' (open) value of the following row.

    Args:
        df: DataFrame with '종목코드', '일자', '종가' and '시가' columns. It is
            not modified.
        code_name: Ticker code to keep.

    Returns:
        A new DataFrame for the selected ticker. The original row labels are
        kept (the index is not reset).
    """
    # Copy the filtered rows so the assignments below act on an independent
    # frame rather than on a slice of the caller's data.
    df = df[df['종목코드'] == code_name].copy()

    # Parse the date column once and reuse it for every calendar feature.
    dates = pd.to_datetime(df['일자'].astype('str'))
    df['month'] = dates.dt.month
    df['year'] = dates.dt.year
    df['day'] = dates.dt.day
    df['weekday'] = dates.dt.weekday

    # shift(-1) leaves the only NaN on the LAST row, where a backward fill has
    # nothing to copy from, so the gap is closed with a forward fill instead.
    df['target4'] = df['종가'].shift(-1).ffill()
    df['target1'] = df['시가'].shift(-1).ffill()

    print("== finish ==")
    return df

In [None]:
# Reference link kept as a bare string literal: an article on AutoML time-series forecasting with AutoTS.
'''
https://medium.com/data-science-in-your-pocket/automl-for-time-series-forecasting-using-autots-with-example-735fb559443a'''

In [5]:
# Load the training CSV from the mounted Drive folder.
tmp_df = pd.read_csv('/content/drive/MyDrive/dacon/krs/train.csv')
# Disabled experiments: single-ticker feature engineering and 10-row lag features.
# NOTE(review): the first line refers to a `train` variable that is not defined at this point.
#tmp_df = Feature_Engineering(train,'A060310')
#make_shift(tmp_df,10)

In [32]:
# Remove the ticker code and ticker name columns.
# NOTE(review): this cell carries execution count 32, out of sequence with its
# neighbours, and the saved outputs further down still show both columns —
# confirm whether it is meant to run at this position. Re-running it after the
# columns are gone raises a KeyError.
tmp_df=tmp_df.drop(columns=['종목코드','종목명'])

In [6]:
# Parse the YYYYMMDD values once and use them as the frame's DatetimeIndex,
# then rewrite the '일자' column as YYYYMMDD text taken from that index.
tmp_df.index = pd.DatetimeIndex(pd.to_datetime(tmp_df['일자'], format='%Y%m%d'))
tmp_df['일자'] = tmp_df.index.strftime('%Y%m%d')

In [7]:
# Preview the first rows to check the new date index.
tmp_df.head()

Unnamed: 0_level_0,일자,종목코드,종목명,거래량,시가,고가,저가,종가
일자,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-06-01,20210601,A060310,3S,166690,2890,2970,2885,2920
2021-06-01,20210601,A095570,AJ네트웍스,63836,5860,5940,5750,5780
2021-06-01,20210601,A006840,AK홀딩스,103691,35500,35600,34150,34400
2021-06-01,20210601,A054620,APS,462544,14600,14950,13800,14950
2021-06-01,20210601,A265520,AP시스템,131987,29150,29150,28800,29050


In [16]:
# Configure the AutoTS search: 15-step horizon, 95% prediction interval,
# no ensembling, three candidate model families, fixed seed, single worker.
# NOTE(review): frequency='D' is calendar-daily, and the saved forecast below
# contains 2023-06-03 and 2023-06-04 (a weekend) — confirm whether a
# trading-day frequency was intended for this price data.
model = AutoTS(
    forecast_length=15,
    frequency='D',
    prediction_interval=0.95,
    ensemble=None,
    models_mode='deep',
    model_list = ['ARIMA','ETS','MLEnsemble'], #'univariate',# or ['ARIMA','ETS']
    max_generations=10,
    num_validations=3,
    no_negatives=True,
    random_seed=1234,
    n_jobs=1)

In [11]:
# Inspect the preset model lists shipped with AutoTS: the preset names first,
# then the members of the 'all' preset.
from autots.models.model_list import model_lists
print(model_lists.keys())
print(model_lists['all'])

dict_keys(['all', 'default', 'fast', 'superfast', 'parallel', 'fast_parallel', 'fast_parallel_no_arima', 'probabilistic', 'multivariate', 'univariate', 'no_params', 'recombination_approved', 'no_shared', 'no_shared_fast', 'experimental', 'slow', 'gpu', 'regressor', 'best', 'motifs', 'all_result_path', 'regressions', 'all_pragmatic'])
['FBProphet', 'ARIMA', 'ARCH', 'ARDL', 'SeasonalNaive', 'UnobservedComponents', 'UnivariateRegression', 'MLEnsemble', 'SeasonalityMotif', 'AverageValueNaive', 'LastValueNaive', 'GLS', 'KalmanStateSpace', 'GLM', 'NeuralProphet', 'UnivariateMotif', 'ETS', 'DatepartRegression', 'ConstantNaive', 'Theta', 'MetricMotif']


In [15]:
# Show the 'fast_parallel' preset (a mapping from model name to a numeric value).
print(model_lists['fast_parallel'])

{'ETS': 1, 'FBProphet': 0.8, 'ARIMA': 0.2, 'GLM': 1, 'UnobservedComponents': 1, 'UnivariateMotif': 1, 'MultivariateMotif': 0.8, 'Theta': 1, 'ARDL': 1, 'ARCH': 1, 'ConstantNaive': 1, 'LastValueNaive': 1.5, 'AverageValueNaive': 1, 'GLS': 1, 'SeasonalNaive': 1, 'VAR': 0.8, 'VECM': 1, 'WindowRegression': 0.5, 'DatepartRegression': 0.8, 'SectionalMotif': 1, 'NVAR': 1, 'MAR': 1, 'RRVAR': 1, 'KalmanStateSpace': 1, 'MetricMotif': 1, 'Cassandra': 1, 'SeasonalityMotif': 1}


In [None]:
# Run the AutoTS model search on the frame, then forecast 15 steps ahead.
# NOTE(review): the frame is passed as-is, and the saved forecast below has one
# output column per input column (including '일자', '종목코드' and '종목명').
# This suggests every column was treated as its own series — confirm whether
# long-format arguments (date/value/id columns) were intended instead.
model.fit(tmp_df)
prediction = model.predict(forecast_length=15)

In [None]:
# IPython help lookup for the model's predict method; a development aid only.
?model.predict

In [19]:
# Display the point forecasts held by the prediction object.
prediction.forecast

Unnamed: 0,일자,종목코드,종목명,거래량,시가,고가,저가,종가
2023-05-31,20230530.0,A060310,3S,76952990.0,2900.0,3475.665532,2759.178858,3021.507028
2023-06-01,20230530.0,A060310,3S,76952990.0,2900.0,3401.793495,2760.845566,2990.0
2023-06-02,20230530.0,A060310,3S,76952990.0,2900.0,3359.208392,2761.167022,2952.09324
2023-06-03,20230530.0,A060310,3S,621683.6,2884.603902,3360.670114,2714.253024,2948.916421
2023-06-04,20230530.0,A060310,3S,302347.4,2899.908836,3325.968833,2743.527975,2888.551047
2023-06-05,20230530.0,A060310,3S,205092.7,2856.340538,3256.917157,2736.889187,2843.796027
2023-06-06,20230530.0,A060310,3S,209832.3,2824.974612,3222.441309,2549.448536,2763.521175
2023-06-07,20230530.0,A060310,3S,76952990.0,2777.238289,3665.845624,2526.73508,3154.816421
2023-06-08,20230530.0,A060310,3S,76952990.0,2775.670689,3574.817789,2530.0,3136.811021
2023-06-09,20230530.0,A060310,3S,76952990.0,2775.0,3530.0,2530.0,3097.895349


In [20]:
# Display the prediction object's repr, which lists its available attributes.
prediction

Prediction object: 
Return .forecast, 
 .upper_forecast, 
 .lower_forecast 
 .model_parameters 
 .transformation_parameters

In [None]:
# Training rows: skip the first 10 rows and leave out the last 2.
train = tmp_df.iloc[10:-2]

In [None]:
# Validation rows: only the second-to-last row (the final row is excluded).
valid = tmp_df[-2:-1]

In [None]:
# Set up the regression experiment on the training rows with 'target4' as the target.
# The rows are time-ordered and the folds use the 'timeseries' strategy, so the
# hold-out split must not be shuffled: shuffling would put rows from the middle
# of the history into the test set and let the model train on later rows.
# NOTE(review): `setup` is not imported anywhere in the visible cells —
# presumably it comes from pycaret.regression; confirm and import it explicitly.
s = setup(
    data = train,
    train_size = 0.90,
    target = 'target4',
    fold_strategy = 'timeseries',
    data_split_shuffle = False,
    session_id = 1224,
)

Unnamed: 0,Description,Value
0,Session id,1224
1,Target,target4
2,Target type,Regression
3,Original data shape,"(482, 9)"
4,Transformed data shape,"(482, 9)"
5,Transformed train set shape,"(433, 9)"
6,Transformed test set shape,"(49, 9)"
7,Numeric features,8
8,Preprocess,True
9,Imputation type,simple


In [None]:
# IPython help lookup for compare_models; a development aid only.
?compare_models

In [None]:
# Compare candidate models and ask for the top 4; the result is not stored.
# The saved output shows this run was interrupted (KeyboardInterrupt).
compare_models( n_select=4 , verbose = False)

KeyboardInterrupt: ignored

In [None]:
# Compare candidate models and keep the top 3 for blending.
compare = compare_models( n_select=3 , verbose = False)

In [None]:
# Disabled alternative: restrict the comparison to four tree-based estimators.
#compare = compare_models(include=['lightgbm','dt','rf','et'], n_select=3 , verbose = False)
# Blend the selected models, then finalize the blended model.
blend = blend_models(compare, verbose = False)
blend = finalize_model(blend)

In [None]:
# Score the blended model on the single validation row.
kk = predict_model( blend , data=valid)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Voting Regressor,241.4859,58315.4207,241.4859,,0.0835,0.0801


In [None]:
# Show the predicted values as a NumPy array.
kk['prediction_label'].values

array([2773.51413974])

In [None]:
# Display the validation row for comparison with the prediction above.
valid

Unnamed: 0,거래량,시가,종가,month,year,day,weekday,target4,target1
984000,13590249,2245,2910,5,2023,26,4,3015.0,2805.0


In [None]:
# Daily calendar from 2023-05-10 through 2023-05-30, stored as YYYYMMDD text
# in a one-column frame.
future_dates = pd.date_range(start = '20230510', end = '20230530')
future_dates_df = pd.DataFrame({'일자': future_dates.strftime('%Y%m%d')})
# Add every tmp_df column after the first one as a zero-filled placeholder.
for col in tmp_df.columns[1:]:
    future_dates_df[col] = 0

In [None]:
# Preview the placeholder frame built for the forecast dates.
future_dates_df.head()

In [None]:
# Set up the regression experiment on the full frame with '종가' (close) as the
# target, ignoring the ticker code and ticker name columns.
# The rows are time-ordered and the folds use the 'timeseries' strategy, so the
# hold-out split must not be shuffled: shuffling would put rows from the middle
# of the history into the test set and let the model train on later rows.
# NOTE(review): `setup` is not imported anywhere in the visible cells —
# presumably it comes from pycaret.regression; confirm and import it explicitly.
s = setup(
    data = tmp_df,
    train_size = 0.95,
    target = '종가',
    fold_strategy = 'timeseries',
    data_split_shuffle = False,
    session_id = 2,
    ignore_features = ['종목코드','종목명'],
)

In [None]:
# Compare only the four listed estimators and keep the top 3 for blending.
compare = compare_models(include=['lightgbm','dt','rf','et'], n_select=3 , verbose = False)

In [None]:
# Blend the selected models, then finalize the blended model.
blend = blend_models(compare, verbose = False)
blend = finalize_model(blend)

In [None]:
# Score the blended model on the placeholder future-date frame.
# The saved output shows this cell ended in a NameError, i.e. at least one name
# used here was undefined when it last ran.
kk = predict_model( blend , data=future_dates_df)

NameError: ignored

In [None]:
# Re-inspect the placeholder frame after the failed prediction call.
future_dates_df.head()

Unnamed: 0,일자,future
0,20230510,0
1,20230511,0
2,20230512,0
3,20230513,0
4,20230514,0
