In [1]:
! pip install finance-datareader

Collecting finance-datareader
  Downloading finance_datareader-0.9.31-py3-none-any.whl (17 kB)
Collecting requests-file
  Downloading requests_file-1.5.1-py2.py3-none-any.whl (3.7 kB)
Installing collected packages: requests-file, finance-datareader
Successfully installed finance-datareader-0.9.31 requests-file-1.5.1


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
import os
import itertools
import random

import FinanceDataReader as fdr
from tqdm import tqdm
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm

  import pandas.util.testing as tm


In [3]:
# Single seed shared by every RNG the notebook touches, so it can be changed
# in one place.
SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
# NOTE: PYTHONHASHSEED is read when an interpreter starts, so this assignment
# cannot change string hashing in the already-running kernel; it only reaches
# processes launched from this one afterwards.
os.environ["PYTHONHASHSEED"] = str(SEED)

In [4]:
# Colab-only step: mount Google Drive at /content/drive. The stock list read
# below and the grid-search result files written later both live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 나눠서 GridSearch

In [5]:
# --- Batch selection ---------------------------------------------------------
# The grid search is run over the stock list one slice at a time. Change these
# two bounds to choose the slice for this run: rows BATCH_START (inclusive)
# up to BATCH_END (exclusive) of stock_list.csv.
BATCH_START = 320
BATCH_END = 340

# --- Stock list --------------------------------------------------------------
path = '/content/drive/MyDrive/주식 종가 예측/open/'
list_name = 'stock_list.csv'
stock_list = pd.read_csv(os.path.join(path, list_name))
# Convert each stock code to str and left-pad it with zeros to six characters.
stock_list['종목코드'] = stock_list['종목코드'].apply(lambda raw_code: str(raw_code).zfill(6))

# Keep only the rows of the selected batch and renumber them from 0.
stock_list = stock_list.iloc[BATCH_START:BATCH_END].reset_index(drop = True)

# --- Date range --------------------------------------------------------------
start_date = '20210104'
end_date = '20211126'

# Weekday of the first date (Monday == 0).
start_weekday = pd.to_datetime(start_date).weekday()
# ISO-8601 week number of the last date, kept as a string.
max_weeknum = pd.to_datetime(end_date).strftime('%V')
# One row per business day (Mon-Fri) between the two dates, inclusive.
Business_days = pd.DataFrame(pd.date_range(start_date, end_date, freq = 'B'), columns = ['Date'])

In [6]:
import warnings
import itertools
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Suppress all warnings for the rest of the session; the search below runs
# len(pdq) * len(seasonal_pdqs) * len(trend_params) fits per stock.
warnings.filterwarnings("ignore")

# Number of trailing rows held out of every series; the models are fitted on
# the rows before them only.
HOLDOUT_ROWS = 20

# Directory receiving one result CSV per stock code. Create it up front so a
# missing folder cannot make to_csv fail after a full search has finished.
result_dir = '/content/drive/MyDrive/주식 종가 예측/sarimax_gridsearch_final'
os.makedirs(result_dir, exist_ok = True)

# The search space is identical for every stock, so build it once.
p = d = q = range(0, 3)
s = [0, 5]  # candidate seasonal periods
pdq = list(itertools.product(p, d, q))
seasonal_pdqs = list(itertools.product(p, d, q, s))
trend_params = ('c', 't')
result_columns = ['pdq', 'seasonal_pdqs', 'trend_param', 'aic']

for code in tqdm(stock_list['종목코드'].values):
  print(f'\n code: {code} \n')

  # Load closing prices and forward-fill missing values
  stock_data = fdr.DataReader(code, start = start_date, end = end_date)[['Close']].reset_index()
  stock_data = stock_data.ffill()

  # Training part of the series (original note: runs through ~ 10/29)
  train = stock_data[:-HOLDOUT_ROWS]
  # Convert once per stock instead of once per fit
  train_close = train['Close'].values.astype('float64')

  # One dict per successful fit; turned into a DataFrame in a single step
  # afterwards instead of growing a DataFrame row by row.
  records = []

  # Grid search: itertools.product visits the combinations in the same order
  # as three nested loops over pdq, seasonal_pdqs and trend_params.
  for params, seasonal_params, trend_param in itertools.product(pdq, seasonal_pdqs, trend_params):
    try:
      model = SARIMAX(train_close, exog = None, order = params, seasonal_order = seasonal_params, trend = trend_param,
                      enforce_stationarity = True, enforce_invertibility = True)
      model_fit = model.fit()
      aic = model_fit.aic
    except Exception:
      # Best effort: a combination that cannot be built or fitted is skipped.
      # Only Exception is caught so that KeyboardInterrupt still stops the run.
      continue

    records.append({'pdq' : params, 'seasonal_pdqs' : seasonal_params, 'trend_param' : trend_param, 'aic' : aic})

  gridsearch_result = pd.DataFrame(records, columns = result_columns)

  # Make a completely failed search visible instead of silently writing an
  # empty table.
  if gridsearch_result.empty:
    print(f' no SARIMAX fit succeeded for code {code} ')

  # Save grid search results
  gridsearch_result.to_csv(os.path.join(result_dir, f'{code}_sarimax_gridsearch.csv'), index = False)

  0%|          | 0/20 [00:00<?, ?it/s]


 code: 216080 



  5%|▌         | 1/20 [39:12<12:25:04, 2352.87s/it]


 code: 013120 



 10%|█         | 2/20 [1:19:01<11:52:05, 2373.64s/it]


 code: 095660 



 15%|█▌        | 3/20 [1:59:00<11:15:55, 2385.63s/it]


 code: 060150 



 20%|██        | 4/20 [2:39:48<10:42:43, 2410.22s/it]


 code: 061970 



 25%|██▌       | 5/20 [3:20:22<10:04:40, 2418.73s/it]


 code: 095610 



 30%|███       | 6/20 [3:59:52<9:20:31, 2402.26s/it] 


 code: 089970 



 35%|███▌      | 7/20 [4:39:21<8:38:05, 2391.18s/it]


 code: 036810 



 40%|████      | 8/20 [5:18:29<7:55:30, 2377.52s/it]


 code: 348210 



 45%|████▌     | 9/20 [5:55:40<7:07:29, 2331.77s/it]


 code: 330860 



 50%|█████     | 10/20 [6:32:31<6:22:23, 2294.38s/it]


 code: 263720 



 55%|█████▌    | 11/20 [7:10:00<5:42:05, 2280.64s/it]


 code: 143240 



 60%|██████    | 12/20 [7:48:03<5:04:10, 2281.29s/it]


 code: 091700 



 65%|██████▌   | 13/20 [8:28:18<4:30:52, 2321.82s/it]


 code: 067000 



 70%|███████   | 14/20 [9:06:39<3:51:32, 2315.37s/it]


 code: 204270 



 75%|███████▌  | 15/20 [9:46:08<3:14:17, 2331.57s/it]


 code: 032190 



 80%|████████  | 16/20 [10:25:04<2:35:31, 2332.85s/it]


 code: 131370 



 85%|████████▌ | 17/20 [11:04:40<1:57:18, 2346.07s/it]


 code: 321550 



 90%|█████████ | 18/20 [11:44:25<1:18:35, 2357.57s/it]


 code: 347860 



 95%|█████████▌| 19/20 [12:22:11<38:50, 2330.10s/it]  


 code: 003800 



100%|██████████| 20/20 [12:58:17<00:00, 2334.85s/it]
