In [1]:
! pip install finance-datareader

Collecting finance-datareader
  Downloading finance_datareader-0.9.31-py3-none-any.whl (17 kB)
Collecting requests-file
  Downloading requests_file-1.5.1-py2.py3-none-any.whl (3.7 kB)
Installing collected packages: requests-file, finance-datareader
Successfully installed finance-datareader-0.9.31 requests-file-1.5.1


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
import os
import itertools
import random

import FinanceDataReader as fdr
from tqdm import tqdm
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm

  import pandas.util.testing as tm


In [3]:
random.seed(1234)
np.random.seed(1234)
os.environ["PYTHONHASHSEED"] = str(1234)

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# GridSearch로 찾은 최적의 파라미터로 예측 시행

In [5]:
path = '/content/drive/MyDrive/주식 종가 예측/open/'
list_name = 'stock_list.csv'
stock_list = pd.read_csv(os.path.join(path,list_name))
stock_list['종목코드'] = stock_list['종목코드'].apply(lambda x : str(x).zfill(6))

start_date = '20210104'
end_date = '20211126'

start_weekday = pd.to_datetime(start_date).weekday()
max_weeknum = pd.to_datetime(end_date).strftime('%V')
Business_days = pd.DataFrame(pd.date_range(start_date,end_date,freq='B'), columns = ['Date'])

In [6]:
submission_name = 'sample_submission.csv'
submission = pd.read_csv(os.path.join(path,submission_name))

In [7]:
nmae_df = pd.DataFrame(columns = ['code', 'nmae'])
error_df = pd.DataFrame(columns = ['code'])

In [None]:
import warnings
import itertools
import time
from statsmodels.tsa.statespace.sarimax import SARIMAX

# ignore warnings
warnings.filterwarnings("ignore")

for code in tqdm(stock_list['종목코드'].values):
  print(f'\n code: {code} \n')

  # Load data
  stock_data = fdr.DataReader(code, start = start_date, end = end_date)[['Close']].reset_index()
  stock_data.fillna(method = 'ffill', inplace = True)

  # Split the data into train / validation / test sets
  train_public = stock_data[:-20] # ~ 10/29 : predict from 11/1 to 11/5
  train_private = stock_data # ~ 11/26 : predict from 11/29 to 12/3

  test_public = stock_data[-20:-15]['Close'].reset_index(drop = True) # from 11/1 to 11/5
  test_public = np.array(test_public)

  gridsearch_result = pd.read_csv(f'/content/drive/MyDrive/주식 종가 예측/sarimax_gridsearch_final/{code}_sarimax_gridsearch.csv')

  min_idx = gridsearch_result[['aic']].idxmin()

  pdq = gridsearch_result.loc[min_idx, 'pdq'].item()
  p = np.int64(pdq[1])
  d = np.int64(pdq[4])
  q = np.int64(pdq[7])
  optimal_pdq = (p, d, q)

  pdqs = gridsearch_result.loc[min_idx, 'seasonal_pdqs'].item()
  seasonal_p = np.int64(pdqs[1])
  seasonal_d = np.int64(pdqs[4])
  seasonal_q = np.int64(pdqs[7])
  s = np.int64(pdqs[10])
  optimal_pdqs = (seasonal_p, seasonal_d, seasonal_q, s)

  optimal_trend = gridsearch_result.loc[min_idx, 'trend_param'].item()

  try:
    model_public = SARIMAX(train_public['Close'].values.astype('float64'), exog = None, order = optimal_pdq, seasonal_order = optimal_pdqs,
                         trend = optimal_trend, enforce_stationarity = True, enforce_invertibility = True)
    model_public_fit = model_public.fit()

    forecast_public = model_public_fit.forecast(steps = 5) # predict from 11/1 to 11/5
    pred_public = forecast_public

    nmae = np.mean(np.abs(test_public - pred_public) / test_public)
    nmae_df = nmae_df.append({'code' : code, 'nmae' : nmae}, ignore_index = True)

    print("pdq : ", optimal_pdq, "\n")
    print("pdqs : ", optimal_pdqs, "\n")
    print("trend : ", optimal_trend, "\n")
    print("public prediction result : ", pred_public, "\n")
    print("public actual value : ", test_public, "\n")
    print("private prediction result : ", pred_private, "\n")
    print("nmae * 100 : ", nmae * 100, "\n")

    # Add public predicted value to the submission file
    for i in range(0, 5):
      submission.loc[i, code] = pred_public[i]
  
  except:
    error_df = error_df.append({'code' : code}, ignore_index = True)

  try:
    model_private = SARIMAX(train_private['Close'].values.astype('float64'), exog = None, order = optimal_pdq, seasonal_order = optimal_pdqs,
                            trend = optimal_trend, enforce_seasonality = True, enforce_invertivility = True)
    model_private_fit = model_private.fit()

    forecast_private = model_private_fit.forecast(steps = 5) # predict from 11/29 to 12/3
    pred_private = forecast_private

    for i in range(0, 5):
      submission.loc[i + 5, code] = pred_private[i]
  
  except:
    error_df = error_df.append({'code' : code}, ignore_index = True)

  time.sleep(1)  

  0%|          | 0/370 [00:00<?, ?it/s]


 code: 005930 

pdq :  (2, 1, 2) 

pdqs :  (2, 0, 2, 5) 

trend :  c 

public prediction result :  [69605.49934626 69477.42012356 69610.44802139 69646.43687876
 69522.82761396] 

public actual value :  [69900 71500 70400 70600 70200] 



  0%|          | 1/370 [00:09<56:51,  9.24s/it]


 code: 000660 

pdq :  (0, 1, 0) 

pdqs :  (2, 2, 1, 5) 

trend :  t 

public prediction result :  [103334.69685037 104178.03202693 104684.26794825 106751.68435606
 107473.5072473 ] 

public actual value :  [106500 107500 105500 106000 107000] 

private prediction result :  [72419.37469584 72309.32040781 72451.11755533 72410.70486183
 72126.00267288] 

nmae * 100 :  1.5974380915988018 



  1%|          | 2/370 [00:15<46:36,  7.60s/it]


 code: 035420 

pdq :  (0, 1, 0) 

pdqs :  (2, 2, 1, 5) 

trend :  t 

public prediction result :  [406636.20412853 415347.12185132 417589.00538646 420247.87258243
 416553.95950082] 

public actual value :  [408500 410000 402500 411500 410500] 

private prediction result :  [121683.06937272 122064.25057559 121953.42925386 121243.00913011
 121535.93869324] 

nmae * 100 :  1.8219755586804416 



  1%|          | 3/370 [00:20<39:16,  6.42s/it]


 code: 035720 

pdq :  (2, 1, 1) 

pdqs :  (0, 2, 2, 5) 

trend :  t 

public prediction result :  [125186.67517684 126038.7623578  126876.45398817 126561.5902881
 127656.85648617] 

public actual value :  [125500 128000 124500 130000 128500] 

private prediction result :  [393263.47003154 385625.57481678 380532.28157368 384144.97128335
 378552.7312574 ] 

nmae * 100 :  1.3983499731298543 



  1%|          | 4/370 [00:29<44:16,  7.26s/it]


 code: 207940 

pdq :  (1, 1, 0) 

pdqs :  (0, 2, 1, 5) 

trend :  t 

public prediction result :  [871144.51047867 872790.5311659  892301.59569178 880019.51215172
 864664.07252675] 

public actual value :  [874000 874000 874000 865000 864000] 

private prediction result :  [126050.2941125  124206.38997595 123316.19514138 125383.65451616
 125463.76480603] 

nmae * 100 :  0.8744644652417111 



  1%|▏         | 5/370 [00:32<36:21,  5.98s/it]


 code: 005935 

pdq :  (1, 1, 2) 

pdqs :  (2, 0, 0, 0) 

trend :  t 

public prediction result :  [64005.78129124 64116.01859946 64086.91414367 63876.90140288
 63552.76764464] 

public actual value :  [65200 66200 66200 66600 66300] 

private prediction result :  [856061.36180631 832912.32942976 826443.26426047 824312.05277811
 847862.65390666] 

nmae * 100 :  3.280795942870211 



  2%|▏         | 6/370 [00:36<31:22,  5.17s/it]


 code: 051910 

pdq :  (0, 1, 0) 

pdqs :  (0, 2, 2, 5) 

trend :  t 

public prediction result :  [846094.49640519 856127.51571798 860388.41415482 874305.8676006
 871696.17112916] 

public actual value :  [832000 831000 784000 790000 786000] 

private prediction result :  [66832.03928877 66831.30845111 66555.80444678 66132.93066435
 65760.26238621] 

nmae * 100 :  7.207137644767933 



  2%|▏         | 7/370 [00:41<30:22,  5.02s/it]


 code: 006400 

pdq :  (0, 1, 0) 

pdqs :  (1, 2, 1, 5) 

trend :  t 

public prediction result :  [728391.38753508 747282.4441944  749416.17142569 745561.71016057
 748395.57405805] 

public actual value :  [723000 731000 732000 751000 755000] 

private prediction result :  [720164.16695127 708234.1463431  703790.37281501 704697.37055893
 690686.82468406] 

nmae * 100 :  1.3902547775062866 



  2%|▏         | 8/370 [00:45<28:21,  4.70s/it]


 code: 005380 

pdq :  (0, 1, 1) 

pdqs :  (1, 2, 1, 5) 

trend :  t 

public prediction result :  [210834.70567957 212115.69024183 212454.43869012 210137.91818168
 209368.02734163] 

public actual value :  [208000 210000 208500 214000 215000] 

private prediction result :  [708256.50454238 706181.33422567 702764.68182053 695223.29284848
 689498.41808297] 

nmae * 100 :  1.738231478025402 



  2%|▏         | 9/370 [00:50<29:34,  4.92s/it]


 code: 068270 

pdq :  (0, 0, 0) 

pdqs :  (2, 0, 0, 5) 

trend :  t 

public prediction result :  [224368.67332146 229469.20478797 226919.07479913 224042.20477224
 212440.54925663] 

public actual value :  [205500 206500 210000 208500 209000] 

private prediction result :  [212352.92120029 209277.38463013 208083.4281686  206344.63297555
 206201.86308531] 

nmae * 100 :  7.4924261430110235 



  3%|▎         | 10/370 [00:55<29:05,  4.85s/it]


 code: 000270 

pdq :  (2, 0, 1) 

pdqs :  (0, 2, 1, 5) 

trend :  c 

public prediction result :  [85397.40307744 85305.77872712 84909.24586445 85477.47464781
 83921.66870301] 

public actual value :  [84300 86000 85000 87000 88000] 

private prediction result :  [233121.37808122 222006.92383996 220300.93323492 218259.2356429
 223435.47768498] 

nmae * 100 :  1.720056628573537 



  3%|▎         | 11/370 [01:01<31:13,  5.22s/it]


 code: 005490 

pdq :  (2, 0, 1) 

pdqs :  (0, 2, 1, 5) 

trend :  t 

public prediction result :  [303912.42400702 306832.18095245 298321.01358429 291808.62429566
 285641.53462584] 

public actual value :  [296500 298000 300500 297500 285000] 

private prediction result :  [233121.37808122 222006.92383996 220300.93323492 218259.2356429
 223435.47768498] 

nmae * 100 :  1.665416217071735 



  3%|▎         | 12/370 [01:07<32:47,  5.49s/it]


 code: 066570 

pdq :  (0, 1, 0) 

pdqs :  (0, 2, 2, 5) 

trend :  t 

public prediction result :  [119247.1044523  120326.18095388 118302.78457329 118587.29062124
 118866.1687981 ] 

public actual value :  [120500 123000 122000 122500 123000] 

private prediction result :  [276283.74590689 280451.00826112 275546.49639421 270618.34261134
 266674.23602423] 

nmae * 100 :  2.559795052291227 



  4%|▎         | 13/370 [01:13<33:14,  5.59s/it]


 code: 028260 

pdq :  (0, 0, 0) 

pdqs :  (2, 0, 1, 5) 

trend :  t 

public prediction result :  [197245.43382898 199344.99183066 199367.13562869 196587.9351937
 194215.54305802] 

public actual value :  [115000 117000 116500 116500 116000] 

private prediction result :  [123236.55999412 120189.84292365 119877.05315415 118547.18491057
 120804.18193217] 

nmae * 100 :  69.84017860175415 



  4%|▍         | 14/370 [01:16<28:44,  4.84s/it]


 code: 012330 

pdq :  (1, 1, 0) 

pdqs :  (1, 2, 1, 5) 

trend :  t 

public prediction result :  [255149.09073744 256672.89857616 252837.35886122 250765.76422875
 243108.16612308] 

public actual value :  [253000 255500 252500 252000 252500] 

private prediction result :  [168574.65324526 169041.03782996 169539.92712566 168298.55032468
 167051.40155135] 

nmae * 100 :  1.1302849651756626 



  4%|▍         | 15/370 [01:21<29:47,  5.04s/it]


 code: 096770 



  4%|▍         | 16/370 [01:25<27:28,  4.66s/it]


 code: 051900 

