In [1]:
! pip install prophet

Collecting prophet
  Downloading prophet-1.0.1.tar.gz (65 kB)
[?25l[K     |█████                           | 10 kB 24.9 MB/s eta 0:00:01[K     |██████████                      | 20 kB 10.0 MB/s eta 0:00:01[K     |███████████████                 | 30 kB 8.3 MB/s eta 0:00:01[K     |████████████████████            | 40 kB 7.0 MB/s eta 0:00:01[K     |█████████████████████████       | 51 kB 4.3 MB/s eta 0:00:01[K     |██████████████████████████████  | 61 kB 4.5 MB/s eta 0:00:01[K     |████████████████████████████████| 65 kB 2.4 MB/s 
Collecting cmdstanpy==0.9.68
  Downloading cmdstanpy-0.9.68-py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 4.3 MB/s 
Collecting ujson
  Downloading ujson-4.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (216 kB)
[K     |████████████████████████████████| 216 kB 8.7 MB/s 
Building wheels for collected packages: prophet
  Building wheel for prophet (setup.py) ... [?25l[?25hdone
  Created wheel for prophet

In [2]:
! pip install finance-datareader

Collecting finance-datareader
  Downloading finance_datareader-0.9.31-py3-none-any.whl (17 kB)
Collecting requests-file
  Downloading requests_file-1.5.1-py2.py3-none-any.whl (3.7 kB)
Installing collected packages: requests-file, finance-datareader
Successfully installed finance-datareader-0.9.31 requests-file-1.5.1


In [3]:
import FinanceDataReader as fdr
from tqdm import tqdm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import itertools
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
path = '/content/drive/MyDrive/주식 종가 예측/open/'
list_name = 'stock_list.csv'
stock_list = pd.read_csv(os.path.join(path,list_name))
stock_list['종목코드'] = stock_list['종목코드'].apply(lambda x : str(x).zfill(6))
stock_list

Unnamed: 0,종목명,종목코드,상장시장
0,삼성전자,005930,KOSPI
1,SK하이닉스,000660,KOSPI
2,NAVER,035420,KOSPI
3,카카오,035720,KOSPI
4,삼성바이오로직스,207940,KOSPI
...,...,...,...
365,맘스터치,220630,KOSDAQ
366,다날,064260,KOSDAQ
367,제이시스메디칼,287410,KOSDAQ
368,크리스에프앤씨,110790,KOSDAQ


# 우선 하나의 종목에 대해서만 학습 및 예측

### 종가 데이터 불러오기

In [6]:
start_date = '20210104'
end_date = '20211105'

stock_code = stock_list.loc[1,'종목코드'] # 삼성전자

stock_price = fdr.DataReader(stock_code, start = start_date, end = end_date)[['Close']].reset_index()

In [7]:
stock_price

Unnamed: 0,Date,Close
0,2021-01-04,126000
1,2021-01-05,130500
2,2021-01-06,131000
3,2021-01-07,134500
4,2021-01-08,138000
...,...,...
204,2021-11-01,106500
205,2021-11-02,107500
206,2021-11-03,105500
207,2021-11-04,106000


In [8]:
stock_price.columns = ['ds', 'y']
stock_price['ds'] = pd.to_datetime(stock_price['ds'])

In [9]:
stock_price.dtypes

ds    datetime64[ns]
y              int64
dtype: object

### train set, test set 나누어주기

In [10]:
train = stock_price.iloc[:204, :]
test = stock_price.iloc[204:, :]

In [14]:
train

Unnamed: 0,ds,y
0,2021-01-04,126000
1,2021-01-05,130500
2,2021-01-06,131000
3,2021-01-07,134500
4,2021-01-08,138000
...,...,...
199,2021-10-25,100000
200,2021-10-26,102000
201,2021-10-27,101500
202,2021-10-28,106500


### Grid Search 시행

In [15]:
import itertools
from prophet.diagnostics import cross_validation, performance_metrics

# 그리드 서치 진행할 파라미터 넣어주기
param_grid = {  
    'changepoint_prior_scale': [0.001, 0.01, 0.05, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.05, 0.1, 0.5, 5, 10]
}

# 모든 파라미터에 대한 조합 생성
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]

# 각 파라미터에 대한 MAE 값을 저장하기 위한 리스트 초기화
mae = []

##########################################################
### Time-Series CV를 이용한 최적의 하이퍼파라미터 그리드서치 진행 ###
#########################################################

# 월별 cutoff 설정
cutoffs = pd.to_datetime(['2021-03-01', '2021-05-01', '2021-07-01', '2021-09-01'])

for params in all_params:
    m = Prophet(**params).fit(train) # 생성된 파라미터 조합으로 모델 피팅 진행
    df_cv = cross_validation(m, cutoffs = cutoffs, horizon = '5 days', parallel = "processes")
    df_p = performance_metrics(df_cv, rolling_window = 1) # 해당 파라미터로 cv 돌린 결과
    mae.append(df_p['mae'].values[0]) # 해당 파라미터로 돌린 cv 결과에서 mae만 뽑아내서 리스트에 저장해주기

# 파라미터 튜닝 결과 출력
tuning_results = pd.DataFrame(all_params)
tuning_results['mae'] = mae
print(tuning_results)

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fcd661c6b10>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fcd661d1650>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fcd66187b50>
INFO:prophet:Disabling yearly seasonality. Run prophet 

    changepoint_prior_scale  seasonality_prior_scale          mae
0                     0.001                     0.01  7948.411296
1                     0.001                     0.05  8339.372616
2                     0.001                     0.10  8192.806702
3                     0.001                     0.50  7894.380746
4                     0.001                     5.00  7771.180973
5                     0.001                    10.00  7783.408574
6                     0.010                     0.01  5807.900613
7                     0.010                     0.05  5738.639287
8                     0.010                     0.10  5676.575072
9                     0.010                     0.50  5651.675195
10                    0.010                     5.00  5666.932139
11                    0.010                    10.00  5757.349419
12                    0.050                     0.01  2513.760492
13                    0.050                     0.05  2513.546388
14        

### GridSearchCV를 이용하여 찾은 최적의 파라미터 값 저장

In [16]:
min_point = tuning_results.mae.min()
best_params = tuning_results.loc[tuning_results['mae'] == min_point, ]

In [17]:
best_changepoint_prior_scale = best_params['changepoint_prior_scale'].item()
best_seasonality_prior_scale = best_params['seasonality_prior_scale'].item()

In [18]:
best_params

Unnamed: 0,changepoint_prior_scale,seasonality_prior_scale,mae
13,0.05,0.05,2513.546388


### 최적의 파라미터를 이용하여 전체 train set에 대하여 학습

In [19]:
m = Prophet(changepoint_prior_scale = best_changepoint_prior_scale, 
            seasonality_prior_scale = best_seasonality_prior_scale)

m.fit(train)

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


<prophet.forecaster.Prophet at 0x7fcd61299050>

### 학습한 모델을 이용하여 test 날짜에 대한 예측 시행

In [20]:
pred = m.predict(test[['ds']])
pred = pred[['yhat']]
pred = pred.reset_index(drop = True)

In [21]:
test = test.reset_index(drop = True)

### NMAE * 100 값 확인

In [22]:
np.mean(np.abs(test['y'] - pred['yhat']) / pred['yhat']) * 100 # 쓰레기!

12.477368966846354