In [None]:
!python -m pip install schedule
!python -m pip install pystan
!python -m pip install fbprophet
!python -m pip install finance-datareader

In [2]:
import os
import time
import schedule
import numpy as np
import pandas as pd
import tensorflow as tf
import FinanceDataReader as fdr
import matplotlib.pyplot as plt
from time import sleep
from fbprophet import Prophet
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, Lambda
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [3]:
# Load the input table from an Excel workbook in the working directory.
# pred_machine (defined below) addresses a 'DATE' column and a 'y' column in
# this frame and passes every column after the first one to a MinMaxScaler.
data = pd.read_excel('./samsung.xlsx')

In [4]:
def windowed_dataset(series, window_size, batch_size, shuffle):
    """Turn a 1-D series into batched (window, next value) training pairs.

    Args:
        series: sequence of values; a trailing axis of size 1 is appended.
        window_size: number of consecutive values used as the model input.
        batch_size: number of (input, label) pairs per batch.
        shuffle: when truthy, shuffle the windows with a buffer of 1000.

    Returns:
        A tf.data.Dataset yielding (inputs, label) batches, where each input
        is ``window_size`` consecutive values and the label is the value that
        immediately follows them.
    """
    # Every window carries one extra element: the label that follows the inputs.
    span = window_size + 1
    expanded = tf.expand_dims(series, axis=-1)

    windows = (
        tf.data.Dataset.from_tensor_slices(expanded)
        .window(span, shift=1, drop_remainder=True)
        .flat_map(lambda win: win.batch(span))
    )
    if shuffle:
        windows = windows.shuffle(1000)

    # Split each window into its leading inputs and its final element (label).
    pairs = windows.map(lambda win: (win[:-1], win[-1]))
    return pairs.batch(batch_size).prefetch(1)

In [5]:
def _forecast_next_features(data):
    """Forecast one step ahead for every feature column of ``data`` with Prophet.

    A separate Prophet model is fitted for each column other than 'DATE' and
    'y', using 'DATE' as the time axis, and asked for one extra period at
    daily frequency.

    Args:
        data: DataFrame with a 'DATE' column and at least one feature column.

    Returns:
        A one-row DataFrame holding the forecast value of each feature and the
        forecast 'DATE'. It has no 'y' column.

    Raises:
        ValueError: if ``data`` has no column other than 'DATE' and 'y'.
    """
    next_row = {}
    for col in data.columns:
        if col in ('DATE', 'y'):
            continue

        # Prophet expects the time column to be named 'ds' and the target 'y'.
        history = data[['DATE', col]].rename(columns={'DATE': 'ds', col: 'y'})

        prophet = Prophet(seasonality_mode='multiplicative',
                          yearly_seasonality=True,
                          weekly_seasonality=True, daily_seasonality=True,
                          changepoint_prior_scale=0.5)
        prophet.fit(history)

        future_data = prophet.make_future_dataframe(periods=1, freq='d')
        forecast_data = prophet.predict(future_data)

        # Only the last row is the new, out-of-sample period.
        next_row[col] = forecast_data['yhat'].iloc[-1]
        next_row['DATE'] = forecast_data['ds'].iloc[-1]

    if not next_row:
        raise ValueError("data has no feature columns besides 'DATE' and 'y'")
    return pd.DataFrame([next_row])


def pred_machine(data, n_steps=10):
    """Train a Conv1D + LSTM model on the scaled 'y' series and roll it forward.

    The network is trained once on sliding windows over the first 80% of the
    scaled 'y' column and validated on the last 20%. Then, ``n_steps`` times,
    one row is appended to the frame: its feature columns come from Prophet
    forecasts and its 'y' is the network's prediction from the preceding
    WINDOW_SIZE values of 'y'.

    Args:
        data: DataFrame whose first column is left out of scaling (it is
            addressed as 'DATE' further down) and whose remaining columns are
            numeric and include 'y'. The result is re-ordered with a fixed
            column list, so every column in that list must be present.
        n_steps: number of forecast rows to append. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        DataFrame containing the input rows followed by ``n_steps`` forecast
        rows, with values mapped back to their original scale.

    Raises:
        ValueError: if either the training or the validation part of 'y' has
            too few rows to form a single window.
    """
    WINDOW_SIZE = 120
    BATCH_SIZE = 32

    # Column order of the returned frame.
    output_cols = ['DATE', '거래량', 'PER', 'PBR', '기관 합계', '기타법인', '개인', '외국인 합계', 'ATR',
                   'NASDAQ', 'S&P', 'CBOE', 'Exchange rate', 'futures2y', 'futures10y',
                   'y']

    scaler = MinMaxScaler()
    scale_cols = list(data.columns[1:])
    scaled = scaler.fit_transform(data[scale_cols])
    df = pd.DataFrame(scaled, columns=scale_cols)

    # Chronological split: no shuffling, the last 20% is the validation part.
    y_train, y_test = train_test_split(df['y'], test_size=0.2, shuffle=False)

    # A window needs WINDOW_SIZE inputs plus one label; fail early and clearly
    # instead of deep inside Keras with an empty dataset.
    if min(len(y_train), len(y_test)) <= WINDOW_SIZE:
        raise ValueError(
            "need more than %d rows in both the training (%d) and validation (%d) "
            "parts of 'y'" % (WINDOW_SIZE, len(y_train), len(y_test)))

    train_data = windowed_dataset(y_train, WINDOW_SIZE, BATCH_SIZE, True)
    test_data = windowed_dataset(y_test, WINDOW_SIZE, BATCH_SIZE, False)

    model = Sequential([
        Conv1D(filters=32, kernel_size=5,
               padding="causal",
               activation="relu",
               input_shape=[WINDOW_SIZE, 1]),
        LSTM(16, activation='tanh'),
        Dense(16, activation="relu"),
        Dense(1),
    ])

    loss = Huber()
    optimizer = Adam(0.0005)
    model.compile(loss=loss, optimizer=optimizer, metrics=['mse'])

    earlystopping = EarlyStopping(monitor='val_loss', patience=100, mode='min')
    filename = os.path.join('tmp', 'ckeckpointer.ckpt')
    checkpoint = ModelCheckpoint(filename,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 monitor='val_loss',
                                 verbose=1)

    # NOTE(review): the best weights are written to `filename` but are never
    # loaded back, so the forecasts below use the weights of the final epoch.
    model.fit(train_data,
              validation_data=test_data,
              epochs=500,
              callbacks=[checkpoint, earlystopping])

    y_pos = scale_cols.index('y')
    for _ in range(n_steps):
        # New row: Prophet-forecast features, 'y' left missing (NaN) for now.
        next_row = _forecast_next_features(data)
        df_row = pd.concat([data, next_row], ignore_index=True)

        # Reuse the scaler fitted before training so the network sees inputs
        # on the same scale it was trained on (no re-fit per step).
        pred_df = pd.DataFrame(scaler.transform(df_row[scale_cols]), columns=scale_cols)

        # The final window of the tail ends right before the missing 'y', so
        # the last prediction is the value for the newly appended row.
        _, p_y_test = train_test_split(pred_df['y'], test_size=0.2, shuffle=False)
        pred_test = windowed_dataset(p_y_test, WINDOW_SIZE, BATCH_SIZE, False)
        pred = model.predict(pred_test)

        # Single positional assignment; chained `iloc[-1]['y'] = ...` may write
        # to a temporary copy and leave the frame unchanged.
        pred_df.iloc[-1, y_pos] = float(pred[-1][0])

        restored = pd.DataFrame(scaler.inverse_transform(pred_df), columns=scale_cols)
        # Positional copy: does not depend on the two frames' index labels.
        restored['DATE'] = df_row['DATE'].to_numpy()
        data = restored[output_cols]
    return data

In [None]:
# Run the whole pipeline once on the loaded frame. The returned frame is this
# cell's displayed output; it is not assigned, so `data` still refers to the
# frame that was loaded from the workbook.
pred_machine(data)

In [None]:
# Every run forecasts from the frame as it is when this cell starts, which is
# the same object the scheduled job received before.
source_data = data


def refresh_forecast():
    """Re-run pred_machine on the source frame, store the result in `data`, print it."""
    global data
    data = pred_machine(source_data)
    print(data)


# Remove a copy of this job left behind by an earlier run of this cell, so
# re-running the cell does not stack several hourly jobs.
schedule.clear('pred_machine')
schedule.every().hour.do(refresh_forecast).tag('pred_machine')

# Produce the first forecast immediately instead of waiting an hour.
refresh_forecast()

# Poll the scheduler once a second. The job itself prints each fresh result,
# so nothing is printed between runs. Interrupt the kernel to stop.
while True:
    schedule.run_pending()
    time.sleep(1)