In [None]:
# Copyright © 2021 LEADERG Inc. All rights reserved. Please keep it private. Publish to internet is not allowed. 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from xgboost import XGBClassifier
import xgboost
import pickle
import sys
import re
import datetime
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
#import warnings
#warnings.filterwarnings('ignore')

In [None]:
%matplotlib inline

In [None]:
# parameters
# Paths are relative to the notebook's working directory.

# previously trained artefacts that this notebook loads
scaler_filename = "data/stock/model/scaler.pkl"
model_filename = "data/stock/model/model.sav"

# CSV read for inference, and CSV written with the appended predictions
inference_input_filename = "data/stock/inference_input.csv"
inference_output_filename = "data/stock/inference_output.csv"

In [None]:
# read csv
# `dataset_org` keeps the file exactly as parsed; `dataset` is an independent
# working copy that the following cells clean and convert.
dataset_org = pd.read_csv(filepath_or_buffer=inference_input_filename)
dataset = dataset_org.copy(deep=True)

In [None]:
# drop the first column
# (the original frame still carries it; the plot and report cells read
# `dataset_org['date']` from there)
first_column_label = dataset.columns[0]
dataset = dataset.drop(columns=[first_column_label])

In [None]:
# Convert every remaining column to numbers, one column at a time.
for column_label in dataset.columns:
    # deal with stock volume which has ',' (e.g. "1,234" -> "1234")
    comma_free = dataset[column_label].replace(',', '', regex=True)
    # anything that still cannot be parsed becomes NaN
    dataset[column_label] = pd.to_numeric(comma_free, errors='coerce')

In [None]:
# if dataset contains NaN, report every offending cell and then exit
missing_mask = dataset.isnull().values
if missing_mask.sum() > 0:
    for row_position, column_position in np.argwhere(missing_mask).tolist():
        # NOTE(review): the +2 offsets presumably map 0-based positions in the
        # cleaned frame back to 1-based spreadsheet coordinates (header row /
        # dropped first column) - confirm against the input layout.
        print("ERROR!Value (%d, %d) is empty!" %(int(row_position) + 2, int(column_position) + 2))
    sys.exit(1)

In [None]:
# get prediction day from the last column of the data
# The horizon `days` is the first run of digits found in the label of the
# last column of the input file.
close_day = str(dataset_org.columns[-1])

horizon_matches = re.findall(r'(\d+)', close_day)
if not horizon_matches:
    # Fail with an explicit message instead of an IndexError on an empty list.
    raise ValueError(
        "Cannot determine the prediction horizon: the last column name %r contains no number."
        % close_day
    )

dataset_column = dataset.shape[1]

days = int(horizon_matches[0])

# All rows except the last `days` ones. The stop index is computed explicitly
# because `dataset[0:-days]` would be empty (not the whole frame) when days == 0.
datasetNoNA = dataset.iloc[:max(len(dataset) - days, 0)]
datasetNoNA_all = dataset

In [None]:
# Split the cleaned frame: every column except the last goes to X,
# the last column goes to Y.
X, Y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

print("Shape of X = ", X.shape)
print("Shape of Y = ", Y.shape)

In [None]:
# normalization
from pickle import load

# load the scaler
# NOTE: unpickling can execute arbitrary code; only load scaler files that
# come from a trusted source.
# The context manager closes the file handle once the scaler is loaded.
with open(scaler_filename, 'rb') as scaler_file:
    scaler = load(scaler_file)

# scale the inference features with the loaded scaler
X_norm = scaler.transform(X)

In [None]:
# load the model from disk
# NOTE: unpickling can execute arbitrary code; only load model files that
# come from a trusted source.
# The context manager closes the file handle once the model is loaded.
with open(model_filename, 'rb') as model_file:
    regr = pickle.load(model_file)

In [None]:
# calculate predictions
# Run the loaded model on the scaled feature matrix produced by the scaler.
predictions = regr.predict(X_norm)

In [None]:
# print RMSE, R2
# The last `days` rows are left out of the score.
# NOTE(review): presumably their true target is not known yet - confirm.
# The stop index is computed explicitly because a `[0:-days]` slice is empty
# (not the full range) when days == 0.
eval_count = max(len(predictions) - days, 0)
y_true_eval = Y.iloc[:eval_count]
y_pred_eval = predictions[:eval_count]

print('\n均方根誤差: %.5f' %( np.sqrt(metrics.mean_squared_error(y_true_eval, y_pred_eval))))
print('R 平方值: %.5f\n' %( metrics.r2_score(y_true_eval, y_pred_eval)))

In [None]:
# restore to original predictions
# The scaler is applied to X only in this notebook, so Y needs no inverse
# transform; the model output is likewise used as-is.
Y_org = Y
predictions_cal = predictions.copy()

predictions_org = pd.DataFrame(predictions_cal, columns=['prediction'])

# Last row, second-to-last column. It is read from the cleaned numeric frame
# rather than from `dataset_org`: the raw frame may still hold text such as
# "1,234.5", which would make the '%.2f' formatting below raise a TypeError.
# `dataset` is `dataset_org` minus its first column, so position -2 addresses
# the same column in both frames.
last_day_close = dataset.iloc[-1, -2]

# Purely positional access to the last row (no index label passed to iloc).
print(dataset_org['date'].iloc[-1], '收盤價 %.2f 元' %(last_day_close))

In [None]:
# For each of the last `days` predictions (oldest first) collect the predicted
# close price, its change versus the last known close, and that change in percent.
predictions_close_price = []
predictions_updown = []
predictions_amp = []

# Normalise the reference close once, before the loop. It may still be raw CSV
# text with thousands separators (e.g. "1,234.5"), which pd.to_numeric cannot
# parse unless the commas are removed first.
if isinstance(last_day_close, str):
    last_day_close = last_day_close.replace(',', '')
last_day_close = pd.to_numeric(last_day_close)

for days_back in range(days, 0, -1):
    predicted_close = predictions_cal[-days_back]
    updown = predicted_close - last_day_close
    amp = updown / last_day_close
    predictions_close_price.append(predicted_close)
    predictions_updown.append(updown)
    # stored as a percentage
    predictions_amp.append(amp * 100)

In [None]:
#Calculate the display digits
def _integer_part_width(value):
    """Return the number of characters of ``int(value)`` written as text (a minus sign counts)."""
    return len(str(int(value)))


display_predictions_close_price = _integer_part_width(max(predictions_close_price))

# Price change: use the wider of the two extremes; one extra character is
# added when the minimum is not the strictly wider one.
updown_min_width = _integer_part_width(min(predictions_updown))
updown_max_width = _integer_part_width(max(predictions_updown))
display_predictions_updown = updown_min_width if updown_min_width > updown_max_width else updown_max_width + 1

# Percentage change: same rule, but without the extra character.
# NOTE(review): the asymmetry with the price-change width is kept as found.
amp_min_width = _integer_part_width(min(predictions_amp))
amp_max_width = _integer_part_width(max(predictions_amp))
display_predictions_amp = amp_min_width if amp_min_width > amp_max_width else amp_max_width

# Each field width is passed through '*'; the +3 is added to every width
# (the format prints a decimal point and two decimals).
report_template = '天後收盤價 %*.2f 元，漲跌 % +*.2f 元 ( % +*.2f %%)'
for day_index in range(days):
    report_values = (
        display_predictions_close_price + 3, predictions_close_price[day_index],
        display_predictions_updown + 3, predictions_updown[day_index],
        display_predictions_amp + 3, predictions_amp[day_index],
    )
    print('AI 預測', day_index + 1, report_template % report_values)
print('免責聲明：LEADERG AI ZOO，預測結果僅供技術研究，不負任何責任。')

In [None]:
# save predictions: the original input columns plus a 'prediction' column,
# written to `inference_output_filename` without the index
dataset_prediction = pd.concat(objs=[dataset_org, predictions_org], axis="columns")
dataset_prediction.to_csv(path_or_buf=inference_output_filename, index=False)

In [None]:
# show results
# x axis: the dates from row `days` onward, followed by `days` projected
# trading days, so the axis has one entry per prediction.
plt_x_ori = dataset_org['date']
plt_x = pd.to_datetime(plt_x_ori[days:], format='%Y-%m-%d')
plt_x_length = plt_x.shape[0]

if days > 0:
    # Project the next `days` business days (Mon-Fri) after the last known date.
    # Business-day arithmetic replaces the manual weekend skipping, which could
    # land on a weekend (e.g. 6 trading days after a Friday).
    # Public holidays are not taken into account.
    last_known_date = plt_x.iloc[-1]
    future_dates = pd.bdate_range(start=last_known_date + pd.offsets.BDay(1), periods=days)
    # Continue the integer labels right after the last existing one.
    first_new_label = plt_x.index[-1] + 1
    future_series = pd.Series(future_dates.values, index=range(first_new_label, first_new_label + days))
    # Append all projected dates at once instead of growing the Series row by row.
    plt_x = pd.concat([plt_x, future_series])

fig, ax = plt.subplots(figsize=(12,8))

# The true curve covers every x position except the `days` projected ones.
# An explicit stop index also covers days == 0, where `[0:-days]` would be empty.
known_count = len(plt_x) - days
line_1 = ax.plot(plt_x.iloc[:known_count], Y_org.iloc[:known_count], 'b', label='True')
line_2 = ax.plot(plt_x, predictions_org.iloc[:,-1], 'r--', label='Predicted')
ax.set_ylabel('Predicted Value')
ax.legend()
plt.show()