In [None]:
# 引入相關套件與函式庫
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt  
import seaborn as sns   
import statsmodels.api as sm  
import pymysql
import datetime
from statsmodels.graphics import tsaplots 
from tensorflow.keras.optimizers import Adam 
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, BatchNormalization, TimeDistributed, Flatten, Bidirectional,Dropout
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.regularizers import l2
from time import time
from sklearn import preprocessing
from joblib import dump, load
from datetime import timedelta
from attention import Attention
from keras_self_attention import SeqSelfAttention
from tensorflow.keras.utils import plot_model
import os
from PyEMD import CEEMDAN
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from math import sqrt
import time
import yfinance as yf
from datetime import datetime
import math
%matplotlib inline

In [None]:
# Disable pandas display truncation: every row and column of a displayed
# DataFrame is rendered in full.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# 抓新資料少量分解更新子訊號

In [None]:
# Fetch a recent span of ^TWII closing prices; consumed by several_CEEMDAN().
def take_latest_several_data(end_date,timerange):
    """Download ^TWII closing prices for the `timerange` days before `end_date`.

    Args:
        end_date: Date string in '%Y-%m-%d' format; passed to yfinance as `end`.
        timerange: Number of calendar days to look back from `end_date`.

    Returns:
        A tuple `(financial_new_data_df, financial_new_data)`: the downloaded
        frame reduced to its 'Close' column and sorted by index in descending
        order, and that 'Close' column as a plain list of floats.
    """
    end_date = datetime.strptime(end_date, '%Y-%m-%d').date()
    start_date = end_date - timedelta(days=timerange)
    financial_new_data = yf.download("^TWII", start=start_date, end=end_date)
    # errors='ignore' keeps this working when one of the listed columns is not
    # part of the download (the original drop raised KeyError in that case).
    financial_new_data = financial_new_data.drop(
        columns=['Open', 'High', 'Low', 'Adj Close', 'Volume'], errors='ignore')
    # Newest rows first: callers slice from the front of the list.
    financial_new_data = financial_new_data.sort_index(ascending=False)
    financial_new_data['Close'] = financial_new_data['Close'].replace('', np.nan)
    financial_new_data['Close'] = financial_new_data['Close'].astype(float) 
    financial_new_data_df = financial_new_data
    financial_new_data = financial_new_data['Close'].to_list()
    return financial_new_data_df,financial_new_data

In [None]:
# Run CEEMDAN on a small slice of recent data and return the decomposition
# (one row per component, one column per data point).
def several_CEEMDAN(end_date,assume_shortest_decompose_length,timerange,imf_num):
    """Decompose recent prices, growing the input until enough components appear.

    Starting with the first `assume_shortest_decompose_length` values returned by
    take_latest_several_data(), the slice is extended by 5 values per attempt
    until the decomposition yields at least `imf_num + 1` rows.

    Args:
        end_date: Date string '%Y-%m-%d' forwarded to take_latest_several_data().
        assume_shortest_decompose_length: Length of the first slice that is tried.
        timerange: Look-back in days forwarded to take_latest_several_data().
        imf_num: Passed to CEEMDAN as `max_imf`.

    Returns:
        `(new_imfs, decompose_time, select_and_decompose_time)`: the components
        from the last attempt, the duration of that attempt in seconds, and the
        duration of the whole search loop in seconds.

    Raises:
        ValueError: if no price data was returned for the requested range.
    """
    financial_new_data_df,financial_new_data = take_latest_several_data(end_date,timerange)
    if len(financial_new_data) == 0:
        # Previously this fell through to an UnboundLocalError on `new_imfs`.
        raise ValueError("no financial data available before %s" % end_date)
    # NOTE(review): no seed is set here; if CEEMDAN's noise is random, results
    # may differ between runs - confirm.
    ceemdan = CEEMDAN()
    new_imfs = None
    decompose_time = 0.0
    start = time.time()
    for i in range(0,len(financial_new_data),5):
        decompose_start = time.time()
        ceemdan.ceemdan(np.array(financial_new_data[:assume_shortest_decompose_length+i]).ravel(),max_imf=imf_num)
        new_imfs, _ = ceemdan.get_imfs_and_residue()
        # Always bound, so the return below cannot hit an unbound local even
        # when no attempt reaches the requested component count.
        decompose_time = time.time() - decompose_start
        if len(new_imfs) >= imf_num+1:
            print("分解時間:%f 秒" % decompose_time)
            break
    else:
        # Loop exhausted without a break: fall back to the last attempt.
        print("warning: only %d components obtained (wanted %d)" % (len(new_imfs), imf_num+1))
    end = time.time()
    select_and_decompose_time = end - start
    print("date:",end_date,"挑選數量+分解執行時間：%f 秒" % select_and_decompose_time)
    several_CEEMDAN_plot(new_imfs)
    return new_imfs, decompose_time, select_and_decompose_time

In [None]:
# Plot the components produced by the small-sample decomposition; called by several_CEEMDAN().
def several_CEEMDAN_plot(data_to_draw):
    """Draw each row of `data_to_draw` in its own subplot of the current figure.

    Args:
        data_to_draw: Array exposing `.shape` and indexable along its first
            axis; row i is plotted and labelled "IMF <i+1>".
    """
    component_count = data_to_draw.shape[0]
    for i in range(component_count):
        # Layout kept from the original: the grid has three more rows than
        # components and the first component goes into slot 2.
        plt.subplot(component_count+3,1,i+2)
        plt.plot(data_to_draw[i])
        plt.ylabel("IMF %i" %(i+1))
        plt.locator_params(axis='x', nbins=10)

In [None]:
# Write the freshly decomposed components into the matching imf tables in the database.
def New_imf_to_Database(end_date,assume_shortest_decompose_length,timerange,imf_num): 
    """Decompose the latest prices and REPLACE the rows of each `imf<N>` table.

    Component i (0-based) is written to table `imf<i+1>`, column
    `Close_^TWII_IMF<i+1>`. Its values are paired with the first len(component)
    index labels of the frame returned by take_latest_several_data().

    Args:
        end_date: Date string '%Y-%m-%d' forwarded to the helpers.
        assume_shortest_decompose_length: Forwarded to several_CEEMDAN().
        timerange: Look-back in days forwarded to the helpers.
        imf_num: Forwarded to several_CEEMDAN().

    Returns:
        The `(data_to_update, decompose_time, select_and_decompose_time)` tuple
        returned by several_CEEMDAN().
    """
    data_to_update, decompose_time, select_and_decompose_time = several_CEEMDAN(end_date,assume_shortest_decompose_length,timerange,imf_num)
    # Downloaded again only to recover the date index; several_CEEMDAN() does not return it.
    financial_new_data_df,financial_new_data = take_latest_several_data(end_date,timerange)
    insert_template = '''
        REPLACE INTO imf{} (`Date`, `Close_^TWII_IMF{}`)
        VALUES (%s, %s)
        '''
    # One connection for all tables, closed even if a statement fails
    # (the original opened one per IMF and leaked it on error).
    connection = pymysql.connect(host = '' ,  user = '' ,  passwd = "", db='')
    try:
        cursor = connection.cursor()
        for i in range(len(data_to_update)):
            imf_number = i+1
            imf_df = pd.DataFrame(data_to_update[i])
            imf_df.set_index(financial_new_data_df[:len(data_to_update[i])].index, inplace=True)
            imf_df = imf_df.rename(columns={0: 'Close_^TWII_IMF'})
            imf_df = imf_df.reset_index()
            data = imf_df.values.tolist()
            insert_query = insert_template.format(imf_number,imf_number)
            cursor.executemany(insert_query, data)
            # Commit per table, as before, so earlier tables stay updated if a later one fails.
            connection.commit()
    finally:
        connection.close()
    print("更新資料表")
    return data_to_update, decompose_time, select_and_decompose_time

# 增量預測

In [None]:
def createSlidingWindow(dataset,windowSize):
    """Split a sequence into overlapping windows and the value following each one.

    Args:
        dataset: Sliceable sequence (list or array).
        windowSize: Number of consecutive items in each input window.

    Returns:
        `(x, y)` as numpy arrays: `x[k]` is `dataset[k:k+windowSize]` and `y[k]`
        is `dataset[k+windowSize]`. Both are empty when `dataset` has no more
        than `windowSize` items.
    """
    sample_count = len(dataset) - windowSize
    windows = [dataset[first:first + windowSize] for first in range(sample_count)]
    targets = [dataset[first + windowSize] for first in range(sample_count)]
    return np.array(windows), np.array(targets)

In [None]:
def predict_financial(begin_date, table_name, model_structure, imf_num, model_path):
    """Predict one value for a single IMF series, then fine-tune and re-save its model.

    Reads the newest `window_size + 1` rows dated before `begin_date` from
    `table_name`, min-max scales them, predicts from the single resulting
    window, then fits the model on that same window/target pair and overwrites
    the file at `model_path`.

    Args:
        begin_date: Date string '%Y-%m-%d'; only rows with `Date` earlier than this are read.
        table_name: Database table holding the IMF series.
        model_structure: Not used by this function; kept so existing callers still work.
        imf_num: IMF number; selects column `Close_^TWII_IMF<imf_num>`.
        model_path: Path of the saved Keras model to load and overwrite.

    Returns:
        The prediction mapped back to the original value scale.

    Raises:
        ValueError: if `begin_date` is malformed or fewer than
            `window_size + 1` rows are available.
    """
    window_size = 30
    # Round-trip through strptime to validate and normalise the date string.
    end_date = datetime.strptime(begin_date, '%Y-%m-%d').strftime("%Y-%m-%d")
    # Fetch the rows. Database errors now propagate: the original printed them
    # and then failed later with an unrelated NameError/KeyError.
    select_sql = '''SELECT `Close_^TWII_IMF{}`, `Date`
                        FROM `{}`
                        WHERE `Date` < '{}'
                        ORDER BY `Date` DESC LIMIT {} '''.format(imf_num, table_name, end_date, window_size + 1)
    conn  =  pymysql.connect ( host = '' ,  user = '' ,  passwd = "" ,  db = '' )
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_sql)
            results_values_list = cur.fetchall()
            result_key_list = [i[0] for i in cur.description]
        finally:
            cur.close()
    finally:
        # The original never reached its close() calls because
        # `reframed.datetime` raised AttributeError on every call.
        conn.close()
    if len(results_values_list) < window_size + 1:
        raise ValueError("need %d rows before %s in %s, got %d"
                         % (window_size + 1, end_date, table_name, len(results_values_list)))
    reframed = pd.DataFrame(list(results_values_list), columns=result_key_list).set_index('Date')
    # Prepare the fetched values for prediction.
    reframed = reframed.rename(columns={'Close_^TWII_IMF{}'.format(imf_num) : 'Close_^TWII'})
    reframed['Close_^TWII'] = reframed['Close_^TWII'].replace('', np.nan)
    reframed['Close_^TWII'] = reframed['Close_^TWII'].astype(float)   
    scaler = MinMaxScaler(feature_range=(0,1))
    test = np.array(reframed['Close_^TWII'].iloc[-(window_size+1):])
    # Rows arrive newest-first (ORDER BY Date DESC); flip to oldest-first.
    test = test[::-1]
    # The scaler is fitted on all window_size + 1 values, target included.
    test_scaled_data = scaler.fit_transform(test.reshape(-1,1))
    print(len(test_scaled_data))
    x_test,y_test = createSlidingWindow(test_scaled_data,window_size)
    # Load the pre-trained model, predict, then update the model on the same sample.
    vanilla_model = load_model(model_path)
    test_predict = vanilla_model.predict(x_test)
    predict_Close = scaler.inverse_transform(test_predict)
    vanilla_model.compile(loss='mse', optimizer='Adam') 
    vanilla_model.fit(x_test,y_test,batch_size=60,epochs=100)
    # Overwrites the model file, so the next call starts from the updated weights.
    vanilla_model.save(model_path) 
    del vanilla_model
    print(predict_Close[0][0])
    return predict_Close[0][0]

In [None]:
# Build the list of calendar dates in a range.
def datelist(start_date,end_date):
    """Return every date from `start_date` to `end_date` inclusive as '%Y-%m-%d' strings.

    Both arguments are '%Y-%m-%d' strings. The result is empty when
    `end_date` is earlier than `start_date`.
    """
    first_day = datetime.strptime(start_date, '%Y-%m-%d')
    last_day = datetime.strptime(end_date, '%Y-%m-%d')
    day_span = (last_day - first_day).days
    return [
        (first_day + timedelta(days=offset)).strftime('%Y-%m-%d')
        for offset in range(day_span + 1)
    ]

In [None]:
# First half-year evaluation period: inclusive calendar-date range.
start_day_1 = '2020-12-26'
end_day_1 = '2021-07-02'
date_range_list_1 = datelist(start_day_1,end_day_1)

In [None]:
# Second half-year evaluation period: inclusive calendar-date range.
start_day_2 = '2021-07-06'
end_day_2 = '2021-12-24'
date_range_list_2 = datelist(start_day_2,end_day_2)

In [None]:
# Second half-year period, shortened variant ("two months" in the original note).
start_day_2_2_month = '2021-10-11'
end_day_2_2_month = '2021-12-24'
date_range_list_2_2_month = datelist(start_day_2_2_month,end_day_2_2_month)

In [None]:
# Third half-year evaluation period: inclusive calendar-date range.
start_day_3 = '2021-12-28'
end_day_3 = '2022-06-30'
date_range_list_3 = datelist(start_day_3,end_day_3)

In [None]:
# Fourth half-year evaluation period: inclusive calendar-date range.
start_day_4 = '2022-07-01'
end_day_4 = '2023-01-01'
date_range_list_4 = datelist(start_day_4,end_day_4)

# 執行預測

In [None]:
# Run the daily decompose-then-predict loop over a list of dates.
def predict_date_range(date_range_list):
    """Update the IMF tables and predict the summed IMF value for each date.

    For every date, New_imf_to_Database() refreshes the imf tables, then
    predict_financial() is called for imf1, imf2 and imf3 and the three
    results are added together.

    Args:
        date_range_list: Iterable of '%Y-%m-%d' date strings, processed in order.

    Returns:
        `(predict_list, execute_time_list)`: the summed prediction and the
        elapsed seconds for each date, in input order. Both are empty when
        `date_range_list` is empty.
    """
    predict_list = []
    execute_time_list = []
    all_time_start = time.time()
    all_decompose_time = 0
    all_select_and_decompose_time = 0
    # Initial slice length tried by the decomposition.
    original_len = 15
    for i, current_date in enumerate(date_range_list):
        start = time.time()
        # 500-day look-back, max_imf=2; the returned components are not needed here.
        _, decompose_time, select_and_decompose_time = New_imf_to_Database(current_date,original_len,
                                                                          500,2)
        imf1_result = predict_financial('{}'.format(current_date), 'imf1', 'vanilla', 1, 
                                        './model/simple-LSTM-model-result(3imf)-1-30.h5')
        imf2_result = predict_financial('{}'.format(current_date), 'imf2', 'vanilla', 2, 
                                        './model/simple-LSTM-model-result(3imf)-2-30.h5')
        imf3_result = predict_financial('{}'.format(current_date), 'imf3', 'vanilla', 3, 
                                        './model/simple-LSTM-model-result(3imf)-3-30.h5')
        imf_predict_sum = imf1_result + imf2_result + imf3_result
        predict_list.append(imf_predict_sum)
        end = time.time()
        execute_time_list.append(end - start)
        print(current_date,imf_predict_sum,"一天的分解+預測執行時間：%f 秒" % (end - start))
        # Short pause at fixed iteration counts, kept from the original
        # (NOTE(review): presumably to ease load on the data source - confirm).
        if i in (150, 300, 450):
            time.sleep(1)
        all_decompose_time += decompose_time
        all_select_and_decompose_time += select_and_decompose_time
    all_time_end = time.time()
    print("全部執行時間：%f 秒" % (all_time_end - all_time_start),"全部挑選分解時間：",all_select_and_decompose_time,
          "資料點數量：",len(date_range_list), "總分解時間：",all_decompose_time)
    # Guard the average: an empty list used to raise ZeroDivisionError here.
    if len(date_range_list) > 0:
        print("平均一個回合：",(all_time_end - all_time_start)/len(date_range_list))
    return predict_list, execute_time_list

In [None]:
# 3 IMFs: run the incremental decompose + predict loop over period 1.
# Each call re-fits and overwrites the saved models (see predict_financial).
predict_list_1, execute_time_list_1 = predict_date_range(date_range_list_1)

In [None]:
# 3 IMFs: run the incremental decompose + predict loop over period 2.
predict_list_2, execute_time_list_2 = predict_date_range(date_range_list_2)

In [None]:
# 3 IMFs: run the incremental decompose + predict loop over period 3.
predict_list_3, execute_time_list_3 = predict_date_range(date_range_list_3)

In [None]:
# 3 IMFs: second run over period 3, stored under separate names.
# predict_financial overwrites the model files on every call, so this run
# starts from the models left behind by the previous runs.
predict_list_3_test, execute_time_list_3_test = predict_date_range(date_range_list_3)

In [None]:
# NOTE(review): `date_range_list_few_data` is not defined in any cell visible
# here; this fails on a fresh kernel unless it is defined elsewhere - confirm.
predict_list_few_data, execute_time_list_few_data = predict_date_range(date_range_list_few_data)

In [None]:
# 3 IMFs: run the incremental decompose + predict loop over period 4.
predict_list_4, execute_time_list_4 = predict_date_range(date_range_list_4)

In [None]:
# Same call as the previous cell: re-runs period 4 and overwrites its results
# (and re-fits the saved models again).
predict_list_4, execute_time_list_4 = predict_date_range(date_range_list_4)

# 比較實際值與預測結果

In [None]:
# Load the actual closing prices for a date range as a DataFrame.
def get_true_value(start_day,end_day):
    """Read `Close_^TWII` from table `financial_data` for an inclusive date range.

    Args:
        start_day: Lower bound for `Date` (inclusive), formatted into the SQL text.
        end_day: Upper bound for `Date` (inclusive), formatted into the SQL text.

    Returns:
        DataFrame indexed by `Date` with a float `Close_^TWII` column; empty
        when no rows match.
    """
    select_sql = '''SELECT `Close_^TWII`, `Date`
                    FROM `financial_data` 
                    WHERE `Date` >= '{}' AND `Date` <= '{}' '''.format(start_day, end_day)
    conn  =  pymysql.connect ( host = '' ,  user = '' ,  passwd = "" ,  db = '' ) 
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_sql)
            results_values_list = cur.fetchall()
            result_key_list = [i[0] for i in cur.description]
        finally:
            cur.close()
    finally:
        # Close the connection even when the query fails (it leaked before).
        conn.close()
    # Passing columns= here keeps an empty result valid; assigning
    # `.columns` afterwards raised a length-mismatch error on zero rows.
    true_day_value = pd.DataFrame(list(results_values_list), columns=result_key_list)
    true_day_value = true_day_value.set_index('Date')

    true_day_value['Close_^TWII'] = true_day_value['Close_^TWII'].replace('', np.nan)
    true_day_value['Close_^TWII'] = true_day_value['Close_^TWII'].astype(float)  
    return true_day_value

In [None]:
# Combine the actual values and the predictions into one DataFrame.
def concat_true_and_predict_value(start_day, end_day, predict_list,execute_list=None):
    """Join actual prices, predictions and (optionally) execution times by date.

    Args:
        start_day: First date of the range, '%Y-%m-%d'.
        end_day: Last date of the range (inclusive), '%Y-%m-%d'.
        predict_list: One prediction per calendar date in the range.
        execute_list: Optional execution time per calendar date. When omitted,
            the result has no `execute_time` column; this keeps the
            three-argument calls elsewhere in the notebook working.

    Returns:
        DataFrame indexed by date with `Close_^TWII`, `predict` and, if given,
        `execute_time`; rows with any missing value are dropped.
    """
    true_day_value = get_true_value(start_day,end_day)
    predict_datelist = datelist(start_day,end_day)
    predict_df = pd.DataFrame({'Date': predict_datelist, 'predict': predict_list})
    predict_df = predict_df.set_index('Date')
    # Rows align on index labels.
    # NOTE(review): assumes the database `Date` values compare equal to these
    # '%Y-%m-%d' strings - confirm against the table schema.
    merged_df = pd.concat([true_day_value, predict_df], axis=1)
    if execute_list is not None:
        execute_time_df = pd.DataFrame({'Date': predict_datelist, 'execute_time': execute_list})
        execute_time_df = execute_time_df.set_index('Date')
        merged_df = pd.concat([merged_df, execute_time_df], axis=1)
    # Dates with no actual value (or no prediction) carry NaN and are removed here.
    merged_df = merged_df.dropna()
    return merged_df

In [None]:
# Period 1: actual values joined with predictions and execution times; displayed in full.
concat_df_1= concat_true_and_predict_value(start_day_1,end_day_1,predict_list_1,execute_time_list_1)
concat_df_1

In [None]:
# Period 1: plot actual close against the summed IMF prediction, then print fit metrics.
plt.figure(figsize=(10 ,5))
plt.plot(concat_df_1['Close_^TWII'].to_list())
plt.plot(concat_df_1['predict'].to_list(),c='y')
plt.legend(['data', 'CEEMDAN_LSTM_prediction'], loc='upper right')
# plt.plot(concat_df_1['accumulative_error'].to_list())
# plt.ylim([14000,19000])
plt.show()

LSTM_R2 = r2_score(concat_df_1['Close_^TWII'],concat_df_1['predict'])
LSTM_MSE = mean_squared_error(concat_df_1['Close_^TWII'],concat_df_1['predict'])
LSTM_MAE = mean_absolute_error(concat_df_1['Close_^TWII'],concat_df_1['predict'])
k = concat_df_1['Close_^TWII']
r = concat_df_1['predict']
# Mean absolute relative error, in percent.
LSTM_MRE = np.mean(np.abs((k - r) / k)) * 100
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))
print("LSTM_MRE=",LSTM_MRE)

In [None]:
# Add the absolute error per row and its running total as new columns on
# concat_df_1 (the frame is modified in place).
concat_df_1['error']=abs(concat_df_1['Close_^TWII']-concat_df_1['predict'])
concat_df_1['accumulative_error']=concat_df_1['error'].cumsum()
concat_df_1

In [None]:
# Period 1: plot the cumulative absolute error, labelling every 15th date on the x axis.
plt.figure(figsize=(10 ,5))
plt.plot(concat_df_1.index, concat_df_1['accumulative_error'].to_list())
x_ticks_positions = range(0, len(concat_df_1.index), 15)
x_ticks_labels = concat_df_1.index[::15]
plt.xticks(x_ticks_positions, x_ticks_labels, rotation=45)
plt.show()

In [None]:
# Period 2: actual values joined with predictions and execution times; displayed in full.
concat_df_2= concat_true_and_predict_value(start_day_2,end_day_2,predict_list_2,execute_time_list_2)
concat_df_2

In [None]:
# Period 2: plot actual close against the summed IMF prediction (y axis fixed
# to 16000-18500), then print fit metrics.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_2['Close_^TWII'].to_list())
plt.plot(concat_df_2['predict'].to_list(),c='y')
plt.legend(['data', 'CEEMDAN_LSTM_prediction'], loc='upper right')
plt.ylim([16000,18500])
# plt.xlim([144,244])
plt.show()

LSTM_R2 = r2_score(concat_df_2['Close_^TWII'],concat_df_2['predict'])
LSTM_MSE = mean_squared_error(concat_df_2['Close_^TWII'],concat_df_2['predict'])
LSTM_MAE = mean_absolute_error(concat_df_2['Close_^TWII'],concat_df_2['predict'])
k = concat_df_2['Close_^TWII']
r = concat_df_2['predict']
# Mean absolute relative error, in percent.
LSTM_MRE = np.mean(np.abs((k - r) / k)) * 100
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))
print("LSTM_MRE=",LSTM_MRE)

In [None]:
# Period 2: add absolute and cumulative error columns to concat_df_2 (in
# place), then plot the cumulative error with every 15th date labelled.
concat_df_2['error']=abs(concat_df_2['Close_^TWII']-concat_df_2['predict'])
concat_df_2['accumulative_error']=concat_df_2['error'].cumsum()
plt.figure(figsize=(10 ,5))
plt.plot(concat_df_2.index, concat_df_2['accumulative_error'].to_list())
x_ticks_positions = range(0, len(concat_df_2.index), 15)
x_ticks_labels = concat_df_2.index[::15]
plt.xticks(x_ticks_positions, x_ticks_labels, rotation=45)
plt.show()

In [None]:
# Period 3: actual values joined with predictions and execution times; displayed in full.
concat_df_3= concat_true_and_predict_value(start_day_3,end_day_3,predict_list_3,execute_time_list_3)
concat_df_3

In [None]:
# Period 3: plot actual close against the summed IMF prediction, then print fit metrics.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_3['Close_^TWII'].to_list())
plt.plot(concat_df_3['predict'].to_list(),c='y')
plt.legend(['data', 'CEEMDAN_LSTM_prediction'], loc='upper right')
# plt.ylim([16000,18500])
# plt.xlim([144,244])
plt.show()

LSTM_R2 = r2_score(concat_df_3['Close_^TWII'],concat_df_3['predict'])
LSTM_MSE = mean_squared_error(concat_df_3['Close_^TWII'],concat_df_3['predict'])
LSTM_MAE = mean_absolute_error(concat_df_3['Close_^TWII'],concat_df_3['predict'])
k = concat_df_3['Close_^TWII']
r = concat_df_3['predict']
# Mean absolute relative error, in percent.
LSTM_MRE = np.mean(np.abs((k - r) / k)) * 100
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))
print("LSTM_MRE=",LSTM_MRE)

In [None]:
# Period 3: add absolute and cumulative error columns to concat_df_3 (in
# place), then plot the cumulative error with every 15th date labelled.
concat_df_3['error']=abs(concat_df_3['Close_^TWII']-concat_df_3['predict'])
concat_df_3['accumulative_error']=concat_df_3['error'].cumsum()
plt.figure(figsize=(10 ,5))
plt.plot(concat_df_3.index, concat_df_3['accumulative_error'].to_list())
x_ticks_positions = range(0, len(concat_df_3.index), 15)
x_ticks_labels = concat_df_3.index[::15]
plt.xticks(x_ticks_positions, x_ticks_labels, rotation=45)
plt.show()

In [None]:
# Period 1 comparison frame for the "s1" result set.
# NOTE(review): `predict_list_s1` / `execute_time_list_s1` are not defined in
# any cell visible here - confirm where they come from.
concat_df_s1= concat_true_and_predict_value(start_day_1,end_day_1,predict_list_s1,execute_time_list_s1)
concat_df_s1

In [None]:
# Period 1: overlay the actual close, the "s1" prediction (violet) and the
# concat_df_1 prediction (yellow); the metrics below are for "s1" only.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_s1['Close_^TWII'].to_list())
plt.plot(concat_df_s1['predict'].to_list(),c='violet')

# plt.plot(concat_df_1['Close_^TWII'].to_list())
plt.plot(concat_df_1['predict'].to_list(),c='y')

plt.legend(['data', 'CEEMDAN_prediction','CEEMDAN_prediction(incremental)'], loc='upper right')
plt.ylim([14000,19000])
# plt.xlim([40,100])
plt.show()

LSTM_R2 = r2_score(concat_df_s1['Close_^TWII'],concat_df_s1['predict'])
LSTM_MSE = mean_squared_error(concat_df_s1['Close_^TWII'],concat_df_s1['predict'])
LSTM_MAE = mean_absolute_error(concat_df_s1['Close_^TWII'],concat_df_s1['predict'])
k = concat_df_s1['Close_^TWII']
r = concat_df_s1['predict']
# Mean absolute relative error, in percent.
LSTM_MRE = np.mean(np.abs((k - r) / k)) * 100
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))
print("LSTM_MRE=",LSTM_MRE)

In [None]:
# Period 2 comparison frame for the "s2" result set.
# NOTE(review): `predict_list_s2` / `execute_time_list_s2` are not defined in
# any cell visible here - confirm where they come from.
concat_df_s2= concat_true_and_predict_value(start_day_2,end_day_2,predict_list_s2,execute_time_list_s2)
concat_df_s2

In [None]:
# "s2" results for period 2: plot actual vs predicted (y axis fixed to
# 16000-18500), then print fit metrics.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_s2['Close_^TWII'].to_list())
plt.plot(concat_df_s2['predict'].to_list(),c='y')
plt.legend(['data', 'CEEMDAN_LSTM_prediction'], loc='upper right')
plt.ylim([16000,18500])
# plt.xlim([144,244])
plt.show()

LSTM_R2 = r2_score(concat_df_s2['Close_^TWII'],concat_df_s2['predict'])
LSTM_MSE = mean_squared_error(concat_df_s2['Close_^TWII'],concat_df_s2['predict'])
LSTM_MAE = mean_absolute_error(concat_df_s2['Close_^TWII'],concat_df_s2['predict'])
k = concat_df_s2['Close_^TWII']
r = concat_df_s2['predict']
# Mean absolute relative error, in percent.
LSTM_MRE = np.mean(np.abs((k - r) / k)) * 100
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))
print("LSTM_MRE=",LSTM_MRE)

In [None]:
# Period 3 comparison frame for the "s3" result set.
# NOTE(review): `predict_list_s3` / `execute_time_list_s3` are not defined in
# any cell visible here - confirm where they come from.
concat_df_s3= concat_true_and_predict_value(start_day_3,end_day_3,predict_list_s3,execute_time_list_s3)
concat_df_s3

In [None]:
# "s3" results for period 3: plot actual vs predicted, then print fit metrics.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_s3['Close_^TWII'].to_list())
plt.plot(concat_df_s3['predict'].to_list(),c='y')
plt.legend(['data', 'CEEMDAN_LSTM_prediction'], loc='upper right')
# plt.ylim([16000,18500])
# plt.xlim([144,244])
plt.show()

LSTM_R2 = r2_score(concat_df_s3['Close_^TWII'],concat_df_s3['predict'])
LSTM_MSE = mean_squared_error(concat_df_s3['Close_^TWII'],concat_df_s3['predict'])
LSTM_MAE = mean_absolute_error(concat_df_s3['Close_^TWII'],concat_df_s3['predict'])
k = concat_df_s3['Close_^TWII']
r = concat_df_s3['predict']
# Mean absolute relative error, in percent.
LSTM_MRE = np.mean(np.abs((k - r) / k)) * 100
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))
print("LSTM_MRE=",LSTM_MRE)

In [None]:
# Period 3 comparison frame for a test run.
# NOTE(review): the visible run cell defines `predict_list_3_test`, not
# `predict_list_3_test2` / `execute_time_list_3_test2` - confirm which is intended.
concat_df_t3= concat_true_and_predict_value(start_day_3,end_day_3,predict_list_3_test2,execute_time_list_3_test2)
concat_df_t3

In [None]:
# Test run for period 3: plot actual vs predicted, then print fit metrics.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_t3['Close_^TWII'].to_list())
plt.plot(concat_df_t3['predict'].to_list(),c='y')
plt.legend(['data', 'CEEMDAN_LSTM_prediction'], loc='upper right')
# plt.ylim([16000,18500])
# plt.xlim([144,244])
plt.show()

LSTM_R2 = r2_score(concat_df_t3['Close_^TWII'],concat_df_t3['predict'])
LSTM_MSE = mean_squared_error(concat_df_t3['Close_^TWII'],concat_df_t3['predict'])
LSTM_MAE = mean_absolute_error(concat_df_t3['Close_^TWII'],concat_df_t3['predict'])
k = concat_df_t3['Close_^TWII']
r = concat_df_t3['predict']
# Mean absolute relative error, in percent.
LSTM_MRE = np.mean(np.abs((k - r) / k)) * 100
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))
print("LSTM_MRE=",LSTM_MRE)

In [None]:
# Period 1 comparison frame for the "zero" result set.
# NOTE(review): `predict_list_1_zero` / `execute_time_list_1_zero` are not
# defined in any cell visible here - confirm where they come from.
concat_df_1_zero = concat_true_and_predict_value(start_day_1,end_day_1,predict_list_1_zero,execute_time_list_1_zero)
concat_df_1_zero

In [None]:
# "zero" results for period 1: plot actual vs predicted (y axis fixed to
# 14000-19000), then print fit metrics.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_1_zero['Close_^TWII'].to_list())
plt.plot(concat_df_1_zero['predict'].to_list(),c='y')
plt.legend(['data', 'CEEMDAN_LSTM_prediction'], loc='upper right')
plt.ylim([14000,19000])
plt.show()

LSTM_R2 = r2_score(concat_df_1_zero['Close_^TWII'],concat_df_1_zero['predict'])
LSTM_MSE = mean_squared_error(concat_df_1_zero['Close_^TWII'],concat_df_1_zero['predict'])
LSTM_MAE = mean_absolute_error(concat_df_1_zero['Close_^TWII'],concat_df_1_zero['predict'])
k = concat_df_1_zero['Close_^TWII']
r = concat_df_1_zero['predict']
# Mean absolute relative error, in percent.
LSTM_MRE = np.mean(np.abs((k - r) / k)) * 100
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))
print("LSTM_MRE=",LSTM_MRE)

In [None]:
# concat_df_1_zero.to_csv('./incremental_predict_result_financial_(3_half_year)/IMF_LSTM_predict_20201228_20210702.csv')

In [None]:
# Period 2 comparison frame for the "zero" result set.
# NOTE(review): `predict_list_2_zero` / `execute_time_list_2_zero` are not
# defined in any cell visible here - confirm where they come from.
concat_df_2_zero = concat_true_and_predict_value(start_day_2,end_day_2,predict_list_2_zero,execute_time_list_2_zero)
concat_df_2_zero

In [None]:
# "zero" results for period 2: plot actual vs predicted (y axis fixed to
# 16000-18500), then print fit metrics.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_2_zero['Close_^TWII'].to_list())
plt.plot(concat_df_2_zero['predict'].to_list(),c='y')
plt.legend(['data', 'CEEMDAN_LSTM_prediction'], loc='upper right')
plt.ylim([16000,18500])
# plt.xlim([144,244])
plt.show()

LSTM_R2 = r2_score(concat_df_2_zero['Close_^TWII'],concat_df_2_zero['predict'])
LSTM_MSE = mean_squared_error(concat_df_2_zero['Close_^TWII'],concat_df_2_zero['predict'])
LSTM_MAE = mean_absolute_error(concat_df_2_zero['Close_^TWII'],concat_df_2_zero['predict'])
k = concat_df_2_zero['Close_^TWII']
r = concat_df_2_zero['predict']
# Mean absolute relative error, in percent.
LSTM_MRE = np.mean(np.abs((k - r) / k)) * 100
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))
print("LSTM_MRE=",LSTM_MRE)

In [None]:
# concat_df_2_zero.to_csv('./incremental_predict_result_financial_(3_half_year)/IMF_LSTM_predict_20210706_20211224.csv')

In [None]:
# Period 3 comparison frame for the "zero" result set.
# NOTE(review): `predict_list_3_zero` / `execute_time_list_3_zero` are not
# defined in any cell visible here - confirm where they come from.
concat_df_3_zero = concat_true_and_predict_value(start_day_3,end_day_3,predict_list_3_zero,execute_time_list_3_zero)
concat_df_3_zero

In [None]:
# "zero" results for period 3: plot actual vs predicted, then print fit metrics.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_3_zero['Close_^TWII'].to_list())
plt.plot(concat_df_3_zero['predict'].to_list(),c='y')
plt.legend(['data', 'CEEMDAN_LSTM_prediction'], loc='upper right')
# plt.xlim([144,244])
plt.show()

LSTM_R2 = r2_score(concat_df_3_zero['Close_^TWII'],concat_df_3_zero['predict'])
LSTM_MSE = mean_squared_error(concat_df_3_zero['Close_^TWII'],concat_df_3_zero['predict'])
LSTM_MAE = mean_absolute_error(concat_df_3_zero['Close_^TWII'],concat_df_3_zero['predict'])
k = concat_df_3_zero['Close_^TWII']
r = concat_df_3_zero['predict']
# Mean absolute relative error, in percent.
LSTM_MRE = np.mean(np.abs((k - r) / k)) * 100
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))
print("LSTM_MRE=",LSTM_MRE)

In [None]:
# Comparison frame for a fifth period.
# NOTE(review): `start_day_5`, `end_day_5`, `predict_list_5` and
# `execute_time_list_5` are not defined in any cell visible here - confirm.
concat_df_5= concat_true_and_predict_value(start_day_5,end_day_5,predict_list_5,execute_time_list_5)

In [None]:
# Period 5: plot actual vs predicted (y axis fixed to 14000-19000), then print fit metrics.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_5['Close_^TWII'].to_list())
plt.plot(concat_df_5['predict'].to_list(),c='y')
plt.legend(['data', 'CEEMDAN_LSTM_prediction'], loc='upper right')
plt.ylim([14000,19000])
plt.show()

LSTM_R2 = r2_score(concat_df_5['Close_^TWII'],concat_df_5['predict'])
LSTM_MSE = mean_squared_error(concat_df_5['Close_^TWII'],concat_df_5['predict'])
LSTM_MAE = mean_absolute_error(concat_df_5['Close_^TWII'],concat_df_5['predict'])
k = concat_df_5['Close_^TWII']
r = concat_df_5['predict']
# Mean absolute relative error, in percent.
LSTM_MRE = np.mean(np.abs((k - r) / k)) * 100
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))
print("LSTM_MRE=",LSTM_MRE)

In [None]:
# concat_df_3_zero.to_csv('./incremental_predict_result_financial_(3_half_year)/IMF_LSTM_predict_20211228_20220630.csv')

In [None]:
# Rebuild the period-1 comparison frame. concat_true_and_predict_value takes
# four arguments; the execution-time list was missing from this call, which
# raised TypeError on a fresh run.
concat_df_1 = concat_true_and_predict_value(start_day_1,end_day_1,predict_list_1,execute_time_list_1)
concat_df_1

In [None]:
# Period 1 (rebuilt frame): plot actual vs predicted (green), then print
# R2 / MSE / MAE / RMSE.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_1['Close_^TWII'].to_list())
plt.plot(concat_df_1['predict'].to_list(),c='g')
# plt.xlim([144,244])
plt.show()

LSTM_R2 = r2_score(concat_df_1['Close_^TWII'],concat_df_1['predict'])
LSTM_MSE = mean_squared_error(concat_df_1['Close_^TWII'],concat_df_1['predict'])
LSTM_MAE = mean_absolute_error(concat_df_1['Close_^TWII'],concat_df_1['predict'])
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))

In [None]:
# Rebuild the period-2 comparison frame. concat_true_and_predict_value takes
# four arguments; the execution-time list was missing from this call, which
# raised TypeError on a fresh run.
concat_df_2 = concat_true_and_predict_value(start_day_2,end_day_2,predict_list_2,execute_time_list_2)
concat_df_2

In [None]:
# Period 2 (rebuilt frame): plot actual vs predicted (green), then print
# R2 / MSE / MAE / RMSE.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_2['Close_^TWII'].to_list())
plt.plot(concat_df_2['predict'].to_list(),c='g')
# plt.xlim([144,244])
plt.show()

LSTM_R2 = r2_score(concat_df_2['Close_^TWII'],concat_df_2['predict'])
LSTM_MSE = mean_squared_error(concat_df_2['Close_^TWII'],concat_df_2['predict'])
LSTM_MAE = mean_absolute_error(concat_df_2['Close_^TWII'],concat_df_2['predict'])
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))

In [None]:
# Rebuild the period-3 comparison frame. concat_true_and_predict_value takes
# four arguments; the execution-time list was missing from this call, which
# raised TypeError on a fresh run.
concat_df_3 = concat_true_and_predict_value(start_day_3,end_day_3,predict_list_3,execute_time_list_3)
concat_df_3

In [None]:
# Period 3 (rebuilt frame): plot actual vs predicted (green), then print
# R2 / MSE / MAE / RMSE.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_3['Close_^TWII'].to_list())
plt.plot(concat_df_3['predict'].to_list(),c='g')
# plt.xlim([144,244])
plt.show()

LSTM_R2 = r2_score(concat_df_3['Close_^TWII'],concat_df_3['predict'])
LSTM_MSE = mean_squared_error(concat_df_3['Close_^TWII'],concat_df_3['predict'])
LSTM_MAE = mean_absolute_error(concat_df_3['Close_^TWII'],concat_df_3['predict'])
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))

In [None]:
# Build the period-4 comparison frame. concat_true_and_predict_value takes
# four arguments; the execution-time list was missing from this call, which
# raised TypeError on a fresh run.
concat_df_4 = concat_true_and_predict_value(start_day_4,end_day_4,predict_list_4,execute_time_list_4)
concat_df_4

In [None]:
# Period 4: plot actual vs predicted (green), then print R2 / MSE / MAE / RMSE.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_4['Close_^TWII'].to_list())
plt.plot(concat_df_4['predict'].to_list(),c='g')
# plt.xlim([144,244])
plt.show()

LSTM_R2 = r2_score(concat_df_4['Close_^TWII'],concat_df_4['predict'])
LSTM_MSE = mean_squared_error(concat_df_4['Close_^TWII'],concat_df_4['predict'])
LSTM_MAE = mean_absolute_error(concat_df_4['Close_^TWII'],concat_df_4['predict'])
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))

In [None]:
# Comparison frame for an eighth period.
# NOTE(review): `start_day_8`, `end_day_8` and `predict_list_8` are not defined
# in any cell visible here, and this call passes three arguments while the
# helper as defined in this notebook requires four - confirm.
concat_df_8= concat_true_and_predict_value(start_day_8,end_day_8,predict_list_8)
concat_df_8

In [None]:
# Period 8: plot actual vs predicted (green), then print R2 / MSE / MAE / RMSE.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_8['Close_^TWII'].to_list())
plt.plot(concat_df_8['predict'].to_list(),c='g')
# plt.xlim([144,244])
plt.show()

LSTM_R2 = r2_score(concat_df_8['Close_^TWII'],concat_df_8['predict'])
LSTM_MSE = mean_squared_error(concat_df_8['Close_^TWII'],concat_df_8['predict'])
LSTM_MAE = mean_absolute_error(concat_df_8['Close_^TWII'],concat_df_8['predict'])
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))

In [None]:
# Comparison frame for the "2y_test" run; the cell displays its row count.
# NOTE(review): `start_day_2y_test`, `end_day_2y_test`, `predict_list_2y_test`
# and `execute_time_list_2y_test` are not defined in any cell visible here - confirm.
concat_df_2y_test= concat_true_and_predict_value(start_day_2y_test,end_day_2y_test,predict_list_2y_test,execute_time_list_2y_test)
len(concat_df_2y_test)

In [None]:
# Display the frame with the date index moved into a column; the result is
# not assigned, so concat_df_2y_test itself is unchanged.
concat_df_2y_test.reset_index()

In [None]:

# "2y_test" run, first 114 rows only: plot actual vs predicted, then print fit metrics.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_2y_test['Close_^TWII'][0:114].to_list())
plt.plot(concat_df_2y_test['predict'][0:114].to_list(),c='y')
# plt.xlim([300,400])
plt.show()

# k = concat_df_2y_test['Close_^TWII'][0:114].to_list().flatten()
# r = concat_df_2y_test['predict'][0:114].to_list().flatten()

LSTM_R2 = r2_score(concat_df_2y_test['Close_^TWII'][0:114],concat_df_2y_test['predict'][0:114])
LSTM_MSE = mean_squared_error(concat_df_2y_test['Close_^TWII'][0:114],concat_df_2y_test['predict'][0:114])
LSTM_MAE = mean_absolute_error(concat_df_2y_test['Close_^TWII'][0:114],concat_df_2y_test['predict'][0:114])
# Mean absolute relative error, in percent.
LSTM_MRE = np.mean(np.abs((concat_df_2y_test['Close_^TWII'][0:114] - concat_df_2y_test['predict'][0:114]) / concat_df_2y_test['Close_^TWII'][0:114])) * 100
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))
print("LSTM_MRE=",LSTM_MRE)

In [None]:
# "2y_test" run, first 122 rows only: plot actual vs predicted (green), then
# print R2 / MSE / MAE / RMSE.
plt.figure(figsize=(10 ,5))
# plt.ylim([13500,20000])
# plt.xlim([1,20000])
# plt.ylim([12000,19000])
plt.plot(concat_df_2y_test['Close_^TWII'][0:122].to_list())
plt.plot(concat_df_2y_test['predict'][0:122].to_list(),c='g')
# plt.xlim([300,400])
plt.show()

LSTM_R2 = r2_score(concat_df_2y_test['Close_^TWII'][0:122],concat_df_2y_test['predict'][0:122])
LSTM_MSE = mean_squared_error(concat_df_2y_test['Close_^TWII'][0:122],concat_df_2y_test['predict'][0:122])
LSTM_MAE = mean_absolute_error(concat_df_2y_test['Close_^TWII'][0:122],concat_df_2y_test['predict'][0:122])
print("LSTM_R2=",LSTM_R2)
print("LSTM_MSE=",LSTM_MSE)
print("LSTM_MAE=",LSTM_MAE)
print("LSTM_RMSE=",sqrt(LSTM_MSE))