In [1]:
import pandas as pd
import numpy as np

import scipy.stats as stats
from scipy.stats import kendalltau
from scipy.spatial.distance import pdist, squareform

from sklearn.linear_model import ElasticNet, LogisticRegression
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from math import sqrt
import matplotlib.pyplot as plt
from pyEDM import *

import warnings
warnings.filterwarnings('ignore')

### 一、Input 資料

1.1.載入資料

In [2]:
data = pd.read_csv('/Users/yitsung/git/MastersThesis/data/TaiwanStockData_Top100_EMA')

1.2.取出ticker資料

In [3]:
def get_ticker_data(data, ticker):
    ticker_data = data[data['ticker']==ticker].reset_index(drop=True)
    ticker_data = ticker_data.drop(columns=['ticker'])

    return ticker_data

In [4]:
ticker_data = get_ticker_data(data=data, ticker=2330)
ticker_data

Unnamed: 0,Date,open,high,low,close,volume,financing,fi,ii,di,rp,capital,EMA9,EMA12,EMA26,MACD,Signal,RSI14
0,2021-01-04,530.0,540.0,528.0,536.0,39490.0,454.0,12463.0,-33.0,865.0,2342.0,6.0443,521.295251,518.980386,513.251221,5.729165,3.933239,84.477581
1,2021-01-05,536.0,542.0,535.0,542.0,34839.0,-355.0,2884.0,179.0,-451.0,-1374.0,5.3592,525.437881,522.532126,515.535238,6.996887,4.619674,88.417310
2,2021-01-06,555.0,555.0,541.0,549.0,55614.0,-256.0,5355.0,105.0,-4163.0,1.0,6.9696,530.151835,526.614084,518.179719,8.434365,5.454306,91.005801
3,2021-01-07,554.0,570.0,553.0,565.0,53393.0,2200.0,1671.0,-75.0,2060.0,-402.0,8.7664,537.123278,532.531850,521.861371,10.670478,6.574521,93.325963
4,2021-01-08,580.0,580.0,571.0,580.0,62957.0,-502.0,3278.0,187.0,1176.0,-5041.0,9.0658,545.700404,539.847445,526.412277,13.435169,8.026473,94.939847
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
704,2023-11-27,573.0,577.0,568.0,568.0,20322.0,-112.0,-2153.0,59.0,-56.0,-3554.0,4.1507,573.659169,571.840469,562.945683,8.894786,9.086560,81.069290
705,2023-11-28,565.0,576.0,565.0,575.0,26932.0,478.0,3323.0,-98.0,687.0,-416.0,5.1624,573.927335,572.326550,563.838595,8.487955,8.966839,76.500832
706,2023-11-29,578.0,579.0,570.0,574.0,27787.0,357.0,-180.0,55.0,-553.0,-2383.0,4.8624,573.941868,572.584004,564.591292,7.992712,8.772014,71.301362
707,2023-11-30,576.0,577.0,570.0,577.0,54365.0,-32.0,4730.0,-68.0,-770.0,-155.0,7.5527,574.553494,573.263388,565.510455,7.752933,8.568197,68.146342


1.3.製作Tp天後漲跌feature(bs)

In [5]:
def make_action_df(ticker_data, Tp): # 只能用一次, 不然頭會一直被去掉

    ticker_data['bs'] = ticker_data['close'].shift(Tp)
    ticker_data['bs'] = ticker_data['close'] - ticker_data['bs']
    ticker_data = ticker_data.dropna().reset_index(drop=True)
    ticker_data['bs'] = (ticker_data['bs'] >= 0).astype(int) # 選擇只看是0還是1就好

    return ticker_data

In [6]:
ticker_data = make_action_df(ticker_data=ticker_data, Tp=1)
ticker_data

Unnamed: 0,Date,open,high,low,close,volume,financing,fi,ii,di,rp,capital,EMA9,EMA12,EMA26,MACD,Signal,RSI14,bs
0,2021-01-05,536.0,542.0,535.0,542.0,34839.0,-355.0,2884.0,179.0,-451.0,-1374.0,5.3592,525.437881,522.532126,515.535238,6.996887,4.619674,88.417310,1
1,2021-01-06,555.0,555.0,541.0,549.0,55614.0,-256.0,5355.0,105.0,-4163.0,1.0,6.9696,530.151835,526.614084,518.179719,8.434365,5.454306,91.005801,1
2,2021-01-07,554.0,570.0,553.0,565.0,53393.0,2200.0,1671.0,-75.0,2060.0,-402.0,8.7664,537.123278,532.531850,521.861371,10.670478,6.574521,93.325963,1
3,2021-01-08,580.0,580.0,571.0,580.0,62957.0,-502.0,3278.0,187.0,1176.0,-5041.0,9.0658,545.700404,539.847445,526.412277,13.435169,8.026473,94.939847,1
4,2021-01-11,577.0,584.0,574.0,584.0,52772.0,681.0,-7717.0,351.0,-433.0,-10385.0,9.2596,553.361596,546.650220,530.901193,15.749027,9.642037,95.791563,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
703,2023-11-27,573.0,577.0,568.0,568.0,20322.0,-112.0,-2153.0,59.0,-56.0,-3554.0,4.1507,573.659169,571.840469,562.945683,8.894786,9.086560,81.069290,0
704,2023-11-28,565.0,576.0,565.0,575.0,26932.0,478.0,3323.0,-98.0,687.0,-416.0,5.1624,573.927335,572.326550,563.838595,8.487955,8.966839,76.500832,1
705,2023-11-29,578.0,579.0,570.0,574.0,27787.0,357.0,-180.0,55.0,-553.0,-2383.0,4.8624,573.941868,572.584004,564.591292,7.992712,8.772014,71.301362,0
706,2023-11-30,576.0,577.0,570.0,577.0,54365.0,-32.0,4730.0,-68.0,-770.0,-155.0,7.5527,574.553494,573.263388,565.510455,7.752933,8.568197,68.146342,1


1.4.轉換成EMA+return data

In [7]:
def feature_engineering(ticker_data): # 只能用一次, 不然頭會一直被去掉

    # 取differ
    feature_to_differ = ['financing', 'fi', 'ii']
    ticker_data[feature_to_differ] = ticker_data[feature_to_differ].diff()

    # origi_data
    origi_data = ticker_data.copy() # 用來之後還原答案的

    # 把價格變成報酬率
    ticker_data['open'] = (ticker_data['open'] - ticker_data['open'].shift(1)) / ticker_data['open'].shift(1)
    ticker_data['high'] = (ticker_data['high'] - ticker_data['high'].shift(1)) / ticker_data['high'].shift(1)
    ticker_data['low'] = (ticker_data['low'] - ticker_data['low'].shift(1)) / ticker_data['low'].shift(1)
    ticker_data['close'] = (ticker_data['close'] - ticker_data['close'].shift(1)) / ticker_data['close'].shift(1)

    ticker_data.replace([float('inf'), -float('inf')], 0, inplace=True) # 不知道為何有些調整過後會變inf, 要拿掉(應該是連兩天的價格都相同)
    ticker_data = ticker_data.dropna().reset_index(drop=True)

    return ticker_data, origi_data

In [8]:
ticker_data, origi_data = feature_engineering(ticker_data=ticker_data)
ticker_data

Unnamed: 0,Date,open,high,low,close,volume,financing,fi,ii,di,rp,capital,EMA9,EMA12,EMA26,MACD,Signal,RSI14,bs
0,2021-01-06,0.035448,0.023985,0.011215,0.012915,55614.0,99.0,2471.0,-74.0,-4163.0,1.0,6.9696,530.151835,526.614084,518.179719,8.434365,5.454306,91.005801,1
1,2021-01-07,-0.001802,0.027027,0.022181,0.029144,53393.0,2456.0,-3684.0,-180.0,2060.0,-402.0,8.7664,537.123278,532.531850,521.861371,10.670478,6.574521,93.325963,1
2,2021-01-08,0.046931,0.017544,0.032550,0.026549,62957.0,-2702.0,1607.0,262.0,1176.0,-5041.0,9.0658,545.700404,539.847445,526.412277,13.435169,8.026473,94.939847,1
3,2021-01-11,-0.005172,0.006897,0.005254,0.006897,52772.0,1183.0,-10995.0,164.0,-433.0,-10385.0,9.2596,553.361596,546.650220,530.901193,15.749027,9.642037,95.791563,1
4,2021-01-12,0.010399,0.022260,0.013937,0.011986,52605.0,57.0,-1908.0,1813.0,766.0,-9002.0,8.2691,560.890277,553.481824,535.567763,17.914060,11.356774,96.364177,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
702,2023-11-27,-0.006932,-0.001730,-0.010453,-0.012174,20322.0,-355.0,-1299.0,-11.0,-56.0,-3554.0,4.1507,573.659169,571.840469,562.945683,8.894786,9.086560,81.069290,0
703,2023-11-28,-0.013962,-0.001733,-0.005282,0.012324,26932.0,590.0,5476.0,-157.0,687.0,-416.0,5.1624,573.927335,572.326550,563.838595,8.487955,8.966839,76.500832,1
704,2023-11-29,0.023009,0.005208,0.008850,-0.001739,27787.0,-121.0,-3503.0,153.0,-553.0,-2383.0,4.8624,573.941868,572.584004,564.591292,7.992712,8.772014,71.301362,0
705,2023-11-30,-0.003460,-0.003454,0.000000,0.005226,54365.0,-389.0,4910.0,-123.0,-770.0,-155.0,7.5527,574.553494,573.263388,565.510455,7.752933,8.568197,68.146342,1


1.5.切Library和Prediction

In [9]:
def splite_Lib_Pred(ticker_data, start_date, end_date):
    
    Library = ticker_data[ticker_data['Date'] < start_date]
    Prediction = ticker_data[(ticker_data['Date'] >= start_date)&(ticker_data['Date'] <= end_date)]
    
    return Library, Prediction

In [10]:
ticker_Library, ticker_Prediction = splite_Lib_Pred(ticker_data=ticker_data, 
                                      start_date='2023-06-30', end_date='2023-11-29')
ticker_Prediction

Unnamed: 0,Date,open,high,low,close,volume,financing,fi,ii,di,rp,capital,EMA9,EMA12,EMA26,MACD,Signal,RSI14,bs
600,2023-06-30,-0.013841,-0.006897,-0.003509,0.005236,33831.0,89.0,-1218.0,38858.0,649.0,-1545.0,6.1965,576.126478,575.383895,565.777341,9.606554,12.496515,34.306232,1
601,2023-07-03,0.014035,0.006944,0.014085,0.005208,15118.0,-20.0,4926.0,-39042.0,1401.0,582.0,2.5477,576.701182,575.940219,566.756798,9.183421,11.833896,33.311646,1
602,2023-07-04,0.012111,0.008621,0.006944,0.010363,17777.0,-13.0,2452.0,-1043.0,-13.0,1767.0,2.7068,578.360946,577.334031,568.108146,9.225886,11.312294,30.627646,1
603,2023-07-05,0.006838,0.006838,-0.001724,-0.005128,15554.0,-134.0,-5695.0,845.0,-1092.0,-790.0,2.6473,579.088757,578.051873,569.137172,8.914701,10.832775,17.707207,0
604,2023-07-06,-0.027165,-0.025467,-0.024180,-0.029210,32070.0,613.0,-15586.0,-71.0,-603.0,-14045.0,5.1447,576.271005,576.043892,568.830715,7.213177,10.108856,7.697299,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,2023-11-23,-0.003472,-0.001727,0.000000,0.001733,15144.0,-360.0,6706.0,225.0,-218.0,93.0,3.0366,575.092451,572.091233,561.544644,10.546589,8.918779,95.718908,1
701,2023-11-24,0.005226,0.000000,0.000000,-0.005190,12503.0,70.0,-4594.0,323.0,-118.0,-2263.0,2.8318,575.073961,572.538736,562.541337,9.997398,9.134503,90.744592,0
702,2023-11-27,-0.006932,-0.001730,-0.010453,-0.012174,20322.0,-355.0,-1299.0,-11.0,-56.0,-3554.0,4.1507,573.659169,571.840469,562.945683,8.894786,9.086560,81.069290,0
703,2023-11-28,-0.013962,-0.001733,-0.005282,0.012324,26932.0,590.0,5476.0,-157.0,687.0,-416.0,5.1624,573.927335,572.326550,563.838595,8.487955,8.966839,76.500832,1


1.6.標準化(Minmax)

In [11]:
def make_data_minmax(Library, Prediction):

    feature_to_standardize = Library.columns.to_list()
    feature_to_standardize.remove(Library.columns[0])  # 排除Date
    feature_to_standardize.remove(Library.columns[4])  # 排除close

    # 處理target以外的feature
    scaler_X = MinMaxScaler() 
    Library[feature_to_standardize] = scaler_X.fit_transform(Library[feature_to_standardize])
    Prediction[feature_to_standardize] = scaler_X.fit_transform(Prediction[feature_to_standardize])

    # 處理target feature
    scaler_y = MinMaxScaler() 
    Library['close'] = scaler_y.fit_transform(Library['close'].values.reshape(-1, 1))
    Prediction['close'] = scaler_y.fit_transform(Prediction['close'].values.reshape(-1, 1))

    return Library, Prediction, scaler_y

In [12]:
ticker_Library, ticker_Prediction, _ = make_data_minmax(Library=ticker_Library, Prediction=ticker_Prediction) # scaler_y用不到
ticker_Prediction

Unnamed: 0,Date,open,high,low,close,volume,financing,fi,ii,di,rp,capital,EMA9,EMA12,EMA26,MACD,Signal,RSI14,bs
600,2023-06-30,0.265573,0.376204,0.489769,0.553064,0.467544,0.530453,0.617580,1.000000,0.609983,0.465376,0.565818,0.899545,0.902715,0.769223,0.925248,1.000000,0.343062,1.0
601,2023-07-03,0.624216,0.555372,0.752285,0.552667,0.109647,0.502082,0.731688,0.000000,0.726554,0.525940,0.093628,0.910096,0.913484,0.793724,0.903223,0.965650,0.333116,1.0
602,2023-07-04,0.599458,0.577071,0.645745,0.627585,0.160502,0.503904,0.685740,0.487792,0.507363,0.559681,0.114217,0.940569,0.940465,0.827527,0.905433,0.938610,0.306276,1.0
603,2023-07-05,0.531616,0.553989,0.516398,0.402427,0.117986,0.472410,0.534433,0.512028,0.340102,0.486874,0.106517,0.953932,0.954360,0.853267,0.889235,0.913751,0.177072,0.0
604,2023-07-06,0.094152,0.135816,0.181333,0.052407,0.433864,0.666840,0.350735,0.500270,0.415905,0.109453,0.429705,0.902198,0.915491,0.845601,0.800664,0.876223,0.076973,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,2023-11-23,0.398972,0.443121,0.542125,0.502155,0.110144,0.413587,0.764746,0.504069,0.475585,0.512016,0.156896,0.880560,0.838978,0.663346,0.974181,0.814529,0.957189,1.0
701,2023-11-24,0.510887,0.465478,0.542125,0.401524,0.059634,0.525508,0.554881,0.505327,0.491087,0.444932,0.130393,0.880220,0.847641,0.688277,0.945593,0.825712,0.907446,0.0
702,2023-11-27,0.354455,0.443082,0.386153,0.300018,0.209176,0.414888,0.616076,0.501040,0.500698,0.408172,0.301072,0.854245,0.834124,0.698392,0.888198,0.823227,0.810693,0.0
703,2023-11-28,0.264019,0.443044,0.463315,0.656092,0.335597,0.660854,0.741903,0.499166,0.615874,0.497523,0.431995,0.859169,0.843534,0.720727,0.867021,0.817020,0.765008,1.0


1.7.整併預測DataFrame

In [13]:
def concate_pred_data(Library, Prediction, th): # th=-1就是Library

    if th < 0:
        Lib_Pred_df = Library
    
    else:
        row_to_add = Prediction.iloc[th]
        Lib_Pred_df = pd.concat([Library, row_to_add.to_frame().T], ignore_index=True)
        
    # 這種concate方法會有非數值問題, 要這樣修正
    Lib_Pred_df[Library.columns.to_list()[1:]] = Lib_Pred_df[Library.columns.to_list()[1:]].apply(pd.to_numeric, errors='coerce')
    Lib_Pred_df['Date'] = pd.to_datetime(Lib_Pred_df['Date'])

    return Lib_Pred_df

In [14]:
ticker_Lib_Pred_df = concate_pred_data(Library=ticker_Library, Prediction=ticker_Prediction, th=0)
ticker_Lib_Pred_df.tail()

Unnamed: 0,Date,open,high,low,close,volume,financing,fi,ii,di,rp,capital,EMA9,EMA12,EMA26,MACD,Signal,RSI14,bs
596,2023-06-26,0.471196,0.420345,0.518317,0.427449,0.156368,0.542843,0.569898,0.512422,0.321104,0.445355,0.202976,0.707717,0.709761,0.673675,0.650284,0.744644,0.508022,0.0
597,2023-06-27,0.470564,0.462043,0.50731,0.4788,0.103541,0.499237,0.591838,0.484297,0.332064,0.468425,0.13816,0.702469,0.707008,0.676538,0.62489,0.733508,0.453906,0.0
598,2023-06-28,0.624932,0.536797,0.582472,0.52066,0.076768,0.523773,0.592271,0.52521,0.324855,0.459063,0.127218,0.699748,0.705846,0.679815,0.605314,0.720099,0.410194,1.0
599,2023-06-29,0.52175,0.504614,0.550096,0.489247,0.07222,0.547038,0.60276,0.504931,0.385161,0.466145,0.115186,0.696833,0.704279,0.682536,0.586186,0.704976,0.34888,0.0
600,2023-06-30,0.265573,0.376204,0.489769,0.553064,0.467544,0.530453,0.61758,1.0,0.609983,0.465376,0.565818,0.899545,0.902715,0.769223,0.925248,1.0,0.343062,1.0


1.8.整併整個市場的股票作為Library

In [15]:
def make_market_Library(data, ticker, ticker_Prediction):

    ### 取得市場中的所有股號 ###
    unique_ticker = list(data['ticker'].unique())

    ### 指定target ticker到unique_ticker list最後面 ###
    ticker_to_move_to_end = ticker # 指定某個值為最後一個值
    index = unique_ticker.index(ticker_to_move_to_end) # 找到值在列表中的索引
    unique_ticker.pop(index) # 移除該值
    unique_ticker.append(ticker_to_move_to_end) # 將該值添加到列表的末尾

    ### 整併Library dataframe ###
    market_Library = pd.DataFrame()
    for ticker in unique_ticker:

        try:
            ticker_data = get_ticker_data(data=data, ticker=ticker)
            ticker_data = make_action_df(ticker_data=ticker_data, Tp=1)
            ticker_data, _ = feature_engineering(ticker_data=ticker_data)
            ticker_Library, _ = splite_Lib_Pred(ticker_data=ticker_data, 
                                                start_date='2023-06-30', end_date='2023-11-29')
            ticker_Library, _, _ = make_data_minmax(Library=ticker_Library, Prediction=ticker_Prediction)
            market_Library = pd.concat([market_Library, ticker_Library], axis=0, ignore_index=True)

        except:
            print(f'error: {ticker}')
            continue
    
    return market_Library

In [16]:
market_Library = make_market_Library(data=data, ticker=2330, ticker_Prediction=ticker_Prediction)
market_Library

error: 6901
error: 6526
error: 6805


Unnamed: 0,Date,open,high,low,close,volume,financing,fi,ii,di,rp,capital,EMA9,EMA12,EMA26,MACD,Signal,RSI14,bs
0,2021-01-06,0.883479,0.648888,0.685527,0.466327,0.682684,0.461470,0.618896,0.479112,0.000000,0.476265,0.578114,0.000000,0.000000,0.000000,0.680521,0.568372,0.998365,1.0
1,2021-01-07,0.501491,0.349103,0.853684,0.529250,0.338767,0.829826,0.521466,0.477592,0.449523,0.565319,0.375923,0.067790,0.059066,0.039550,0.730728,0.608184,0.998673,1.0
2,2021-01-08,0.530974,0.495370,0.649973,0.464528,0.341625,0.337680,0.536903,0.484231,0.376709,0.493261,0.321902,0.128611,0.114003,0.078384,0.772301,0.649175,0.998852,1.0
3,2021-01-11,0.413038,0.378328,0.649819,0.371297,0.197543,0.767289,0.354483,0.501715,0.384698,0.340030,0.230500,0.173966,0.157966,0.112754,0.797326,0.687180,0.998938,0.0
4,2021-01-12,0.382874,0.290919,0.390959,0.184983,0.313063,0.361589,0.675353,0.500076,0.046860,0.415979,0.306938,0.187163,0.177677,0.135435,0.790356,0.715287,0.978456,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
518336,2023-06-21,0.562445,0.493985,0.550245,0.479123,0.112593,0.539919,0.574944,0.505976,0.337279,0.468185,0.132847,0.712430,0.711635,0.669907,0.675360,0.751267,0.625757,0.0
518337,2023-06-26,0.471196,0.420345,0.518317,0.427449,0.156368,0.542843,0.569898,0.512422,0.321104,0.445355,0.202976,0.707717,0.709761,0.673675,0.650284,0.744644,0.508022,0.0
518338,2023-06-27,0.470564,0.462043,0.507310,0.478800,0.103541,0.499237,0.591838,0.484297,0.332064,0.468425,0.138160,0.702469,0.707008,0.676538,0.624890,0.733508,0.453906,0.0
518339,2023-06-28,0.624932,0.536797,0.582472,0.520660,0.076768,0.523773,0.592271,0.525210,0.324855,0.459063,0.127218,0.699748,0.705846,0.679815,0.605314,0.720099,0.410194,1.0


1.9.將market_Library整併預測DataFrame

In [17]:
market_Lib_Pred_df = concate_pred_data(Library=market_Library, Prediction=ticker_Prediction, th=0)
market_Lib_Pred_df['Date'] = range(len(market_Lib_Pred_df))
market_Lib_Pred_df

Unnamed: 0,Date,open,high,low,close,volume,financing,fi,ii,di,rp,capital,EMA9,EMA12,EMA26,MACD,Signal,RSI14,bs
0,0,0.883479,0.648888,0.685527,0.466327,0.682684,0.461470,0.618896,0.479112,0.000000,0.476265,0.578114,0.000000,0.000000,0.000000,0.680521,0.568372,0.998365,1.0
1,1,0.501491,0.349103,0.853684,0.529250,0.338767,0.829826,0.521466,0.477592,0.449523,0.565319,0.375923,0.067790,0.059066,0.039550,0.730728,0.608184,0.998673,1.0
2,2,0.530974,0.495370,0.649973,0.464528,0.341625,0.337680,0.536903,0.484231,0.376709,0.493261,0.321902,0.128611,0.114003,0.078384,0.772301,0.649175,0.998852,1.0
3,3,0.413038,0.378328,0.649819,0.371297,0.197543,0.767289,0.354483,0.501715,0.384698,0.340030,0.230500,0.173966,0.157966,0.112754,0.797326,0.687180,0.998938,0.0
4,4,0.382874,0.290919,0.390959,0.184983,0.313063,0.361589,0.675353,0.500076,0.046860,0.415979,0.306938,0.187163,0.177677,0.135435,0.790356,0.715287,0.978456,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
518337,518337,0.471196,0.420345,0.518317,0.427449,0.156368,0.542843,0.569898,0.512422,0.321104,0.445355,0.202976,0.707717,0.709761,0.673675,0.650284,0.744644,0.508022,0.0
518338,518338,0.470564,0.462043,0.507310,0.478800,0.103541,0.499237,0.591838,0.484297,0.332064,0.468425,0.138160,0.702469,0.707008,0.676538,0.624890,0.733508,0.453906,0.0
518339,518339,0.624932,0.536797,0.582472,0.520660,0.076768,0.523773,0.592271,0.525210,0.324855,0.459063,0.127218,0.699748,0.705846,0.679815,0.605314,0.720099,0.410194,1.0
518340,518340,0.521750,0.504614,0.550096,0.489247,0.072220,0.547038,0.602760,0.504931,0.385161,0.466145,0.115186,0.696833,0.704279,0.682536,0.586186,0.704976,0.348880,0.0


### 二、MDRSmap演算法 (前期)

2.1. 製作可餵入EDM格式的train_feature

In [18]:
def find_train_target_feature(data, target):

    df_columns = list(data.columns)
    train_feature = df_columns.copy()
    train_feature.remove('Date') # 先拿掉日期
    
    formatted_columns = ' '.join(df_columns[1:]) # 變成可以餵給 EDM function 參數 'columns' 的形式 
    train_feature.remove(target) # 再拿掉 target_feature

    return formatted_columns, train_feature

In [19]:
# formatted_columns, train_feature = find_train_target_feature(data=ticker_Library, target='bs')
# formatted_columns

2.2. 找出target_feature最佳嵌入維度

In [20]:
def find_target_OED(data, target):

    target_OED = EmbedDimension(dataFrame=data, lib=f'1 {len(data)}', pred=f'{len(data)-21} {len(data)-1}', columns=target, showPlot=False) # 4. lib訓練全部, pred看最後20筆

    target_OED_rho = target_OED['rho'].max()
    target_OED = int(target_OED['E'][target_OED['rho'] == target_OED['rho'].max()].iloc[0])

    return target_OED, target_OED_rho


In [21]:
# target_OED, target_OED_rho = find_target_OED(data=ticker_Library, target='bs')
# print(f'target_OED: {target_OED}, target_OED_rho: {target_OED_rho}')

2.3. 找出所有有因果關係的train_feature

In [22]:
def find_rho_sig_df(data, ticker, target, target_OED, train_feature, E_max):
    
    crirho = stats.t.ppf(0.95, len(data) - 1) / (len(data) - 2 + stats.t.ppf(0.95, len(data) - 1) ** 2)
    ccm_libSizes = f'{target_OED+10} {len(data)-10} 10'
    # ccm_libSizes = list(range(10, len(data) + 1, 10)) + [len(data)]  # sequence of library size # original

    rho_sig_df = pd.DataFrame(columns=train_feature)
    for train in train_feature:

        ### 找出該train_feature最好的ccm_OED ###
        ccm_E_termRHO = pd.DataFrame(columns=['E', 'term_rho'])
        """
        這裡假設用 term_rho 來選 ccm_OED
        """
        for e in range(1, E_max+1):
            ccm_result = CCM(dataFrame=data, E=e, columns=train, target=target,
                            libSizes=ccm_libSizes, random=False, showPlot=False)
            # print(e, ccm_result[f'{target}:open'].iloc[-1]) # 有時候會有warning, 測試用
            new_data = {'E': e, 'term_rho': ccm_result[f'{target}:{train}'].iloc[-1]}
            ccm_E_termRHO.loc[len(ccm_E_termRHO)] = new_data

        max_term_rho_index = ccm_E_termRHO['term_rho'].idxmax()
        ccm_OED = ccm_E_termRHO.at[max_term_rho_index, 'E']

        ### 用最好的ccm_OED來做該feature的因果檢定 ###
        ccm_result = CCM(dataFrame=data, E=ccm_OED, columns=train, target=target, 
                        libSizes=ccm_libSizes, random=False, showPlot=False)
        """
        這裡假設用 target:train 、 LibSize 來做 kendalltau 檢定
        """
        ccm_result = ccm_result[['LibSize', f'{target}:{train}']]
        ccm_result[f'{target}:{train}'][ccm_result[f'{target}:{train}'] < 0] = 0
        term_rho = ccm_result[f'{target}:{train}'].iloc[-1]

        tau, p_value = kendalltau(ccm_result['LibSize'], ccm_result[f'{target}:{train}']) # 進行 kendalltau 相關檢定

        alpha = 0.05
        if (p_value < alpha) and (term_rho > crirho): # 顯著相關
            rho_sig_df[train] = [term_rho]

        else: # "不" 顯著相關
            rho_sig_df[train] = [0]

    rho_sig_df.index = pd.Index([f'{ticker}_{target}']) 
    
    return rho_sig_df


In [23]:
# rho_sig_df = find_rho_sig_df(data=ticker_Library, ticker=2330, target='bs', target_OED=target_OED, train_feature=train_feature, E_max=10)
# rho_sig_df

2.4. 用有因果關係的train_feature建立Embed_df

In [24]:
def make_Embed_df(data, max_lag, target, rho_sig_df):

    #用有因果關係的 train feature + target feature 製作 Embed_df #
    non_zero_columns = rho_sig_df.loc[:, (rho_sig_df != 0).any(axis = 0)] # 選取值非0的column
    train_feature_ls = list(non_zero_columns.columns)
    formatted_columns = ' '.join(train_feature_ls) # 轉成 EDM column 的 input
    columns_to_lag = formatted_columns + f' {target}' # 加入 target 本身

    Embed_df = Embed(dataFrame=data, E=max_lag, tau=-1, columns=columns_to_lag) # 製作 Embed_df
    Embed_df['Date'] = data['Date'] # 加入Date來看index, 才可以防simplex func的bug
    Embed_df.dropna(inplace=True) # 把包含NaN的資料拿掉
    Embed_df = Embed_df.reset_index(drop=True)
    Embed_df = Embed_df[['Date'] + [col for col in Embed_df.columns if col != 'Date']]

    ML_df_date = Embed_df.copy()
    # ML_df_date['Date'] = pd.to_datetime(ML_df_date['Date']) # 將index設為日期
    ML_df_date.set_index('Date', inplace=True)
    ML_df_date = ML_df_date.filter(like="(t-0)") # 只留下(t-0)的column

    return Embed_df, ML_df_date

In [25]:
# Embed_df, ML_df_date = make_Embed_df(data=ticker_Lib_Pred_df, max_lag=10, target='bs', rho_sig_df=rho_sig_df)
# Embed_df.tail()

In [26]:
# ML_df_date.tail()

2.5. 用simplex randomsearch找出最佳的view

In [27]:
def make_random_simplex(Embed_df, target, target_OED, kmax, kn):    

    Embed_for_train = Embed_df.drop(columns='Date') # 先把 Date 拿掉
    Embed_for_train = Embed_for_train.drop(columns=f'{target}(t-0)') # 先把 target 拿掉
    train_f_ls = list(Embed_for_train.columns) # train_feature
    train_f_num = len(Embed_for_train.columns) # train_feature 的個數

    rho_feature_view = pd.DataFrame(columns=['rho']) # 創建一個df去紀錄每個隨機view的資料
    new_column = pd.DataFrame(columns=['feature_' + str(i) for i in range(1, target_OED+1)])
    rho_feature_view = pd.concat([rho_feature_view, new_column], axis=1)
    k = 1
    while k <= kmax:
        random_pick_train = np.random.choice(train_f_num, target_OED, replace=False)
        # print(random_pick_train)

        train_f_ls = np.array(train_f_ls) # 變成 array 才可以一次選
        select_train_f = train_f_ls[random_pick_train] # 隨機選到的 train_feature
        formatted_random_columns = ' '.join(select_train_f) # 用成符合 EDM 的資料格式
        # print(formatted_random_columns)

        simp = Simplex(dataFrame=Embed_df, E=target_OED, # ver3: 測試近10 or 20個交易日
                       lib=f'1 {len(Embed_df)}', pred = f'{len(Embed_df)-21} {len(Embed_df)-1}', 
                       columns=formatted_random_columns, target=f'{target}(t-0)',
                       embedded = True, showPlot = False) # 原本是False現在改True
        # print(simp)

        sub_simp = simp[['Observations', 'Predictions']] # 計算rho
        rho = sub_simp['Observations'].corr(sub_simp['Predictions'])

        rho_feature_view.loc[len(rho_feature_view), 'rho'] = rho # 將 view 更新到 rho_feature_view 的 df 中
        rho_feature_view.loc[len(rho_feature_view)-1, rho_feature_view.columns[1:]] = select_train_f
        # print(rho)
        k += 1

    allscore = rho_feature_view.sort_values(by='rho', ascending=False).head(kn)
    allscore = allscore.reset_index(drop=True)

    return allscore

In [28]:
# allscore = make_random_simplex(Embed_df=Embed_df, target='bs', target_OED=target_OED, kmax=10000, kn=5)
# allscore.head()

### 三、MDRSmap演算法 (後期)

3.1. 計算每個時點的(view加權)距離

In [29]:
def compute_view_w_distance(Embed_df, allscore):

    ww = allscore['rho'] / allscore['rho'].sum() # 每個view的權重

    dmatrix_ls = []
    for j in range(allscore.shape[0]):

        view_feature = allscore.iloc[j, 1:] # 選取第j個view的所有feature
        view_feature = np.array(view_feature) # 把所有feature變成array才可以從完整Embed_df中找資料
        view_feature_value = Embed_df[view_feature]
        view_matrix = view_feature_value.to_numpy() # 從df形式變array
        view_matrix = np.vstack(view_matrix) # 這樣才能疊成matrix

        Dx_t2 = pdist(view_matrix, metric='euclidean') * ww[j] # 計算加權距離
        Dx_t2 = squareform(Dx_t2) # 將距離變成squareform
        dmatrix_ls.append(Dx_t2)

    v_w_dmatrix = np.sum(dmatrix_ls, axis=0) # 輸出每個時點的view加權距離

    return v_w_dmatrix

In [30]:
# v_w_dmatrix = compute_view_w_distance(Embed_df=Embed_df, allscore=allscore)
# v_w_dmatrix

3.2.尋找elastic-net最佳參數

In [31]:
# ### test ###
# target = 'bs'
# Tp=1

# ML_df_date_new = ML_df_date.copy()
# ML_df_date_new[f'ans(t-0)'] = ML_df_date_new[f'{target}(t-0)'].shift(-Tp) # step.1: 先將target往前移Tp, 製作y
# # ML_df_date_new = ML_df_date_new.multiply(w_tp, axis=0) # step.2: 再將data乘上距離加權
# ML_df_date_new = ML_df_date_new[:-(Tp+1)] # step.3: 拿掉最後Tp+1個, 因為最後面的data是硬拼上去的
# ML_df_date_new

In [32]:
def find_MDRSmap_param(target, ML_df_date, theta_seq, v_w_dmatrix, Tp):

    result_ls = pd.DataFrame(columns=['Theta', 'Score', 'Param']) # 創建紀錄回測結果的dataframe

    ### 將原始資料乘上空間位置權數 ###
    tp = len(ML_df_date) -1
    tp_distence = v_w_dmatrix[tp] # 第tp個時點離其他時點的距離
    mask = np.ones(len(tp_distence), dtype=bool) # 遮蔽該時點計算平均數
    mask[tp] = False
    dpar = np.mean(tp_distence[mask]) # 第tp個時點離其他時點的平均數

    for theta in theta_seq:
        w_tp = np.exp(-theta * tp_distence / dpar) # 計算每個時點資料的加權
        w_tp = np.sqrt(w_tp)

        ### 加入答案列 ###
        ML_df_date_new = ML_df_date.copy()
        ML_df_date_new[f'ans(t-0)'] = ML_df_date_new[f'{target}(t-0)'].shift(-Tp) # step.1: 先將target往前移Tp, 製作y
        ML_df_date_new = ML_df_date_new.multiply(w_tp, axis=0) # step.2: 再將data乘上距離加權
        ML_df_date_new['ans(t-0)'] = ML_df_date_new['ans(t-0)'].apply(lambda x: 1.0 if x != 0 else x) # step.3: 把ans非0的部分變成1
        ML_df_date_new = ML_df_date_new[:-(Tp+1)] # step.4: 拿掉最後Tp+1個, 因為最後面的data是硬拼上去的
        # ML_df_date_new = ML_df_date_new.drop(columns=[f'{target}(t-0)']) # step.5: 原paper有刪target啦, 這邊可選擇刪或不刪
        # ML_df_date_new = ML_df_date_new.dropna().reset_index(drop=True) # 不確定要不要用

        ### 分拆train, validation(以近60天為基準) ###
        X = ML_df_date_new.iloc[:, :-1]
        y = ML_df_date_new.iloc[:, -1]
        val_fold = [-1] * (len(X)-60) + [0] * 60 # 最後60筆當validation set
        ps = PredefinedSplit(test_fold=val_fold)

        logistic_elastic_net = LogisticRegression(penalty='elasticnet', 
                                                  solver='saga', # 只有saga支持elasticnet
                                                  random_state=87)

        ### grid search ###
        param_grid = {'l1_ratio': [0.9, 0.1, 0.01, 0.001, 0.0001],
                      'C': [0.001, 0.01, 0.1, 1, 10, 100],
                      'tol': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
                      'fit_intercept': [True], 
                      'intercept_scaling': [0.1],
                      'warm_start': [True]}
        grid_search = GridSearchCV(estimator=logistic_elastic_net, 
                                   param_grid=param_grid, 
                                   cv=ps, scoring='accuracy', 
                                   return_train_score=True)       

        grid_search.fit(X, y)

        ### 記錄結果 ###
        result_ls.loc[len(result_ls), 'Theta'] = theta
        result_ls.loc[len(result_ls)-1, 'Score'] = grid_search.best_score_
        result_ls.loc[len(result_ls)-1, 'Param'] = [grid_search.best_params_]

        theta = result_ls['Theta'][result_ls['Score'].idxmax()]
        param = result_ls['Param'][result_ls['Score'].idxmax()][0]

    return result_ls, theta, param

In [33]:
# result_ls, theta, param = find_MDRSmap_param(target='bs', 
#                                              ML_df_date=ML_df_date, 
#                                              theta_seq=[1,2,4,7,11,16,22], 
#                                              v_w_dmatrix=v_w_dmatrix,
#                                              Tp=1)

In [34]:
# result_ls['Param'][5]

3.3.用最佳參數訓練MDRSmap

In [35]:
def MDRSmap_model(target, ML_df_date, theta, v_w_dmatrix, param, Tp):

    ### 將原始資料乘上空間位置權數 ###
    tp = len(ML_df_date) -1
    tp_distence = v_w_dmatrix[tp] # 第tp個時點離其他時點的距離
    mask = np.ones(len(tp_distence), dtype=bool) # 遮蔽該時點計算平均數
    mask[tp] = False
    dpar = np.mean(tp_distence[mask]) # 第tp個時點離其他時點的平均數

    w_tp = np.exp(-theta * tp_distence / dpar) # 計算每個時點資料的加權
    w_tp = np.sqrt(w_tp)

    ### 加入答案列 ###
    ML_df_date_new = ML_df_date.copy()
    ML_df_date_new[f'ans(t-0)'] = ML_df_date_new[f'{target}(t-0)'].shift(-Tp) # step.1: 先將target往前移Tp, 製作y
    ML_df_date_new = ML_df_date_new.multiply(w_tp, axis=0) # step.2: 再將data乘上距離加權
    ML_df_date_new['ans(t-0)'] = ML_df_date_new['ans(t-0)'].apply(lambda x: 1.0 if x != 0 else x) # step.3: 把ans非0的部分變成1
    ML_df_date_new = ML_df_date_new[:-(Tp+1)] # step.4: 拿掉最後Tp+1個, 因為最後面的data是硬拼上去的
    # ML_df_date_new = ML_df_date_new.drop(columns=[f'{target}(t-0)']) # step.5: 原paper有刪target啦, 這邊可選擇刪或不刪
    # ML_df_date_new = ML_df_date_new.dropna().reset_index(drop=True) # 不確定要不要用

    ### 分拆train, validation(以近60天為基準) ###
    X = ML_df_date_new.iloc[:, :-1]
    y = ML_df_date_new.iloc[:, -1]

    logistic_elastic_net = LogisticRegression(penalty='elasticnet', 
                                              solver='saga', # 只有saga支持elasticnet
                                              random_state=87,
                                              **param)
                             

    logistic_elastic_net.fit(X, y)

    return logistic_elastic_net

In [36]:
# logistic_elastic_net = MDRSmap_model(target='bs', ML_df_date=ML_df_date, 
#                                      theta=theta, v_w_dmatrix=v_w_dmatrix, param=param, Tp=1)

3.4.進行預測

In [37]:
# X_pred = np.array(ML_df_date.iloc[-1]).reshape(1, -1)
# y_pred = logistic_elastic_net.predict(X_pred)
# y_pred = y_pred[0]
# y_pred

3.5.製作評估dataframe

In [38]:
# Date = origi_data['Date'][(origi_data['Date']>='2023-07-01')&(origi_data['Date']<='2023-11-30')].reset_index(drop=True)
# Today = origi_data['bs'][(origi_data['Date']>='2023-07-01')&(origi_data['Date']<='2023-11-30')].reset_index(drop=True)
# Yesterday = origi_data['bs'][(origi_data['Date']>='2023-06-30')&(origi_data['Date']<='2023-11-29')].reset_index(drop=True)

# MDRSmap_result = pd.DataFrame(Date)
# MDRSmap_result['Observations'] = Today
# MDRSmap_result['Predictions'] = None
# MDRSmap_result['Yesterday'] = Yesterday
# MDRSmap_result

In [39]:
# th=0
# MDRSmap_result.loc[th, 'Predictions'] = y_pred
# MDRSmap_result

In [40]:
# MDRSmap_result['Date'][th]

### 四、完整預測流程

In [47]:
### test ###
th = 0
ticker_data = get_ticker_data(data=data, ticker=2330)
ticker_data = make_action_df(ticker_data=ticker_data, Tp=1)
ticker_data, origi_data = feature_engineering(ticker_data=ticker_data)
ticker_Library, ticker_Prediction = splite_Lib_Pred(ticker_data=ticker_data, 
                                      start_date='2023-06-30', end_date='2023-11-29')
ticker_Library, ticker_Prediction, _ = make_data_minmax(Library=ticker_Library, Prediction=ticker_Prediction) 

ticker_Lib_Pred_df = concate_pred_data(Library=ticker_Library, Prediction=ticker_Prediction, th=th)
formatted_columns, train_feature = find_train_target_feature(data=ticker_Lib_Pred_df, target='bs')
ticker_target_OED, ticker_target_OED_rho = find_target_OED(data=ticker_Lib_Pred_df, target='bs')
ticker_rho_sig_df = find_rho_sig_df(data=ticker_Lib_Pred_df, ticker=2330, target='bs', 
                                    target_OED=ticker_target_OED, train_feature=train_feature, E_max=10)
ticker_Embed_df, ticker_ML_df_date = make_Embed_df(data=ticker_Lib_Pred_df, max_lag=10, target='bs', rho_sig_df=ticker_rho_sig_df)
ticker_allscore = make_random_simplex(Embed_df=ticker_Embed_df, target='bs', target_OED=ticker_target_OED, kmax=10000, kn=5)

### 這邊要concate所有股票的Library資料 + 一個Predict ###
market_Library = make_market_Library(data=data, ticker=2330, ticker_Prediction=ticker_Prediction)

### 實驗 ###
market_Lib_Pred_df = concate_pred_data(Library=market_Library, Prediction=ticker_Prediction, th=th)
market_Lib_Pred_df['Date'] = range(len(market_Lib_Pred_df)) 
### 實驗 ###

market_Embed_df, market_ML_df_date = make_Embed_df(data=market_Lib_Pred_df, max_lag=10, target='bs', rho_sig_df=ticker_rho_sig_df)

# ### 用market_Embed_df繼續後面的演算法 ###
market_v_w_dmatrix = compute_view_w_distance(Embed_df=market_Embed_df, allscore=ticker_allscore)
# result_ls, theta, param = find_MDRSmap_param(target='bs', 
#                                                 ML_df_date=market_ML_df_date, 
#                                                 theta_seq=[1,2,4,7,11,16,22], 
#                                                 v_w_dmatrix=market_v_w_dmatrix,
#                                                 Tp=1)
# logistic_elastic_net = MDRSmap_model(target='bs', ML_df_date=market_ML_df_date, 
#                                         theta=theta, v_w_dmatrix=market_v_w_dmatrix, param=param, Tp=1)

# ### 預測 ###
# X_pred = np.array(market_ML_df_date.iloc[-1]).reshape(1, -1)
# y_pred = logistic_elastic_net.predict(X_pred)
# y_pred = y_pred[0]

error: 6901
error: 6526
error: 6805


In [50]:
market_v_w_dmatrix = compute_view_w_distance(Embed_df=market_Embed_df, allscore=ticker_allscore)

: 

4.1.完整預測流程

In [46]:
### 一、Input資料 ###
data = pd.read_csv('/Users/yitsung/git/MastersThesis/data/TaiwanStockData_Top100_EMA')
ticker_data = get_ticker_data(data=data, ticker=2330)
ticker_data = make_action_df(ticker_data=ticker_data, Tp=1)
ticker_data, origi_data = feature_engineering(ticker_data=ticker_data)
ticker_Library, ticker_Prediction = splite_Lib_Pred(ticker_data=ticker_data, 
                                      start_date='2023-06-30', end_date='2023-11-29')
ticker_Library, ticker_Prediction, _ = make_data_minmax(Library=ticker_Library, Prediction=ticker_Prediction) 

### 製作評估dataframe ###
Date = origi_data['Date'][(origi_data['Date']>='2023-07-01')&(origi_data['Date']<='2023-11-30')].reset_index(drop=True)
Today = origi_data['bs'][(origi_data['Date']>='2023-07-01')&(origi_data['Date']<='2023-11-30')].reset_index(drop=True)
Yesterday = origi_data['bs'][(origi_data['Date']>='2023-06-30')&(origi_data['Date']<='2023-11-29')].reset_index(drop=True)

MDRSmap_result = pd.DataFrame(Date)
MDRSmap_result['Observations'] = Today
MDRSmap_result['Predictions'] = None
MDRSmap_result['Yesterday'] = Yesterday

### 開始進行MDRSmap演算法 ###
for th in range(0, len(ticker_Prediction)):

    if th == 0: ### th = 0, 要找view ###
        ticker_Lib_Pred_df = concate_pred_data(Library=ticker_Library, Prediction=ticker_Prediction, th=th)
        formatted_columns, train_feature = find_train_target_feature(data=ticker_Lib_Pred_df, target='bs')
        ticker_target_OED, ticker_target_OED_rho = find_target_OED(data=ticker_Lib_Pred_df, target='bs')
        ticker_rho_sig_df = find_rho_sig_df(data=ticker_Lib_Pred_df, ticker=2330, target='bs', 
                                            target_OED=ticker_target_OED, train_feature=train_feature, E_max=10)
        ticker_Embed_df, ticker_ML_df_date = make_Embed_df(data=ticker_Lib_Pred_df, max_lag=10, target='bs', rho_sig_df=ticker_rho_sig_df)
        ticker_allscore = make_random_simplex(Embed_df=ticker_Embed_df, target='bs', target_OED=ticker_target_OED, kmax=10000, kn=5)

        ### 這邊要concate所有股票的Library資料 + 一個Predict ###
        market_Library = make_market_Library(data=data, ticker=2330, ticker_Prediction=ticker_Prediction)
        market_Lib_Pred_df = concate_pred_data(Library=market_Library, Prediction=ticker_Prediction, th=th)
        market_Embed_df, market_ML_df_date = make_Embed_df(data=market_Lib_Pred_df, max_lag=10, target='bs', rho_sig_df=ticker_rho_sig_df)

        ### 用market_Embed_df繼續後面的演算法 ###
        market_v_w_dmatrix = compute_view_w_distance(Embed_df=market_Embed_df, allscore=ticker_allscore)
        result_ls, theta, param = find_MDRSmap_param(target='bs', 
                                                     ML_df_date=market_ML_df_date, 
                                                     theta_seq=[1,2,4,7,11,16,22], 
                                                     v_w_dmatrix=market_v_w_dmatrix,
                                                     Tp=1)
        logistic_elastic_net = MDRSmap_model(target='bs', ML_df_date=market_ML_df_date, 
                                             theta=theta, v_w_dmatrix=market_v_w_dmatrix, param=param, Tp=1)
        
        ### 預測 ###
        X_pred = np.array(market_ML_df_date.iloc[-1]).reshape(1, -1)
        y_pred = logistic_elastic_net.predict(X_pred)
        y_pred = y_pred[0]

        ### 將結果併入評估dataframe ###
        MDRSmap_result.loc[th, 'Predictions'] = y_pred
        print(f"{MDRSmap_result['Date'][th]}: finished")
    
    else: ### th > 0, 僅算距離+預測 ###
        market_Lib_Pred_df = concate_pred_data(Library=market_Library, Prediction=ticker_Prediction, th=th)
        market_Embed_df, market_ML_df_date = make_Embed_df(data=market_Lib_Pred_df, max_lag=10, target='bs', rho_sig_df=ticker_rho_sig_df)
        market_v_w_dmatrix = compute_view_w_distance(Embed_df=market_Embed_df, allscore=ticker_allscore)
        result_ls, theta, param = find_MDRSmap_param(target='bs', 
                                                     ML_df_date=market_ML_df_date, 
                                                     theta_seq=[1,2,4,7,11,16,22], 
                                                     v_w_dmatrix=market_v_w_dmatrix,
                                                     Tp=1)
        logistic_elastic_net = MDRSmap_model(target='bs', ML_df_date=market_ML_df_date, 
                                             theta=theta, v_w_dmatrix=market_v_w_dmatrix, param=param, Tp=1)
        
        ### 預測 ###
        X_pred = np.array(market_ML_df_date.iloc[-1]).reshape(1, -1)
        y_pred = logistic_elastic_net.predict(X_pred)
        y_pred = y_pred[0]

        ### 將結果併入評估dataframe ###
        MDRSmap_result.loc[th, 'Predictions'] = y_pred
        print(f"{MDRSmap_result['Date'][th]}: finished")

KeyboardInterrupt: 

4.3.評估結果

In [None]:
MDRSmap_result.head(60)

In [None]:
ACC = len(MDRSmap_result[MDRSmap_result['Predictions'] == MDRSmap_result['Observations']]) / len(MDRSmap_result['Observations'])
print('ACC: ', ACC)