In [3]:
# 載入需要的套件
import os
import sys

import pandas as pd
import yfinance as yf

# utils_folder_path = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
# sys.path.append(utils_folder_path)
current_notebook_dir = os.getcwd()
project_root_path = os.path.dirname(os.path.dirname(current_notebook_dir))
sys.path.append(project_root_path)

# 載入 Chapter2/utils/ 資料夾中的 alphas191.py 模組
import Chapter2.utils.alphas as alphas # noqa: E402
import Chapter2.utils.alphas191 as alphas191 # noqa: E402

In [4]:
"""
備註:
在計算 Alpha 因子時，許多指標會依賴過去數天的歷史數據，
因此如果只選取所需的日期範圍，可能會導致早期的 Alpha 因子無法正確計算．
為了解決這個問題，建議再選取資料時擴大日期範圍，
這樣可以確保計算 Alpha 因子時有足夠的歷史數據可用。
計算完所有因子後，再篩選出需要分析的時間段資料即可。
"""

# 使用 yfinance 取得台泥 (1101.TW) 股價資料，日期範圍為 2021-11-01 到 2022-12-31
data = (pd.DataFrame(yf.download("1101.TW", start="2021-11-01", end="2022-12-31"))
        .droplevel("Ticker", axis=1)
        .reset_index()
        .ffill()
)


  data = (pd.DataFrame(yf.download("1101.TW", start="2021-11-01", end="2022-12-31"))
[*********************100%***********************]  1 of 1 completed


In [6]:
print(data)

Price       Date      Close       High        Low       Open    Volume
0     2021-11-01  39.332577  39.332577  39.210300  39.332577   9347376
1     2021-11-02  39.251057  39.740167  39.210299  39.454852  10881205
2     2021-11-03  39.251057  39.332576  39.169538  39.251057   7075200
3     2021-11-04  39.210304  39.414100  39.169543  39.291820   7817757
4     2021-11-05  39.251057  39.251057  39.006503  39.169538  15485091
..           ...        ...        ...        ...        ...       ...
285   2022-12-26  31.681591  31.727639  31.405295  31.635539  12280553
286   2022-12-27  31.589491  31.957883  31.589491  31.819735  11350887
287   2022-12-28  31.589491  31.635539  31.267151  31.451347  13791324
288   2022-12-29  31.036907  31.543444  31.036907  31.543444  13374728
289   2022-12-30  30.990858  31.313198  30.990858  31.221102   8488022

[290 rows x 6 columns]


In [15]:
data.columns.name = None
# 重新命名資料表的欄位名稱，以符合 Alpha191 模組要求
data = data.rename(
    columns={
        "Close": "close", # 收盤價
        "Open": "open", # 開盤價
        "Volume": "volume", # 交易量
        "Low": "low", # 最低價
        "High": "high", # 最高價
    }
)

In [16]:
print(data)

          Date      close       high        low       open    volume  \
0   2021-11-01  39.332577  39.332577  39.210300  39.332577   9347376   
1   2021-11-02  39.251057  39.740167  39.210299  39.454852  10881205   
2   2021-11-03  39.251057  39.332576  39.169538  39.251057   7075200   
3   2021-11-04  39.210304  39.414100  39.169543  39.291820   7817757   
4   2021-11-05  39.251057  39.251057  39.006503  39.169538  15485091   
..         ...        ...        ...        ...        ...       ...   
285 2022-12-26  31.681591  31.727639  31.405295  31.635539  12280553   
286 2022-12-27  31.589491  31.957883  31.589491  31.819735  11350887   
287 2022-12-28  31.589491  31.635539  31.267151  31.451347  13791324   
288 2022-12-29  31.036907  31.543444  31.036907  31.543444  13374728   
289 2022-12-30  30.990858  31.313198  30.990858  31.221102   8488022   

     benchmark_open  benchmark_close  
0         11.376665        11.142770  
1         11.133414        11.049212  
2         11.04921

In [17]:
# 加入基準資料 (0050.TW) 的資料，用於 Alpha191 模組比較分析
benchmark_data = (
    pd.DataFrame(yf.download("0050.tw", start="2016-01-01", end="2019-12-31"))
    .droplevel("Ticker", axis=1)
    .reset_index()
    .ffill()
)

  pd.DataFrame(yf.download("0050.tw", start="2016-01-01", end="2019-12-31"))
[*********************100%***********************]  1 of 1 completed


In [18]:

benchmark_data.columns

Index(['Date', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object', name='Price')

In [19]:
benchmark_data.columns.name = None
# 將基準資料的開盤價與收盤價加入到 data 中
data["benchmark_open"] = benchmark_data["Open"]
data["benchmark_close"] = benchmark_data["Close"]
# 填補資料中的缺失值
data = data.ffill().dropna()


In [20]:
# 初始化 Alpha191 類別，並傳入台泥的股票資料
alpha_1101 = alphas191.Alphas191(data)
# 取得所有 Alpha 方法的列表
alpha_methods = alphas.Alphas.get_alpha_methods(alphas191.Alphas191)
alpha_dict = {} # 儲存成功執行的 Alpha 因子結果
error_method = [] # 儲存執行失敗的 Alpha 方法名稱
success_mthod = [] # 儲存執行成功的 Alpha 方法名稱

In [32]:
# 逐一執行所有 Alpha 方案，並記錄執行成功或失敗的情況
for method in alpha_methods:
    try:
        # 執行每個 Alpha 方法，並將結果存入 DataFrame
        df = getattr(alpha_1101, method)()
        # 根據產生的欄位數量，為結果設定新的欄位名稱
        new_columns = [f"{method}_{i+1}" for i in range(int(df.shape[1]))]
        df.columns = new_columns
        # 將結果儲存到 alpha_dict 中
        alpha_dict[method] = df
        # 將成功的 Alpha 方法名稱加入 success_method 列表
        success_mthod.append(method)
    except Exception as e:
        # 如果執行失敗，將失敗的 Alpha 方法名稱加入 error_method 列表，並顯示錯誤訊息
        error_method.append(method)
        print(f"Error in method {method}: {e}")

# 計算執行失敗的 Alpha 方法數量
print(f"error method len: {len(error_method)}")

print(f"success_mthod len: {len(success_mthod)}")

# 將所有成功執行的 Alpha 方法的結果合併成一個 DataFrame
# 每個 column 代表一個 Alpha 因子
alpha_data = pd.concat(alpha_dict.values(), axis=1)

error method len: 0
success_mthod len: 528


In [25]:
type(alpha_1101)

Chapter2.utils.alphas191.Alphas191

In [28]:
df = getattr(alpha_1101, method)()

In [31]:
# 設定三個條件來篩選 Alpha 因子：

# 條件ㄧ：保留遺失值比例小於 10% 的因子
# 計算每個因子遺失值的比例
missing_ratios = alpha_data.isnull().mean()
# 只保留遺失值比例小於 10% 的因子
keeping_columns = missing_ratios[missing_ratios < 0.1].index
print(f"剩下 {len(keeping_columns)} 個因子")

剩下 108 個因子


In [33]:
# 條件二：保留 0 值比例小於 10% 的因子
# 計算每個因子中 0 值的比例
zero_ratios = (alpha_data == 0).mean()
# 只保留 0 值比例小於 10% 的因子
keeping_columns = [col for col in keeping_columns if zero_ratios[col] < 0.1]
print(f"剩下 {len(keeping_columns)} 個因子")

剩下 88 個因子


In [34]:
# 條件三：只保留浮點數型別的因子
# 檢查每個因子是否為浮點數型別，並只保留是浮點數型別的因子
keeping_columns = [
    col for col in keeping_columns if pd.api.types.is_float_dtype(alpha_data[col])
]
print(f"剩下 {len(keeping_columns)} 個因子")
print(f"剩下的因子名稱: {keeping_columns}")

剩下 88 個因子
剩下的因子名稱: ['alpha004_1', 'alpha010_1', 'alpha014_1', 'alpha016_1', 'alpha016_2', 'alpha016_3', 'alpha016_4', 'alpha016_5', 'alpha018_1', 'alpha019_1', 'alpha020_1', 'alpha021_1', 'alpha022_1', 'alpha023_1', 'alpha024_1', 'alpha027_1', 'alpha031_1', 'alpha032_1', 'alpha032_2', 'alpha034_1', 'alpha035_1', 'alpha036_1', 'alpha036_2', 'alpha036_3', 'alpha036_4', 'alpha036_5', 'alpha039_1', 'alpha046_1', 'alpha053_1', 'alpha058_1', 'alpha063_1', 'alpha064_1', 'alpha064_5', 'alpha065_1', 'alpha066_1', 'alpha067_1', 'alpha071_1', 'alpha074_1', 'alpha074_2', 'alpha074_3', 'alpha074_4', 'alpha074_5', 'alpha079_1', 'alpha080_1', 'alpha081_1', 'alpha086_1', 'alpha088_1', 'alpha089_1', 'alpha090_1', 'alpha090_2', 'alpha090_3', 'alpha090_4', 'alpha090_5', 'alpha097_1', 'alpha098_1', 'alpha100_1', 'alpha102_1', 'alpha106_1', 'alpha108_2', 'alpha112_1', 'alpha115_2', 'alpha115_4', 'alpha116_1', 'alpha122_1', 'alpha127_1', 'alpha129_1', 'alpha130_2', 'alpha130_3', 'alpha130_5', 'alpha135_1', 

In [35]:
# 根據篩選條件保留符合條件的 Alpha 因子
alpha_data = alpha_data[keeping_columns].ffill().dropna()


In [39]:
alpha_data

Unnamed: 0,alpha004_1,alpha010_1,alpha014_1,alpha016_1,alpha016_2,alpha016_3,alpha016_4,alpha016_5,alpha018_1,alpha019_1,...,alpha157_1,alpha160_1,alpha167_1,alpha168_1,alpha169_1,alpha173_1,alpha174_1,alpha179_3,alpha179_5,alpha189_1
28,1.0,1.0,0.896698,-0.2,-0.2,-0.2,-0.2,-0.2,1.023758,0.023207,...,2.0,0.132588,1.630363,-0.611023,-0.033493,41.873174,0.167204,0.1,0.1,0.391740
29,1.0,1.0,0.733662,-0.2,-0.2,-0.2,-0.2,-0.2,1.019396,0.019027,...,4.0,0.159647,1.630363,-0.655386,-0.032856,41.940139,0.158844,0.1,0.1,0.366831
30,1.0,1.0,-0.163033,-0.2,-0.2,-0.2,-0.2,-0.2,0.995772,-0.004228,...,3.0,0.183450,1.630363,-0.526089,-0.031816,41.926909,0.150901,0.1,0.1,0.404193
31,1.0,1.0,-0.448349,-0.2,-0.2,-0.2,-0.2,-0.2,0.988433,-0.011567,...,2.0,0.203564,1.630363,-0.642366,-0.030302,41.886461,0.143356,0.1,0.1,0.344187
32,1.0,1.0,-0.611385,-0.2,-0.2,-0.2,-0.2,-0.2,0.984277,-0.015723,...,4.0,0.219994,1.630363,-0.771400,-0.027697,41.842418,0.136189,0.1,0.1,0.267198
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285,1.0,1.0,0.506540,-0.2,-0.2,-0.2,-0.2,-0.2,1.016248,0.015988,...,2.0,0.359398,2.624777,-0.352646,0.003972,35.264155,0.394764,0.1,0.1,0.168845
286,1.0,1.0,0.552584,-0.2,-0.2,-0.2,-0.2,-0.2,1.017804,0.017493,...,6.0,0.359314,2.164289,-0.331075,0.003390,35.306307,0.375025,0.1,0.1,0.189311
287,1.0,1.0,0.230240,-0.2,-0.2,-0.2,-0.2,-0.2,1.007342,0.007288,...,4.0,0.359841,2.026146,-0.421711,0.002827,35.330074,0.356274,0.1,0.1,0.177799
288,1.0,1.0,-0.690733,-0.2,-0.2,-0.2,-0.2,-0.2,0.978229,-0.021771,...,3.0,0.360297,2.026146,-0.425121,0.002080,35.111195,0.338460,0.1,0.1,0.227685


In [40]:
print(alpha_data)

     alpha004_1  alpha010_1  alpha014_1  alpha016_1  alpha016_2  alpha016_3  \
28          1.0         1.0    0.896698        -0.2        -0.2        -0.2   
29          1.0         1.0    0.733662        -0.2        -0.2        -0.2   
30          1.0         1.0   -0.163033        -0.2        -0.2        -0.2   
31          1.0         1.0   -0.448349        -0.2        -0.2        -0.2   
32          1.0         1.0   -0.611385        -0.2        -0.2        -0.2   
..          ...         ...         ...         ...         ...         ...   
285         1.0         1.0    0.506540        -0.2        -0.2        -0.2   
286         1.0         1.0    0.552584        -0.2        -0.2        -0.2   
287         1.0         1.0    0.230240        -0.2        -0.2        -0.2   
288         1.0         1.0   -0.690733        -0.2        -0.2        -0.2   
289         1.0         1.0   -0.368393        -0.2        -0.2        -0.2   

     alpha016_4  alpha016_5  alpha018_1  alpha019_1