In [28]:
# 載入必要套件
import pandas as pd
import numpy as np
import sys
import os
import gc
from loguru import logger

# 設定路徑以匯入專案模組
sys.path.append("../../../note")
sys.path.append(os.getcwd())

# 自動重載模組 (方便開發)
%load_ext autoreload
%autoreload 2

# 匯入自定義模組
from module.get_info_FinMind import FinMindClient
from module.get_info_Finlab import FinlabClient
from utils import batch_fetch_prices, run_event_study, process_disposal_events
from analyzer import DisposalAnalyzer

# Notebook-wide settings
OFFSET_DAYS = 5  # buffer of trading days kept around each event (to observe s-5 ~ e+5)
START_DATE = "2018-01-01"  # earliest date of data to load
DATA_DIR = "../../data/disposal"  # where intermediate and final files are stored

# Create the storage directory up front; a no-op when it already exists.
os.makedirs(DATA_DIR, exist_ok=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Setup

## Data Preparation

### 抓取處置股名單 (Finlab)

In [29]:
finlab_client = FinlabClient()
print("Fetching disposal information from Finlab...")

# Download the raw disposal announcements starting at START_DATE.
finlab_disposal = finlab_client.get_data("disposal_information", start_date=START_DATE)

if finlab_disposal.empty:
    # Nothing came back: leave an empty frame so later cells can detect it.
    print("[Error] No data fetched from Finlab.")
    processed_disposal = pd.DataFrame()
else:
    # Normalise the date column, then keep only rows on/after START_DATE.
    finlab_disposal['date'] = pd.to_datetime(finlab_disposal['date'])
    finlab_disposal = finlab_disposal.loc[finlab_disposal['date'] >= START_DATE]

    print(f"Fetched {len(finlab_disposal):,} records.")

    # Pre-processing step: computes the consecutive disposal level
    # (Disposal Level) per the project helper in utils.
    processed_disposal = process_disposal_events(finlab_disposal)

    # Persist a checkpoint so later cells can resume from disk.
    save_path = f'{DATA_DIR}/processed_disposal_events.csv'
    processed_disposal.to_csv(save_path, index=False, encoding='utf-8-sig')
    print(f"Saved processed events to: {save_path}")

Fetching disposal information from Finlab...
Fetched 3,383 records.
Columns before processing: ['Stock_id', 'date', '證券名稱', 'condition', '處置措施', '處置內容', 'event_start_date', 'event_end_date', 'interval', 'key_date']
Saved processed events to: ../../data/disposal/processed_disposal_events.csv


### 抓取個股股價 (FinMind)

In [30]:
# Initialise the FinMind client used for the price downloads.
fm_client = FinMindClient()

# Load the processed event table from its checkpoint when it is not in memory
# (after a kernel restart the notebook can be resumed from this cell).
# Stock_id is read as text: letting pandas infer the dtype turns numeric-looking
# IDs into integers, which drops leading zeros (e.g. "0050" -> 50).
# NOTE(review): assumes the in-memory frame from Finlab also holds string IDs — confirm.
if 'processed_disposal' not in locals():
    processed_disposal = pd.read_csv(
        f'{DATA_DIR}/processed_disposal_events.csv',
        dtype={'Stock_id': str},
    )

# Batch-download prices for every event (runs with parallel workers).
if not processed_disposal.empty:
    print("Starting batch price fetch (this may take a while)...")
    price_df = batch_fetch_prices(fm_client, processed_disposal, offset_days=OFFSET_DAYS, max_workers=10)

    if not price_df.empty:
        # Checkpoint the prices so the event-integration cell can reload them.
        save_path = f'{DATA_DIR}/price_df.csv'
        price_df.to_csv(save_path, index=False)
        print(f"Fetched {len(price_df):,} rows over {price_df['Stock_id'].nunique()} stocks.")
        print(f"Saved prices to: {save_path}")
    else:
        print("[Warning] No price data fetched.")
else:
    print("[Error] Processed disposal dataframe is empty.")

# Release memory held by temporaries from the batch download.
gc.collect()

Starting batch price fetch (this may take a while)...
Using pre-processed columns 'event_start_date' and 'event_end_date'.
Starting batch fetch for 1317 stocks with 10 workers...


Fetching Prices: 100%|██████████| 1317/1317 [00:35<00:00, 36.85it/s]


Fetched total 45255 rows.
Fetched 45,255 rows over 1042 stocks.
Saved prices to: ../../data/disposal/price_df.csv


5446

## Market Benchmark

In [35]:
print("Fetching TAIEX (Taiwan Stock Index) data...")
# Create the client here when the price-fetch cell has not been run.
if 'fm_client' not in locals():
    fm_client = FinMindClient()

# Download the market index (TAIEX); every failure path falls back to an
# empty frame so the rest of the notebook can continue without market data.
try:
    taiex = fm_client.get_data(
        dataset="TaiwanStockPrice",
        data_id="TAIEX",
        start_date=START_DATE,
        end_date=pd.Timestamp.now().strftime('%Y-%m-%d'),
    )
except KeyError as err:
    taiex = pd.DataFrame()
    if 'data' in str(err):
        print(f"[Warning] FinMind API returned invalid response: {err}")
        print("          Proceeding without Market Return data (AR calculation will be partial).")
    else:
        print(f"[Error] Unexpected KeyError: {err}")
except Exception as err:
    taiex = pd.DataFrame()
    print(f"[Error] Failed to fetch TAIEX data: {err}")

if taiex.empty:
    print("[Warning] No TAIEX data available. Market return (market_ret) will be NaN.")
    market_df = pd.DataFrame()
else:
    # Only date, open and close are needed to derive the market return.
    market_df = taiex[['date', 'open', 'close']].rename(
        columns={'date': 'Date', 'open': 'market_open', 'close': 'market_close'}
    )
    market_df['Date'] = pd.to_datetime(market_df['Date'])

    # Market return as computed here is close relative to the same day's open.
    market_df['market_ret'] = market_df['market_close'].div(market_df['market_open']).sub(1)

    # Drop rows where the return could not be computed.
    market_df = market_df.dropna(subset=['market_ret'])

    print(f"Fetched {len(market_df):,} market records.")

Fetching TAIEX (Taiwan Stock Index) data...
Fetched 1,947 market records.


## 事件整合 (Event Integration)

In [40]:
# Reload the latest checkpoints from disk (guards against lost kernel variables).
# Stock_id is read as text in BOTH files: with dtype inference, numeric-looking
# IDs become integers (leading zeros are dropped) and the two frames can end up
# with different key dtypes if only one of them contains alphanumeric IDs.
price_df = pd.read_csv(f'{DATA_DIR}/price_df.csv', dtype={'Stock_id': str})
processed_disposal = pd.read_csv(
    f'{DATA_DIR}/processed_disposal_events.csv',
    dtype={'Stock_id': str},
)

# Run the core logic (implemented in utils.py). It returns:
# 1. disposal_wide: wide table (signal use)
# 2. disposal_long: long table (analysis use)
print("Running Event Study algorithm...")
disposal_wide, disposal_long = run_event_study(price_df, processed_disposal, offset_days=OFFSET_DAYS)

if not disposal_long.empty:
    # Attach the market benchmark columns to the long table.
    print("Merging Market Data...")
    if 'market_df' in locals() and not market_df.empty:
        # Both sides must hold datetimes for the join on Date to match.
        disposal_long['Date'] = pd.to_datetime(disposal_long['Date'])
        disposal_long = disposal_long.merge(market_df, on='Date', how='left')

        # Abnormal return (AR = R_stock - R_market) is currently disabled:
        # disposal_long['abnormal_ret'] = disposal_long['daily_ret'] - disposal_long['market_ret']
    else:
        # Make the skipped merge visible instead of failing silently.
        print("[Warning] Market data unavailable; skipping market merge.")

    # Persist the final results.
    disposal_wide.to_csv(f'{DATA_DIR}/disposal_df_wide.csv', index=False, encoding='utf-8-sig')
    disposal_long.to_csv(f'{DATA_DIR}/disposal_df_long.csv', index=False, encoding='utf-8-sig')

    print(f"Analysis Ready! Data shapes: Wide {disposal_wide.shape}, Long {disposal_long.shape}")
else:
    print("[Error] Event study returned empty result.")

Running Event Study algorithm...






Detected Disposal Levels: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22), np.int64(23), np.int64(24), np.int64(25), np.int64(26), np.int64(27), np.int64(28), np.int64(29), np.int64(30), np.int64(31), np.int64(32)]
Converting to Wide Format...
Analysis completed. Wide shape: (45045, 354), Long shape: (55599, 55)
Merging Market Data...
Analysis Ready! Data shapes: Wide (45045, 354), Long (55599, 58)


# Analysis

## overall

In [44]:
# Prepare the inputs for the analysis module.
DATA_DIR = '../../data/disposal'

# Fall back to the saved long table when it is not already in memory.
if 'disposal_long' not in globals():
    disposal_long = pd.read_csv(
        f'{DATA_DIR}/disposal_df_long.csv',
        parse_dates=['Date'],
    )

# Build the analyzer on the long-format event table.
analyzer = DisposalAnalyzer(disposal_long)

# Run the overall analysis, which reports:
# - distribution of disposal conditions
# - distribution of disposal levels (days vs. events)
# - return charts
analyzer.overall_analysis()


[Disposal Condition Distribution]


Unnamed: 0,condition,days_count,event_count,days_pct
0,因連續3個營業日達本中心作業要點第四條第一項第一款,24525,1253,44.11%
1,連續三次,13674,760,24.59%
2,最近十個營業日已有六次,3602,183,6.48%
3,連續五次,3174,161,5.71%
4,最近10個營業日內有6個營業日,2631,132,4.73%
5,連續5個營業日,1769,89,3.18%
6,連續3個營業日及沖銷標準,1693,77,3.05%
7,連續三次及當日沖銷標準,1444,66,2.60%
8,監視業務督導會報決議,1300,65,2.34%
9,連續5個營業日及沖銷標準,701,32,1.26%



[Disposal Level Statistics]


Unnamed: 0,disposal_level,days_count,event_count,mean,std
0,1,43670,2192,0.45%,21.99%
1,2,7548,379,0.44%,5.62%
2,3,1783,93,0.35%,6.32%
3,4,626,31,0.61%,6.17%
4,5,339,17,0.33%,6.03%
5,6,200,10,0.79%,5.73%
6,7,139,7,0.74%,5.06%
7,8,118,6,0.66%,5.19%
8,9,99,5,0.90%,4.25%
9,10,98,5,0.51%,3.76%


## separate