# 處置股事件研究

In [1]:
# [Env Setup] 載入必要套件與設定
import pandas as pd
from tqdm import tqdm
from loguru import logger
import sys
import os
import gc
import matplotlib.pyplot as plt
import seaborn as sns

%load_ext autoreload
%autoreload 2
sys.path.append("/Users/xinc./Documents/GitHub/note")
sys.path.append(os.getcwd()) # 加入目前路徑以匯入 utils

from module.get_info_FinMind import FinMindClient, FinMindConfig
from module.get_info_Finlab import FinlabClient
from module.plot_func import plot
from utils import batch_fetch_prices, run_event_study, process_disposal_events

# Part 1: Data Preparation

### 核心功能
1. **資料整合**：自動對接 Finlab (處置資訊) 與 FinMind (股價) API。
2. **智慧分級**：實作連續處置判斷邏輯 (Strict Overlap)，自動識別第 1, 2...N 次處置事件。
3. **動態標籤**：產生時間軸標籤 `s+N` (處置開始/期間) 與 `e+N` (處置結束後)，包含解禁日 `e+0`。
4. **雙軌輸出**：
   - **Wide Format (`disposal_df_wide.csv`)**：訊號表 (Signal Table)，不含價格，專供回測系統產生交易訊號。
   - **Long Format (`disposal_df_long.csv`)**：分析表 (Analysis Table)，含完整價量與報酬率，專供統計研究與視覺化。

### 使用流程
- **Step 1**: 抓取處置公告。
- **Step 1.5**: 執行前處理 (分級與濾網)。
- **Step 2**: 平行化抓取處置期間股價。
- **Step 3**: 執行 Event Study 轉換，產出 Wide/Long CSV。

In [33]:
# [Step 1] Fetch disposal announcements from Finlab
# If a token is required, pass it at construction, e.g. FinlabClient(token="YOUR_TOKEN")

# Single source of truth for the lower date bound: it is used both for the API
# request and for the manual filter below, so the two can never drift apart.
DISPOSAL_START_DATE = '2018-01-01'

finlab_client = FinlabClient()
print("Fetching disposal information from Finlab...")

# Request a generous range so the period of interest is fully covered
finlab_disposal = finlab_client.get_data("disposal_information", start_date=DISPOSAL_START_DATE)

# [Manual Filter] Re-apply the date bound locally (works around the Finlab API's
# date-filter limitation noted by the original author)
if not finlab_disposal.empty:
    finlab_disposal['date'] = pd.to_datetime(finlab_disposal['date'])
    # .copy() so later cells that write into this frame operate on an independent
    # object rather than on a boolean-mask slice of the fetched data.
    finlab_disposal = finlab_disposal[finlab_disposal['date'] >= DISPOSAL_START_DATE].copy()
    print(f"Fetched {len(finlab_disposal)} records from Finlab.")
    print(f"Data Range: {finlab_disposal['date'].min()} to {finlab_disposal['date'].max()}")
else:
    print("No data fetched from Finlab.")

Fetching disposal information from Finlab...
Fetched 3383 records from Finlab.
Data Range: 2018-01-04 00:00:00 to 2025-09-26 00:00:00


In [38]:
# [Step 1.5] Preprocessing and level tagging
# Reshapes the Finlab disposal records and marks First/Second Disposal
if 'finlab_disposal' not in locals() or finlab_disposal.empty:
    # Nothing to process: leave an empty frame so downstream guards can test .empty
    print("Finlab data not available. Please run Step 1 first.")
    processed_disposal = pd.DataFrame()
else:
    print("Processing disposal events...")
    processed_disposal = process_disposal_events(finlab_disposal)

    # Optional: persist the processed events for inspection
    processed_disposal.to_csv(
        '../../data/disposal/processed_disposal_events.csv',
        index=False,
        encoding='utf-8-sig',
    )

Processing disposal events...
Columns before processing: ['Stock_id', 'date', '證券名稱', 'condition', '處置措施', '處置內容', 'event_start_date', 'event_end_date', 'interval', 'key_date']
Processed 3383 events.
Level Distribution:
disposal_level
1     2702
2      453
3      110
4       36
5       17
6        8
7        6
8        5
9        5
10       4
11       4
12       4
13       3
14       3
15       3
16       3
17       3
18       3
19       3
20       2
21       2
22       2
23       1
24       1
Name: count, dtype: int64


In [35]:
# [Step 2] Fetch prices in parallel (FinMind)
# Works from the processed event table (processed_disposal) so that
# consecutive disposal windows are not missed
logger.remove()
logger.add(sys.stderr, level="WARNING")  # only WARNING and above reach stderr

# FinMind client used for the batch download
fm_client = FinMindClient()

offset_days = 5

if 'processed_disposal' not in locals() or processed_disposal.empty:
    print("No processed disposal data found. Please run Step 1.5.")
else:
    # Start fetching
    price_df = batch_fetch_prices(fm_client, processed_disposal, offset_days=offset_days, max_workers=10)

    if price_df.empty:
        print("No price data fetched.")
    else:
        print(f"Fetched Price Data Shape: {price_df.shape}")
        display(price_df.head())

gc.collect()

Using pre-processed columns 'event_start_date' and 'event_end_date'.
Starting batch fetch for 1317 stocks with 10 workers...


Fetching Prices: 100%|██████████| 1317/1317 [00:39<00:00, 33.57it/s]

Fetched total 37289 rows.
Fetched Price Data Shape: (37289, 8)





Unnamed: 0,Date,Stock_id,Open,High,Low,Close,Volume,TradingAmount
0,2021-07-02,30001,24.5,26.8,22.1,26.6,16000,409800
1,2021-07-05,30001,26.6,26.6,25.5,26.0,4000,104100
2,2021-07-06,30001,27.0,28.0,27.0,27.0,8000,218100
3,2021-07-07,30001,27.0,27.0,25.3,25.3,16000,421800
4,2021-07-08,30001,25.3,25.3,25.0,25.3,8000,201200


602

In [14]:
# Passed as `offset_days` to run_event_study in Step 3; Step 2 sets the same value
# before calling batch_fetch_prices. Presumably re-declared here so Step 3 can run
# without re-running Step 2 — keep the two values in sync.
offset_days = 5

In [16]:
# [Step 3] Run the event study
# processed_disposal already carries flags such as is_first_disposal

# Read tickers as strings so both frames share one key dtype and codes keep any
# leading zeros / letter suffixes (same convention as the Part 2 CSV loader).
# NOTE(review): no cell visible in this notebook writes price_df.csv — confirm
# where that file is produced before relying on a fresh-kernel run.
price_df = pd.read_csv('../../data/disposal/price_df.csv', dtype={'Stock_id': str})
processed_disposal = pd.read_csv(
    '../../data/disposal/processed_disposal_events.csv',
    dtype={'Stock_id': str},
)
disposal_wide, disposal_long = run_event_study(price_df, processed_disposal, offset_days=offset_days)

if not disposal_wide.empty:
    print(f"Wide Format Shape: {disposal_wide.shape}")
    print(f"Long Format Shape: {disposal_long.shape}")

    print("\n[Wide Head]")
    display(disposal_wide.head())

    print("\n[Long Head]")
    display(disposal_long.head())

    # Save both tables
    disposal_wide.to_csv('../../data/disposal/disposal_df_wide.csv', index=False, encoding='utf-8-sig')
    disposal_long.to_csv('../../data/disposal/disposal_df_long.csv', index=False, encoding='utf-8-sig')
    print("Saved 'disposal_df_wide.csv' and 'disposal_df_long.csv'.")

else:
    print("Analysis returned empty DataFrame.")

Detected Disposal Levels: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22), np.int64(23)]
Converting to Wide Format...
Analysis completed. Wide shape: (12561, 186), Long shape: (16813, 49)
Wide Format Shape: (12561, 186)
Long Format Shape: (16813, 49)

[Wide Head]


Unnamed: 0,Date,Stock_id,t_label_first,condition_first,interval_first,event_start_date_first,event_end_date_first,relative_day_first,gap_days_first,calendar_relative_day_first,...,gap_days_level_22,calendar_relative_day_level_22,t_label_level_23,condition_level_23,interval_level_23,event_start_date_level_23,event_end_date_level_23,relative_day_level_23,gap_days_level_23,calendar_relative_day_level_23
0,2020-03-24,00642U,s-3,監視業務督導會報決議,5.0,2020-03-27,2020-04-13,-3.0,0.0,-3.0,...,,,,,,NaT,NaT,,,
1,2020-03-25,00642U,s-2,監視業務督導會報決議,5.0,2020-03-27,2020-04-13,-2.0,0.0,-2.0,...,,,,,,NaT,NaT,,,
2,2020-03-26,00642U,s-1,監視業務督導會報決議,5.0,2020-03-27,2020-04-13,-1.0,0.0,-1.0,...,,,,,,NaT,NaT,,,
3,2020-03-27,00642U,s+0,監視業務督導會報決議,5.0,2020-03-27,2020-04-13,0.0,0.0,0.0,...,,,,,,NaT,NaT,,,
4,2020-03-30,00642U,s+1,監視業務督導會報決議,5.0,2020-03-27,2020-04-13,1.0,2.0,3.0,...,,,,,,NaT,NaT,,,



[Long Head]


Unnamed: 0.1,Unnamed: 0,Date,Stock_id,Open,High,Low,Close,Volume,TradingAmount,trading_idx,...,t_label_level_15,t_label_level_16,t_label_level_17,t_label_level_18,t_label_level_19,t_label_level_20,t_label_level_21,t_label_level_22,t_label_level_23,daily_ret
0,0,2020-03-24,00642U,10.32,10.57,10.17,10.36,42796200,444039684,0,...,,,,,,,,,,0.003876
12,1,2020-03-25,00642U,10.55,10.59,10.44,10.46,39627340,416239309,1,...,,,,,,,,,,-0.008531
24,2,2020-03-26,00642U,10.44,10.46,10.11,10.33,25842235,264724135,2,...,,,,,,,,,,-0.010536
36,3,2020-03-27,00642U,10.24,10.24,10.1,10.17,20479766,207667697,3,...,,,,,,,,,,-0.006836
48,4,2020-03-30,00642U,9.88,9.88,9.65,9.79,33984332,331368684,4,...,,,,,,,,,,-0.009109


Saved 'disposal_df_wide.csv' and 'disposal_df_long.csv'.


In [20]:
# [Step 3.5] Add the TAIEX benchmark return
# Requires disposal_long from Step 3
if 'disposal_long' in locals() and not disposal_long.empty:
    print("Fetching TAIEX data...")

    # Make sure the join key is a real datetime: .strftime() below and the merge
    # on 'Date' both need it (covers a disposal_long that was reloaded from CSV).
    disposal_long = disposal_long.assign(Date=pd.to_datetime(disposal_long['Date']))

    # 1. Fetch window = full date span of the event panel
    start_date = disposal_long['Date'].min().strftime('%Y-%m-%d')
    end_date = disposal_long['Date'].max().strftime('%Y-%m-%d')
    print(f"Date Range: {start_date} to {end_date}")

    # 2. Download TAIEX through FinMind
    client = FinMindClient()
    taiex_data = client.get_data(
        dataset='TaiwanStockPrice',
        data_id='TAIEX',
        start_date=start_date,
        end_date=end_date
    )

    if taiex_data is not None and not taiex_data.empty:
        # 3. Benchmark return, computed open-to-close on the same day
        taiex_df = taiex_data[['date', 'open', 'close']].rename(columns={'date': 'Date', 'open': 'market_open', 'close': 'market_close'}).copy()
        taiex_df['Date'] = pd.to_datetime(taiex_df['Date'])
        taiex_df = taiex_df.sort_values('Date')
        taiex_df['market_ret'] = (taiex_df['market_close']/taiex_df['market_open']) - 1

        # 4. Merge back into disposal_long.
        # Drop every column this cell adds (market_open, market_close, market_ret)
        # before merging, so re-running the cell does not produce
        # 'market_open_x' / 'market_open_y' suffix duplicates.
        market_cols = [c for c in taiex_df.columns if c != 'Date']
        disposal_long = disposal_long.drop(columns=market_cols, errors='ignore')

        disposal_long = pd.merge(disposal_long, taiex_df, on='Date', how='left')

        print("Integration Complete! Added columns: 'market_ret'")
        display(disposal_long)

    else:
        print("Failed to fetch TAIEX data. Please check connection or token.")
else:
    print("DataFrame 'disposal_long' not found. Please run Step 3 first.")

[32m2025-12-13 16:09:04.323[0m | [1mINFO    [0m | [36mFinMind.data.finmind_api[0m:[36mget_data[0m:[36m171[0m - [1mdownload TaiwanStockPrice, data_id: TAIEX[0m


Fetching TAIEX data...
Date Range: 2018-01-02 to 2025-10-03
Integration Complete! Added columns: 'market_ret'


Unnamed: 0.1,Unnamed: 0,Date,Stock_id,Open,High,Low,Close,Volume,TradingAmount,trading_idx,...,t_label_level_19,t_label_level_20,t_label_level_21,t_label_level_22,t_label_level_23,daily_ret,abnormal_ret,market_open,market_close,market_ret
0,0,2020-03-24,00642U,10.32,10.57,10.17,10.36,42796200,444039684,0,...,,,,,,0.003876,-0.040622,9083.78,9285.62,0.022220
1,1,2020-03-25,00642U,10.55,10.59,10.44,10.46,39627340,416239309,1,...,,,,,,-0.008531,-0.047207,9426.43,9644.75,0.023160
2,2,2020-03-26,00642U,10.44,10.46,10.11,10.33,25842235,264724135,2,...,,,,,,-0.010536,-0.020035,9667.14,9736.36,0.007160
3,3,2020-03-27,00642U,10.24,10.24,10.10,10.17,20479766,207667697,3,...,,,,,,-0.006836,-0.002991,9807.90,9698.92,-0.011111
4,4,2020-03-30,00642U,9.88,9.88,9.65,9.79,33984332,331368684,4,...,,,,,,-0.009109,-0.001945,9571.22,9629.43,0.006082
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16808,15,2021-07-23,9962,24.00,25.60,24.00,25.60,22693000,569363250,5,...,,,,,,0.066667,0.066633,17603.95,17572.92,-0.001763
16809,16,2021-07-26,9962,23.05,24.70,23.05,23.05,7764000,180799800,6,...,,,,,,0.000000,0.009638,17554.93,17403.56,-0.008623
16810,17,2021-07-27,9962,23.80,23.80,22.25,22.50,3173000,71872650,7,...,,,,,,-0.054622,-0.046940,17394.77,17269.87,-0.007180
16811,18,2021-07-28,9962,22.50,22.60,21.80,21.80,1769000,38968600,8,...,,,,,,-0.031111,-0.023314,17252.87,17135.22,-0.006819


In [21]:
# [Step 4] 最終篩選 (Final Filter: Common Stocks Only)
# 應用篩選邏輯：只保留代碼長度為 4 且非 00 開頭的股票 (只保留股票)

def is_common_stock(stock_id):
    """Return True when the ticker is exactly four characters and does not start with '00'.

    The value is converted with str() first, so integer tickers are accepted too.
    """
    code = str(stock_id)
    if len(code) != 4:
        return False
    # For a four-character code, comparing the first two characters is the
    # same check as startswith('00').
    return code[:2] != '00'

# Both Step 3 outputs are used below, so require both before starting
# (checking only disposal_wide would raise NameError when disposal_long is missing).
if 'disposal_wide' in locals() and 'disposal_long' in locals() and not disposal_wide.empty:
    print("Filtering Final Output for Common Stocks Only...")

    # Filter the wide (signal) table
    mask_wide = disposal_wide['Stock_id'].apply(is_common_stock)
    final_wide = disposal_wide[mask_wide].copy()

    # Filter the long (analysis) table; the result is bound to `df`
    mask_long = disposal_long['Stock_id'].apply(is_common_stock)
    df = disposal_long[mask_long].copy()

    print(f"Wide Format: {len(disposal_wide)} -> {len(final_wide)} rows")
    print(f"Long Format: {len(disposal_long)} -> {len(df)} rows")

    # Save the filtered versions. These are the same paths Step 3 writes, so the
    # unfiltered CSVs are overwritten.
    final_wide.to_csv('../../data/disposal/disposal_df_wide.csv', index=False, encoding='utf-8-sig')
    df.to_csv('../../data/disposal/disposal_df_long.csv', index=False, encoding='utf-8-sig')
    print("Saved 'disposal_df_wide.csv' and 'disposal_df_long.csv'.")

    # Preview
    display(final_wide.head())
else:
    print("Output dataframes not found. Please run Step 3 first.")

Filtering Final Output for Common Stocks Only...
Wide Format: 12561 -> 11546 rows
Long Format: 16813 -> 14742 rows
Saved 'disposal_df_wide.csv' and 'disposal_df_long.csv'.


Unnamed: 0,Date,Stock_id,t_label_first,condition_first,interval_first,event_start_date_first,event_end_date_first,relative_day_first,gap_days_first,calendar_relative_day_first,...,gap_days_level_22,calendar_relative_day_level_22,t_label_level_23,condition_level_23,interval_level_23,event_start_date_level_23,event_end_date_level_23,relative_day_level_23,gap_days_level_23,calendar_relative_day_level_23
482,2020-07-20,1213,s-1,連續三次,,2020-07-21,2020-08-03,-1.0,0.0,-1.0,...,,,,,,NaT,NaT,,,
483,2020-07-21,1213,s+0,連續三次,,2020-07-21,2020-08-03,0.0,0.0,0.0,...,,,,,,NaT,NaT,,,
484,2020-07-22,1213,s+1,連續三次,,2020-07-21,2020-08-03,1.0,0.0,1.0,...,,,,,,NaT,NaT,,,
485,2020-07-23,1213,s+2,連續三次,,2020-07-21,2020-08-03,2.0,0.0,2.0,...,,,,,,NaT,NaT,,,
486,2020-07-24,1213,s+3,連續三次,,2020-07-21,2020-08-03,3.0,0.0,3.0,...,,,,,,NaT,NaT,,,


---
# Part 2: Statistical Analysis

### 資料欄位說明 (Data Dictionary)

在開始分析前，了解資料集中的關鍵變數定義：

| 欄位名稱 | 說明 | 用途 |
| :--- | :--- | :--- |
| **`t_label`** | 時間軸標籤 | **核心分析欄位**。`s+N` (處置期間), `e+N` (解禁後)。`e+0` 為解禁當日。 |
| **`trading_idx`** | 交易日序號 | 該股票的累積交易日數 (Index)。用來計算精確的 `relative_day`，避免假日干擾。 |
| **`gap_days`** | 交易間隔(日曆日) | 與上一個交易日之間相隔的非交易日曆日數。`0`=正常 (連續交易日), `2`=跨週末, `>2`=長假或暫停交易。 |
| **`relative_day`** | 相對天數 | 距離事件開始日 (`s+0`) 的交易日數差。 |
| **`disposal_level`** | 處置連續等級 | `1`=首次, `2`=續處置 (中間無間斷)。用於觀察連續處置的邊際效應遞減。 |
| **`daily_ret`** | 當日報酬率 | 計算方式：`(Close / Open) - 1`。 |
| **`Stock_id`** | 股票代號 | 經過 Filter 後應僅包含普通股 (4碼)。 |

## overall

In [23]:
# [Analysis Step 1] Load the long-format table
paths = '../../data/disposal/disposal_df_long.csv'
print(f"Reading data from: {paths}")

# 1. CSV read options: low_memory=False avoids the DtypeWarning, Stock_id is
#    forced to string, and the three date columns are parsed to datetime.
_read_options = dict(
    low_memory=False,
    dtype={'Stock_id': str},
    parse_dates=['Date', 'event_start_date', 'event_end_date'],
)
df = pd.read_csv(paths, **_read_options)

print(f"Loaded {len(df):,} rows.")
print(f"Columns: {list(df.columns)} ...")

# Preview
display(df)

Reading data from: ../../data/disposal/disposal_df_long.csv
Loaded 14,742 rows.
Columns: ['Unnamed: 0', 'Date', 'Stock_id', 'Open', 'High', 'Low', 'Close', 'Volume', 'TradingAmount', 'trading_idx', 'prev_trade_date', 'trade_date_diff', 'gap_days', 'event_start_date', 'event_end_date', 'interval', 'condition', 'is_first_disposal', 'is_second_disposal', 'disposal_level', 'calendar_relative_day', 'trading_idx_start', 'trading_idx_end', 'relative_day', 'relative_day_end', 't_label_first', 't_label_second', 't_label_third', 't_label_fourth', 't_label_level_5', 't_label_level_6', 't_label_level_7', 't_label_level_8', 't_label_level_9', 't_label_level_10', 't_label_level_11', 't_label_level_12', 't_label_level_13', 't_label_level_14', 't_label_level_15', 't_label_level_16', 't_label_level_17', 't_label_level_18', 't_label_level_19', 't_label_level_20', 't_label_level_21', 't_label_level_22', 't_label_level_23', 'daily_ret', 'abnormal_ret', 'market_open', 'market_close', 'market_ret'] ...


Unnamed: 0.1,Unnamed: 0,Date,Stock_id,Open,High,Low,Close,Volume,TradingAmount,trading_idx,...,t_label_level_19,t_label_level_20,t_label_level_21,t_label_level_22,t_label_level_23,daily_ret,abnormal_ret,market_open,market_close,market_ret
0,0,2020-07-20,1213,10.75,10.75,9.43,10.00,503610,5266828,0,...,,,,,,-0.069767,-0.069191,12205.25,12174.54,-0.002516
1,0,2020-07-20,1213,10.75,10.75,9.43,10.00,503610,5266828,0,...,,,,,,-0.069767,-0.069191,12205.25,12174.54,-0.002516
2,1,2020-07-21,1213,11.00,11.00,10.25,10.35,182100,1951863,1,...,,,,,,-0.059091,-0.077409,12242.32,12397.55,0.012680
3,1,2020-07-21,1213,11.00,11.00,10.25,10.35,182100,1951863,1,...,,,,,,-0.059091,-0.077409,12242.32,12397.55,0.012680
4,2,2020-07-22,1213,10.80,10.80,10.20,10.50,195049,2045958,2,...,,,,,,-0.027778,-0.033885,12389.76,12473.27,0.006740
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14737,15,2021-07-23,9962,24.00,25.60,24.00,25.60,22693000,569363250,5,...,,,,,,0.066667,0.066633,17603.95,17572.92,-0.001763
14738,16,2021-07-26,9962,23.05,24.70,23.05,23.05,7764000,180799800,6,...,,,,,,0.000000,0.009638,17554.93,17403.56,-0.008623
14739,17,2021-07-27,9962,23.80,23.80,22.25,22.50,3173000,71872650,7,...,,,,,,-0.054622,-0.046940,17394.77,17269.87,-0.007180
14740,18,2021-07-28,9962,22.50,22.60,21.80,21.80,1769000,38968600,8,...,,,,,,-0.031111,-0.023314,17252.87,17135.22,-0.006819


In [24]:
# [Analysis Step 2] Mean return and sample count for each event-time label

def parse_t_val(t_str):
    """Map an event-time label to an integer sort key.

    's-3' -> -3 and 's+2' -> 2 (offsets relative to the disposal start);
    'e+1' -> 1001 (post-release labels are shifted by 1000 so they sort after
    the 's' labels). Anything else — a non-string such as NaN, an empty string,
    an unknown prefix, or a non-numeric offset — maps to 999.
    """
    fallback = 999
    # Guard the empty string explicitly: indexing t_str[0] would raise IndexError.
    if not isinstance(t_str, str) or not t_str:
        return fallback

    prefix, offset = t_str[0], t_str[1:]
    if prefix not in ('s', 'e'):
        return fallback
    # 'e' labels are only recognised in the 'e+N' form.
    if prefix == 'e' and not offset.startswith('+'):
        return fallback

    try:
        val = int(offset)  # int() accepts the leading '+' / '-' sign
    except ValueError:
        # Only a malformed number is expected here; other errors should surface.
        return fallback

    return val if prefix == 's' else 1000 + val


if 'df' in locals() and not df.empty:
    target_col = 't_label_first'

    if target_col in df.columns:
        # 1. Per-label statistics of the daily return
        stats = df.groupby(target_col)['daily_ret'].agg(['mean', 'count', 'std']).reset_index()

        # 2. Order the labels chronologically via the numeric sort key
        stats['sort_key'] = stats[target_col].apply(parse_t_val)
        stats = stats.sort_values('sort_key').drop(columns=['sort_key'])

        # 3. Draw through plot_func
        print(f"Plotting Dual Bar Chart (via plot_func) for {target_col}...")

        # NOTE(review): the previous inline comments labelled `ly` as the sample
        # count and `bar_col` as the mean return, the opposite of what is passed.
        # The arguments are left unchanged; confirm the intended panel layout
        # against module.plot_func.
        plot(
            df=stats,
            x=target_col,
            ly='mean',        # series passed as `ly`: mean return
            bar_col='count',  # series passed as `bar_col`: sample count
            ly_type='bar',    # draw the `ly` series as bars as well
            note=f"Disposal Event Study: {target_col}",
            max_ly=False,
            min_ly=False,
            bar_kwargs={'width': 0.8}  # bar width for the `bar_col` series
        )

        # Raw table, with mean/std shown as percentages
        display(stats.style.format({'mean': '{:.4%}', 'std': '{:.4%}'}).background_gradient(subset=['mean'], cmap='RdYlGn'))

    else:
        print(f"Column {target_col} not found.")
else:
    print("Dataframe 'df' not ready.")

Plotting Dual Bar Chart (via plot_func) for t_label_first...


Unnamed: 0,t_label_first,mean,count,std
16,s-3,2.0080%,759,4.6760%
15,s-2,1.0825%,1173,4.5242%
14,s-1,-0.0411%,2035,5.2007%
5,s+0,-1.0329%,2088,5.1018%
6,s+1,-0.5160%,2028,4.9024%
7,s+2,-0.0706%,1390,4.7791%
8,s+3,-0.2187%,1034,4.8141%
9,s+4,-0.1481%,343,5.5460%
10,s+5,0.2820%,224,5.7794%
11,s+6,0.1875%,168,5.3013%


# Analysis Step 2 (統計與繪圖) 詳細運作原理解析

這份文件詳細解釋了 Notebook 中 **Part 2 - Analysis Step 2** 的運作邏輯。這是您用來生成「雙長條圖（樣本數 + 平均報酬）」的關鍵區塊。

## 1. 核心邏輯流程

這個區塊做了三件主要工作：
1.  **排序 (Sorting)**：定義 `s+1`、`e-1` 這些標籤誰先誰後。
2.  **統計 (Aggregation)**：把成千上萬筆資料，按天算平均。
3.  **繪圖 (Plotting)**：畫出含有雙 Y 軸的圖表。

---

## 2. 程式碼逐段解析

### A. 定義排序規則 (`parse_t_val`)
因為電腦不懂 `s+1` 比 `s-1` 大，所以我們要寫一個翻譯機：

```python
def parse_t_val(t_str):
    # 如果標籤是 's-4'，就翻譯成 -4
    # 如果標籤是 's+2'，就翻譯成 2
    # 如果標籤是 'e+1'，為了讓它排在最後面，我們故意加個大數字 (例如 1000 + 1)
    ...
```
*   **目的**：確保畫出來的圖，X 軸是依照時間順序排列的 (處置前 -> 處置中 -> 解禁後)。

### B. 計算統計量 (`groupby`)
```python
target_col = 't_label_first'  # 我們只分析第一次處置
stats = final_long.groupby(target_col)['abnormal_ret'].agg(['mean', 'count', 'std']).reset_index()
```
*   **輸入**：`final_long` (包含所有股票每一天的資料)。
*   **動作**：
    1.  把標籤一樣的日子（例如所有股票的 `s+1`）抓成一堆。
    2.  算這一堆的 **`mean` (平均超額報酬)** -> 用來畫前景長條。
    3.  算這一堆的 **`count` (有幾支股票)** -> 用來畫背景長條。
*   **輸出**：一個小表格 `stats`，大概長這樣：
    | t_label | mean | count |
    | :--- | :--- | :--- |
    | s-1 | 0.5% | 1500 |
    | s+0 | -1.2% | 1500 |

### C. 呼叫繪圖函式 (`plot`)
```python
plot(
    df=stats,
    x=target_col,       # X軸: 時間標籤 (s+1, s+2...)
    ly='count',         # 左Y軸 (上圖/背景): 樣本數
    ly_type='bar',      # 設定左軸也畫「長條圖」
    bar_col='mean',     # 右Y軸 (下圖/前景): 平均報酬
    bar_kwargs={'width': 0.8} # 固定長條寬度
)
```
這是我們剛剛升級過的地方！
*   **`ly='count' + ly_type='bar'`**：
    *   這畫出了圖表上那層**淺灰色的長條**。
    *   它的高度代表「樣本數」。如果某一天忽然變矮，代表那天很多股票沒資料（可能遇到假日或沒交易）。
*   **`bar_col='mean'`**：
    *   這畫出了圖表上那層**紅綠色的長條**。
    *   **紅色**代表平均賺錢 (平均報酬 > 0)，**綠色**代表平均賠錢 (平均報酬 < 0)。

---

## 3. 這張圖怎麼看？ (觀察重點)

1.  **處置效應 (The Effect)：**
    *   看 **紅色/綠色長條**。
    *   如果在 `s+0` (處置開始日) 附近出現 **顯著的綠柱**，代表處置消息一出，股價通常會跌。
    *   如果在 `e+0` (解禁日) 附近出現 **紅柱**，代表解禁後股價會反彈。

2.  **樣本信賴度 (Reliability)：**
    *   看 **灰色長條 (Count)**。
    *   灰色柱子越高，代表這個平均值是用成千上萬筆資料算出來的，**可信度高**。
    *   如果越往右邊 (e+N) 灰色柱子越矮，代表有很多股票根本撐不到解禁（可能下市或資料缺失），這時候算出來的平均報酬可能會有偏差 (Survivorship Bias)。

## 4. 常見問題

*   **Q: 為什麼有些標籤 (`s+10`) 不見了？**
    *   A: 因為我們只取了 `head(20)` 或是資料本身過濾掉了。您可以調整程式碼中的篩選範圍。
*   **Q: 為什麼是 `abnormal_ret` 而不是 `daily_ret`？**
    *   A: 因為我們在 Step 1.5 扣掉了大盤漲跌。這樣看到的才是「因為處置而產生」的漲跌，而不是因為那天大盤剛好大漲。

```mermaid
graph LR
    RawData[final_long] --> GroupBy(分組計算平均)
    GroupBy --> StatsTable[統計表 stats]
    StatsTable --> Plotting{Plotly 繪圖}
    
    Plotting --> Layer1[灰色長條: Count]
    Plotting --> Layer2[紅綠長條: Return]
    Layer1 & Layer2 --> FinalChart[雙長條圖]
```
