In [26]:
import pandas as pd
import numpy as np
import sys
import os
import gc
from loguru import logger
from sqlalchemy import text

# Make the project-local packages importable: the shared "note" directory three
# levels up, followed by the current working directory (same order as before).
sys.path.extend(["../../../note", os.getcwd()])

%load_ext autoreload
%autoreload 2

from module.get_info_FinMind import FinMindClient
from module.get_info_Finlab import FinlabClient
from module.get_info_Postgre import PostgreClient
# Shared Postgres client for the 'stock_daily' database; it is passed to the
# Postgres fetch helpers in later cells.
# NOTE(review): system='windows' is hard-coded — confirm it matches the machine running the notebook.
pg_client = PostgreClient(system='windows', database='stock_daily')
from utils import batch_fetch_prices, run_event_study, process_disposal_events, fetch_and_merge_indexes_from_postgres, fetch_prices_from_postgres
from analyzer import DisposalAnalyzer

# Global parameters
OFFSET_DAYS = 5      # buffer of trading days captured before/after each event (to observe s-5 ~ e+5)
START_DATE = '2018-01-01' # start date of the data (the original note called it the "start year")
DATA_DIR = '../../data/disposal' # directory where the intermediate data files are stored

os.makedirs(DATA_DIR, exist_ok=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Setup

## Data Preparation

### 抓取處置股名單 (Finlab)

In [29]:
# Pull the raw disposal announcements from Finlab.
finlab_client = FinlabClient()
print("Fetching disposal information from Finlab...")
finlab_disposal = finlab_client.get_data("disposal_information", start_date=START_DATE)

if finlab_disposal.empty:
    # Nothing came back: fall back to an empty frame so later cells can still test `.empty`.
    print("[Error] No data fetched from Finlab.")
    processed_disposal = pd.DataFrame()
else:
    # Parse the date column, then keep only rows on or after START_DATE.
    finlab_disposal['date'] = pd.to_datetime(finlab_disposal['date'])
    on_or_after_start = finlab_disposal['date'] >= START_DATE
    finlab_disposal = finlab_disposal[on_or_after_start]

    print(f"Fetched {len(finlab_disposal):,} records.")

    # Pre-processing step (per the original note: derives the consecutive disposal level).
    processed_disposal = process_disposal_events(finlab_disposal)

    # Keep a CSV backup of the processed events.
    save_path = f'{DATA_DIR}/processed_disposal_events.csv'
    processed_disposal.to_csv(save_path, index=False, encoding='utf-8-sig')
    print(f"Saved processed events to: {save_path}")

Fetching disposal information from Finlab...
Fetched 3,383 records.
Columns before processing: ['Stock_id', 'date', '證券名稱', 'condition', '處置措施', '處置內容', 'event_start_date', 'event_end_date', 'interval', 'key_date']
Saved processed events to: ../../data/disposal/processed_disposal_events.csv


### 抓取個股股價 (FinMind) - 用 Postgres 就好

In [30]:
# Initialise the FinMind client used for the batch price download.
fm_client = FinMindClient()

# Reload the processed event table when it is not in memory, so this cell can
# be the starting point after a kernel restart.
# Stock_id is read as text, like every other loader of this CSV in the
# notebook; without it pandas may infer integers and drop leading zeros.
if 'processed_disposal' not in locals():
    processed_disposal = pd.read_csv(
        f'{DATA_DIR}/processed_disposal_events.csv',
        dtype={'Stock_id': str},
    )

# Batch-fetch prices around each event (the original note describes this as a
# parallel job; max_workers=10 is passed through to the helper).
if not processed_disposal.empty:
    print("Starting batch price fetch (this may take a while)...")
    price_df = batch_fetch_prices(fm_client, processed_disposal, offset_days=OFFSET_DAYS, max_workers=10)

    if not price_df.empty:
        save_path = f'{DATA_DIR}/price_df.csv'
        price_df.to_csv(save_path, index=False)
        print(f"Fetched {len(price_df):,} rows over {price_df['Stock_id'].nunique()} stocks.")
        print(f"Saved prices to: {save_path}")
    else:
        print("[Warning] No price data fetched.")
else:
    print("[Error] Processed disposal dataframe is empty.")

# Release memory; the trailing semicolon keeps the collector's return value
# from being echoed as the cell output.
gc.collect();

Starting batch price fetch (this may take a while)...
Using pre-processed columns 'event_start_date' and 'event_end_date'.
Starting batch fetch for 1317 stocks with 10 workers...


Fetching Prices: 100%|██████████| 1317/1317 [00:35<00:00, 36.85it/s]


Fetched total 45255 rows.
Fetched 45,255 rows over 1042 stocks.
Saved prices to: ../../data/disposal/price_df.csv


5446

### 抓個股股價(Postgres)

In [7]:
# Make sure the processed events are in memory; otherwise reload them from the
# CSV backup with Stock_id kept as text.
if 'processed_disposal' not in locals():
    processed_disposal = pd.read_csv(
        f'{DATA_DIR}/processed_disposal_events.csv',
        dtype={'Stock_id': str},
    )

# Fetch the prices for those events through the shared Postgres client.
price_df = fetch_prices_from_postgres(pg_client, processed_disposal)

if price_df.empty:
    print("[Warning] No price data fetched.")
else:
    # Persist the prices so later cells can reload them.
    save_path = f'{DATA_DIR}/price_df.csv'
    price_df.to_csv(save_path, index=False)

## 抓大盤 & 指數

In [8]:
# Reload the saved prices (Stock_id as text), merge in the index data fetched
# from Postgres, and write the result back to the same CSV.
# NOTE(review): this cell overwrites its own input file, so re-running it feeds
# the already-merged frame back into the helper — confirm the helper tolerates that.
price_df = fetch_and_merge_indexes_from_postgres(
    pd.read_csv(f'{DATA_DIR}/price_df.csv', dtype={'Stock_id': str}),
    pg_client,
)
price_df.to_csv(f'{DATA_DIR}/price_df.csv', index=False)

Required Industry Indices: ['Food', 'Plastics', 'Other', 'Textiles', 'TradingConsumersGoods', 'BuildingMaterialConstruction', 'ElectronicPartsComponents', 'ElectricMachinery', 'Automobile', 'ComputerPeripheralEquipment', 'SportLeisure', 'ChemicalBiotechnologyMedicalCare', 'ElectricalCable', 'Chemical', 'BiotechnologyMedicalCare', 'OtherElectronic', 'GlassCeramic', 'PaperPulp', 'IronSteel', 'Electronic', 'Rubber', 'ShippingTransportation', 'Semiconductor', 'Optoelectronic', 'CommunicationsInternet', 'InformationService', 'Tourism', 'FinancialInsurance', 'HomeLife', 'ElectronicProductsDistribution', 'CulturalCreative', 'GreenEnergyEnvironmental', 'ECommerce', 'AgriculturalTechnology', 'DigitalCloud']


## Event Integration

In [10]:
# Reload whichever input is missing from memory (Stock_id kept as text).
if 'price_df' not in locals():
    price_df = pd.read_csv(
        f'{DATA_DIR}/price_df.csv',
        dtype={'Stock_id': str},
    )

if 'processed_disposal' not in locals():
    processed_disposal = pd.read_csv(
        f'{DATA_DIR}/processed_disposal_events.csv',
        dtype={'Stock_id': str},
    )

# run_event_study returns a pair of frames:
#   disposal_wide - wide table (noted as being for signal use)
#   disposal_long - long table (noted as being for analysis use)
disposal_wide, disposal_long = run_event_study(
    price_df,
    processed_disposal,
    offset_days=OFFSET_DAYS,
)

if disposal_long.empty:
    print("[Error] Event study returned empty result.")
else:
    # Save both layouts; utf-8-sig is used for the encoding, as in the events CSV.
    disposal_wide.to_csv(f'{DATA_DIR}/disposal_df_wide.csv', index=False, encoding='utf-8-sig')
    disposal_long.to_csv(f'{DATA_DIR}/disposal_df_long.csv', index=False, encoding='utf-8-sig')

Detected Disposal Levels: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22), np.int64(23), np.int64(24), np.int64(25), np.int64(26), np.int64(27), np.int64(28), np.int64(29), np.int64(30), np.int64(31), np.int64(32)]
Converting to Wide Format...
Analysis completed. Wide shape: (43721, 354), Long shape: (53753, 64)


## 僅保留股票資訊

In [5]:
# Load the long-format event table produced by the event study (Stock_id as text).
disposal_long = pd.read_csv(f'{DATA_DIR}/disposal_df_long.csv', dtype={'Stock_id': str}, low_memory=False)

# Id-prefix rule shared by both branches below: ids starting with 00 (ETF) or
# 91 (DR) are excluded. It is computed once, before any filtering.
s_id = disposal_long['Stock_id'].astype(str)
mask_id_exclude = s_id.str.startswith('00') | s_id.str.startswith('91')

# Preferred filter: use the industry label to exclude ETFs, DRs and similar
# non-stock instruments.
if 'industry' in disposal_long.columns:
    print("Filtering by industry...")
    # Industry labels to exclude.
    exclude_industries = [
        'ETF', '存託憑證', '受益證券', 'ETN', '創新板股票', '上櫃指數股票型基金(ETF)'
    ]
    mask_exclude = disposal_long['industry'].isin(exclude_industries)

    # The id-prefix rule is applied as well, as a safety net.
    disposal_long = disposal_long[~mask_exclude & ~mask_id_exclude]
    print(f"Remaining rows after industry filter: {len(disposal_long)}")

else:
    # Fallback when no industry column exists: keep only 4-character ids that
    # do not match the excluded prefixes.
    print("[Warning] 'industry' column not found. Fallback to Stock_id filtering.")
    disposal_long = disposal_long[(s_id.str.len() == 4) & ~mask_id_exclude]

# Drop a stray index column if one is present. This reassigns instead of using
# drop(inplace=True): disposal_long is the result of boolean indexing, and
# mutating such a frame in place can trigger SettingWithCopyWarning.
disposal_long = disposal_long.drop(columns=['Unnamed: 0'], errors='ignore')

# Only the saved file has its all-NaN columns removed; the in-memory frame keeps them.
disposal_long.dropna(axis=1, how='all').to_csv(f'{DATA_DIR}/disposal_df_long_stock.csv', index=False)

Filtering by industry...
Remaining rows after industry filter: 47581


# Analysis

In [18]:
# Reload the stock-only long table with the Date column parsed as datetimes and
# hand it to the analyzer.
# NOTE(review): unlike the other loaders in this notebook, Stock_id is not
# forced to str here — confirm that DisposalAnalyzer expects the inferred dtype.
disposal_long = pd.read_csv(
    f'{DATA_DIR}/disposal_df_long_stock.csv',
    parse_dates=['Date'],
    low_memory=False,
)
analyzer = DisposalAnalyzer(disposal_long)
# Optional inspection call, left disabled: analyzer.display_dataframe()

## overall
`[Disposal Level Statistics]` 中的統計數據基於 **日當沖報酬率** 計算

In [19]:
# Run the analyzer's overall summary. The recorded output shows two tables:
# [Disposal Condition Distribution] and [Disposal Level Statistics].
analyzer.overall_analysis()


[Disposal Condition Distribution]


Unnamed: 0,condition,days_count,event_count,days_pct,5min_count,20min_count
0,因連續3個營業日達本中心作業要點第四條第一項第一款,23982,1210,50.40%,808,307
1,連續三次,11702,590,24.59%,428,114
2,最近10個營業日內有6個營業日,2631,132,5.53%,74,55
3,連續5個營業日,1769,89,3.72%,36,53
4,連續3個營業日及沖銷標準,1693,77,3.56%,62,15
5,最近十個營業日已有六次,1670,85,3.51%,64,17
6,連續三次及當日沖銷標準,1444,66,3.03%,59,7
7,連續五次,1255,63,2.64%,45,16
8,連續5個營業日及沖銷標準,701,32,1.47%,17,15
9,連續五次及當日沖銷標準,594,27,1.25%,25,2



[Disposal Level Statistics]


Unnamed: 0,disposal_level,days_count,event_count,mean,std
0,1,39527,1970,0.35%,4.96%
1,2,6476,325,0.49%,5.45%
2,3,1183,60,0.41%,6.00%
3,4,275,14,0.67%,6.14%
4,5,100,5,0.20%,6.94%
5,6,20,1,0.67%,4.07%


## Separate by trend

In [33]:
# Split the events by their pre-disposal trend and keep the result for the
# plotting cells that follow. The recorded output labels the groups
# Overbought / Oversold / Unknown, based on the s-3 ~ s-1 window.
# NOTE(review): `seprate_by_trend` is spelled the way the call requires; the
# name presumably comes from DisposalAnalyzer, so do not rename it only here.
seperated_df = analyzer.seprate_by_trend()


[處置前趨勢分佈 (s-3 ~ s-1)]


Unnamed: 0,direction,days_count,event_count,days_pct,5min_count,20min_count
0,Overbought,36330,1809,76.35%,1313,398
1,Oversold,11217,566,23.57%,309,204
2,Unknown,34,3,0.07%,1,0



[方向 vs 層級 交叉表 (交易天數)]


disposal_level,1,2,3,4,5,6,Total
direction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Overbought,30854,4661,617,138,60,0,36330
Oversold,8650,1804,566,137,40,20,11217
Unknown,23,11,0,0,0,0,34
Total,39527,6476,1183,275,100,20,47581


### 日盤

In [37]:
# Plot returns per trend group with no session argument (the analyzer's
# default); the section heading labels this view as the day session.
analyzer.plot_trend_return(df=seperated_df)

### 夜盤

In [11]:
# Same plot with the 'after_market' session; the section heading labels this
# view as the night session.
analyzer.plot_trend_return(seperated_df, 'after_market')

### 夜 + 日

In [12]:
# Same plot with the 'all' session; the section heading labels this view as
# night + day combined.
analyzer.plot_trend_return(seperated_df, 'all')

## Industry Analysis

In [12]:
# 3D return surface for the after-market session. With split_by_direction=True
# the recorded output shows one plot per direction (Overbought / Oversold / Unknown).
# NOTE(review): bins, use_browser and show_metrics are passed straight to
# DisposalAnalyzer; their exact effect is not visible from this notebook
# (presumably use_browser=True opens the figure outside the notebook — confirm).
analyzer.plot_3d_return_surface(seperated_df, session='after_market', bins=20, split_by_direction=True, use_browser=True, show_metrics='mean')

\n[Auto Split] 檢測到多種 Direction: ['Overbought' 'Oversold' 'Unknown']，將分開繪圖...
\n>>> Plotting for Direction: Overbought
\n[3D Surface Analysis] Session: after_market
\n>>> Plotting for Direction: Oversold
\n[3D Surface Analysis] Session: after_market
\n>>> Plotting for Direction: Unknown
\n[3D Surface Analysis] Session: after_market


In [14]:
# 2D slice of the return data along one given dimension.
# slice_by options listed by the author: "ind_ret" or "time".
# Here the after-market data is sliced by time at target 'e+0'; the recorded
# output shows the analyzer picking the closest available relative day per direction.
analyzer.plot_2d_slice(seperated_df, session='after_market', slice_by='time', target='e+0')


[Auto Split] 檢測到多種 Direction: ['Overbought' 'Oversold' 'Unknown']，將分開繪圖...

>>> Plotting for Direction: Overbought

[2D Slice Analysis] Session: after_market, Slice By: time, Target: e+0
Selected closest relative day: e+0 (val=1000.0)



>>> Plotting for Direction: Oversold

[2D Slice Analysis] Session: after_market, Slice By: time, Target: e+0
Selected closest relative day: e+0 (val=1000.0)



>>> Plotting for Direction: Unknown

[2D Slice Analysis] Session: after_market, Slice By: time, Target: e+0
Selected closest relative day: e+1 (val=1001.0)
