In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind

In [2]:
# Candidate event days: every calendar day in the range (both endpoints included),
# rendered as ISO "YYYY-MM-DD" strings.
target_dates = [
    day.strftime("%Y-%m-%d")
    for day in pd.date_range(start="2025-03-01", end="2025-05-16")
]

# Sanity check: show the first few days and the total count
print(target_dates[:5])
print(f"總共事件日數: {len(target_dates)}")

['2025-03-01', '2025-03-02', '2025-03-03', '2025-03-04', '2025-03-05']
總共事件日數: 77


In [3]:
# Event-window screen: for every candidate event day, compare the daily
# `avg_sentiment_score` in the `window` calendar days before the event with the
# `window` calendar days after it, using an independent two-sample t-test.
#
# NOTE(review): every event day is tested separately at the 0.05 level with no
# multiple-comparison correction, and neighbouring windows overlap. With dozens
# of event days a handful of "significant" hits is expected by chance alone, so
# treat the list below as a screen, not as confirmed effects.

# === 1. Load data ===
df = pd.read_csv("daily_sentiment_features.csv")
df["date"] = pd.to_datetime(df["date"])

# === 2. Event days and observation window ===
# `target_dates` (ISO date strings) comes from the previous cell.
window = 4  # number of calendar days compared on each side of the event day

# === 3. Result collection ===
significant_events = []  # (event day, t statistic, p value) for each hit

# Drop rows with a missing score once, up front. Otherwise NaN rows would count
# toward the sample-size check and make ttest_ind return a NaN p-value, which
# silently fails `p_val < 0.05` without any warning being printed.
scored = df.loc[df["avg_sentiment_score"].notna(), ["date", "avg_sentiment_score"]]
span = pd.Timedelta(days=window)

# === 4. Test each event day ===
for d in target_dates:
    center = pd.to_datetime(d)

    # Boolean masks instead of a scratch "group" column: `df` is not modified
    # inside the loop. The event day itself belongs to neither group.
    is_before = (scored["date"] >= center - span) & (scored["date"] < center)
    is_after = (scored["date"] > center) & (scored["date"] <= center + span)

    g1 = scored.loc[is_before, "avg_sentiment_score"]
    g2 = scored.loc[is_after, "avg_sentiment_score"]

    # Need at least two observations on each side for the test
    if len(g1) < 2 or len(g2) < 2:
        print(f"⚠️ {d} 樣本不足（before={len(g1)}, after={len(g2)}）")
        continue

    # Called with SciPy defaults, as in the original analysis.
    t_stat, p_val = ttest_ind(g1, g2)

    if p_val < 0.05:
        significant_events.append((d, t_stat, p_val))

# === 5. Summary of significant event days ===
if significant_events:
    print("\n✅ 顯著事件列表（p < 0.05）:")
    for event_day, t_value, p_value in significant_events:
        print(f"- {event_day}: t = {t_value:.4f}, p = {p_value:.4f}")
else:
    print("\n❌ 無顯著事件（p < 0.05）")


⚠️ 2025-03-01 樣本不足（before=0, after=4）
⚠️ 2025-03-02 樣本不足（before=1, after=4）
⚠️ 2025-05-15 樣本不足（before=4, after=1）
⚠️ 2025-05-16 樣本不足（before=4, after=0）

✅ 顯著事件列表（p < 0.05）:
- 2025-03-19: t = -2.5500, p = 0.0435
- 2025-04-16: t = 2.8657, p = 0.0286
- 2025-05-07: t = -2.6965, p = 0.0357
