In [1]:
!pip install statsmodels linearmodels

Collecting statsmodels
  Downloading statsmodels-0.14.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (9.5 kB)
Collecting linearmodels
  Downloading linearmodels-6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)
Collecting patsy>=0.5.6 (from statsmodels)
  Downloading patsy-1.0.1-py2.py3-none-any.whl.metadata (3.3 kB)
Collecting mypy-extensions>=0.4 (from linearmodels)
  Downloading mypy_extensions-1.1.0-py3-none-any.whl.metadata (1.1 kB)
Collecting Cython>=3.0.10 (from linearmodels)
  Downloading cython-3.1.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting pyhdfe>=0.1 (from linearmodels)
  Downloading pyhdfe-0.2.0-py3-none-any.whl.metadata (4.0 kB)
Collecting formulaic>=1.0.0 (from linearmodels)
  Downloading formulaic-1.2.0-py3-none-any.whl.metadata (7.0 kB)
Collecting setuptools-scm<9.0.0,>=8.0.0 (from setuptools-scm[toml]<9.0.0,>=8.0.0->linearmode

In [4]:
# === 1. Imports & Paths ===
import os
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from pathlib import Path

# Font used for all figures.
# NOTE(review): the original comment said this avoids garbled Chinese text, but
# DejaVu Sans presumably has no CJK glyphs — confirm whether a CJK-capable font
# is needed (the figure labels in this notebook are English).
plt.rcParams["font.family"] = "DejaVu Sans"

# Repository root. Defaults to the Codespaces checkout location; set the
# DATA_VIZ_LABS_ROOT environment variable to run the notebook elsewhere.
BASE = Path(os.environ.get("DATA_VIZ_LABS_ROOT", "/workspaces/data-visualization-labs"))
PROJ = BASE / "self-extended-practice" / "semiconductor-tariff-impact-taiwan"
DATA = PROJ / "data"               # input data folder
OUT = PROJ / "output" / "ai"       # output folder for the AI analysis
OUT.mkdir(parents=True, exist_ok=True)

print("目前資料路徑：", DATA)

# === 2. Load the previously prepared IC export data ===
ic_path = DATA / "processed" / "ic_exports_comparison.csv"
print("ic_exports_comparison.csv exists? ->", ic_path.exists())  # existence check
df = pd.read_csv(ic_path)
print("資料筆數：", len(df))


目前資料路徑： /workspaces/data-visualization-labs/self-extended-practice/semiconductor-tariff-impact-taiwan/data
ic_exports_comparison.csv exists? -> True
資料筆數： 48


In [5]:
# === 3. Load the AI demand indicator ===
ai_index_path = DATA.joinpath("external", "ai_demand_index_2015_2025.csv")
print("ai_demand_index_2015_2025.csv exists? ->", ai_index_path.exists())

# Read the index file and report how many rows were loaded.
ai_df = pd.read_csv(ai_index_path)
print("AI 指標資料筆數：", ai_df.shape[0])

# Preview the first rows as the cell's displayed output.
ai_df.head(5)


ai_demand_index_2015_2025.csv exists? -> True
AI 指標資料筆數： 11


Unnamed: 0,Year,AI_Demand_Index,Source
0,2015,,
1,2016,,
2,2017,,
3,2018,,
4,2019,,


In [None]:
# === Optional: compare the AI demand index with average IC exports ===
# Sanity-check plot: yearly mean of log_exports on the left axis and the
# AI demand index on the right axis.
group = df.groupby("Year", as_index=False).agg(
    WorldLogExp=("log_exports", "mean")
)
fig, ax = plt.subplots(figsize=(7,4))
ax.plot(group["Year"], group["WorldLogExp"], marker="o", label="Avg log(exports)")
ax2 = ax.twinx()
# Plot from the frame read out of ai_demand_index_2015_2025.csv: it is bound to
# `ai_df` and its index column is `AI_Demand_Index`.
# NOTE(review): the recorded preview shows missing index values for 2015-2019;
# matplotlib leaves gaps for NaN points, so those years will not be drawn.
ax2.plot(ai_df["Year"], ai_df["AI_Demand_Index"], marker="s", linestyle="--", label="AI Index", alpha=0.6, color="tab:orange")
ax.set_title("Avg log(exports) vs AI Index")
ax.set_xlabel("Year")
# Label both y-axes so the twin-axis figure can be read on its own.
ax.set_ylabel("Avg log(exports)")
ax2.set_ylabel("AI Index")
ax.legend(loc="upper left")
ax2.legend(loc="upper right")
plt.tight_layout()
plt.show()


In [None]:
# === TWFE regression: log_exports ~ Country FE + Year FE + TW_AIBoom (core model) ===
# Note: AIBoom itself would be absorbed by the year fixed effects, so only the
# interaction term TW_AIBoom is kept — the same idea as the original model.

# Only run the countries of interest (can be skipped if the file already holds
# just these four: Taiwan/USA/China/Korea)
# df = df[df["Country"].isin(["Taiwan", "USA", "China", "Korea"])]

import statsmodels.api as sm

# OLS with two-way fixed effects expressed as categorical terms.
twfe_formula = "log_exports ~ C(Country) + C(Year) + TW_AIBoom"
mod = smf.ols(twfe_formula, data=df)
# HAC covariance with a single lag (adjustment for serially correlated errors).
hac_options = {"maxlags": 1}
res = mod.fit(cov_type="HAC", cov_kwds=hac_options)
print(res.summary())

# Gather the estimates into one table (one row per model term) and save as CSV.
res_df = (
    pd.DataFrame(
        {
            "coef": res.params,
            "stderr": res.bse,
            "t": res.tvalues,
            "p": res.pvalues,
        }
    )
    .rename_axis("term")
    .reset_index()
)
res_path = OUT / "ai_regression_results_TWFE.csv"
res_df.to_csv(res_path, index=False)
print("✅ Saved regression results ->", res_path)


In [None]:
# === Extract the TW_* coefficient and plot it (only TW_AIBoom in this model) ===
coef_tbl = res_df.copy()

row = coef_tbl.loc[coef_tbl["term"]=="TW_AIBoom"]
if row.empty:
    raise RuntimeError("抓不到 TW_AIBoom 係數，請檢查模型式或欄位命名。")

# Read the scalars from the first matching row. Calling float() directly on a
# one-element Series is deprecated in recent pandas, so index the row first.
tw_row = row.iloc[0]
coef = float(tw_row["coef"])
stderr = float(tw_row["stderr"])
p = float(tw_row["p"])  # not drawn on the figure; kept available for inspection
# Confidence band of +/- 1.96 standard errors around the estimate.
low = coef - 1.96*stderr
high = coef + 1.96*stderr

label = "TW_AIBoom"
bar_color = "#8dd3c7"

fig, ax = plt.subplots(figsize=(8,5))
# yerr takes the distances below and above the bar height.
ax.bar([label], [coef], color=bar_color, edgecolor="k", alpha=0.7, yerr=[[coef-low],[high-coef]], capsize=8)

# Value label, placed just beyond the bar end on the side the bar points to.
ax.text(0, coef + (0.02 if coef>=0 else -0.02), f"{coef:.2f}", ha="center", va="bottom" if coef>=0 else "top", fontsize=11)

# Cosmetics: zero reference line, axis label, title, and y-limits that always
# include the whole error bar.
ax.axhline(0, color="gray", lw=1, linestyle="--")
ax.set_ylabel("Effect on log(exports) (≈ % change)")
ax.set_title("Taiwan Relative Effect — AI Demand Boom (TWFE)")
ax.set_ylim(min(-0.25, low-0.05), max(0.30, high+0.05))

png_path = OUT / "tw_ai_effects_bar_labeled.png"
plt.tight_layout()
plt.savefig(png_path, dpi=150)
plt.show()
print("✅ Saved figure ->", png_path)


In [None]:
# === Export the dataset used for this AI-demand regression ===
# Columns written to the CSV, in output order.
save_cols = ["Country", "Year", "log_exports", "AI_Index", "AIBoom", "TW_AIBoom"]
reg_data_path = OUT / "ai_regression_dataset_TWFE.csv"

# Select just those columns and write them without the row index.
reg_dataset = df.loc[:, save_cols]
reg_dataset.to_csv(reg_data_path, index=False)
print("✅ Saved dataset ->", reg_data_path)
