In [None]:
!pip install wbgapi pandas

In [1]:
import os
os.environ["LOKY_MAX_CPU_COUNT"] = str(int(os.cpu_count() or 1))
import warnings
warnings.filterwarnings("ignore")
import wbgapi as wb
import pandas as pd
from datetime import date

### 1) 设定区域与年份 

In [2]:
APAC_REGIONS = ['EAS', 'SAS']   # 东亚太平洋 + 南亚
END_YEAR = date.today().year - 1         # 用去年作为最近完整年
START_YEAR = END_YEAR - 9                 # 近10年
YEARS = range(START_YEAR, END_YEAR + 1)

### 2) 选择指标（>=8 个，含时间维度）

In [3]:
INDICATORS = {
    'NY.GDP.MKTP.CD'      : 'gdp_current_usd',        # GDP(现价美元)
    'NY.GDP.MKTP.KD.ZG'   : 'gdp_real_growth_pct',    # 实际GDP增速(%)
    'NY.GDP.PCAP.CD'      : 'gdp_per_capita_usd',     # 人均GDP(美元)
    'FP.CPI.TOTL.ZG'      : 'inflation_cpi_pct',      # 居民消费价格通胀(%)
    'SL.UEM.TOTL.ZS'      : 'unemployment_pct',       # 失业率(%)
    'NE.EXP.GNFS.ZS'      : 'exports_gdp_pct',        # 货物与服务出口/GDP(%)
    'NE.IMP.GNFS.ZS'      : 'imports_gdp_pct',        # 货物与服务进口/GDP(%)
    'GC.NLD.TOTL.GD.ZS'   : 'fiscal_balance_gdp_pct', # 政府现金收支差/GDP(%)
    'BX.KLT.DINV.WD.GD.ZS': 'fdi_inflow_gdp_pct',     # 外商直接投资净流入/GDP(%)
}

# === 3) 取亚太地区成员经济体（返回 ISO3 代码列表）===
# WBGAPI 支持按“地区→成员国”查询
economies = sorted({e for r in APAC_REGIONS for e in wb.region.members(r)})



### 4) 批量拉取数据：多个指标 × 多国 × 多年 → DataFrame

In [4]:
df_wide = wb.data.DataFrame(
    list(INDICATORS.keys()),
    economies,
    time=YEARS,
    index=['economy','time'],
    columns='series',
    labels=True,
    numericTimeKeys=True
)

# 3) 整理列名
df = (df_wide
      .rename(columns=INDICATORS)
      .reset_index()
      .rename(columns={'economy': 'iso3', 'time': 'year', 'Country': 'country_name'}))


In [5]:
df_wide.columns

Index(['Country', 'Time', 'BX.KLT.DINV.WD.GD.ZS', 'FP.CPI.TOTL.ZG',
       'GC.NLD.TOTL.GD.ZS', 'NE.EXP.GNFS.ZS', 'NE.IMP.GNFS.ZS',
       'NY.GDP.MKTP.CD', 'NY.GDP.MKTP.KD.ZG', 'NY.GDP.PCAP.CD',
       'SL.UEM.TOTL.ZS'],
      dtype='object')

In [6]:
df

Unnamed: 0,iso3,year,country_name,Time,fdi_inflow_gdp_pct,inflation_cpi_pct,fiscal_balance_gdp_pct,exports_gdp_pct,imports_gdp_pct,gdp_current_usd,gdp_real_growth_pct,gdp_per_capita_usd,unemployment_pct
0,WSM,2024,Samoa,2024,0.350274,2.172455,,29.315509,53.816154,1.068025e+09,9.422350,4898.771408,4.551
1,WSM,2023,Samoa,2023,0.255376,7.921647,4.885785,28.753800,61.903681,9.381894e+08,9.208649,4330.178405,4.977
2,WSM,2022,Samoa,2022,0.576323,10.961882,6.559183,12.266383,52.716798,8.329452e+08,-5.306291,3869.466395,5.048
3,WSM,2021,Samoa,2021,1.060395,3.133205,0.983685,11.685690,49.203196,8.439236e+08,-7.078831,3947.645179,6.612
4,WSM,2020,Samoa,2020,0.509390,-1.568912,7.638424,29.195960,48.287640,8.688984e+08,-3.108444,4099.660091,7.435
...,...,...,...,...,...,...,...,...,...,...,...,...,...
425,ASM,2019,American Samoa,2019,,,,61.669243,94.899536,6.470000e+08,-0.487805,12886.135952,
426,ASM,2018,American Samoa,2018,,,,68.231612,103.599374,6.390000e+08,2.671119,12552.054687,
427,ASM,2017,American Samoa,2017,,,,59.150327,102.287582,6.120000e+08,-6.987578,11863.683945,
428,ASM,2016,American Samoa,2016,,,,63.934426,95.230999,6.710000e+08,-1.679389,12843.334290,


In [7]:
meta = wb.economy.DataFrame(economies).reset_index().rename(columns={'index': 'iso3'})
#re colum name id to iso3
meta = meta.rename(columns={'id': 'iso3'})

In [8]:

df = df.merge(meta[['iso3', 'region', 'incomeLevel']], on='iso3', how='left')

# 5) 数值列转数值
value_cols = list(INDICATORS.values())
df[value_cols] = df[value_cols].apply(pd.to_numeric, errors='coerce')

# 6) 保存
df = df[['iso3','country_name','region','incomeLevel','year'] + value_cols].sort_values(['iso3','year'])
df.to_csv('apac_wdi_10y.csv', index=False)
print(f"OK: {START_YEAR}-{END_YEAR}, {len(economies)} 个经济体, {len(INDICATORS)} 指标 → apac_wdi_10y.csv")

OK: 2015-2024, 43 个经济体, 9 指标 → apac_wdi_10y.csv


In [9]:
# 你计划使用的“可读指标名”（有多少就用多少）
all_candidates = [
    'gdp_current_usd', 'gdp_per_capita_usd', 'gdp_real_growth_pct',
    'inflation_cpi_pct', 'unemployment_pct',
    'exports_gdp_pct', 'imports_gdp_pct',
    'fdi_inflow_gdp_pct', 'fiscal_balance_gdp_pct'
]
value_cols = [c for c in all_candidates if c in df.columns]

# 类型划分
level_cols = [c for c in ['gdp_current_usd', 'gdp_per_capita_usd'] if c in value_cols]
rate_cols  = [c for c in value_cols if c not in level_cols]  # 百分比/比率/增速等

# ---- 1) 缺失诊断报告（可选打印） ----
missing_by_col = df[value_cols].isna().mean().sort_values(ascending=False)
print('[各指标缺失率]')
print((missing_by_col*100).round(1).astype(str) + '%')

coverage_by_country = 1 - df.groupby('iso3')[value_cols].apply(lambda x: x.isna().mean()).mean(axis=1)
print('\n[各国家平均覆盖率Top/Bottom]')
print(coverage_by_country.sort_values(ascending=False).head(5))
print(coverage_by_country.sort_values().head(5))



[各指标缺失率]
fiscal_balance_gdp_pct    45.8%
inflation_cpi_pct         23.3%
unemployment_pct          18.6%
exports_gdp_pct           18.1%
imports_gdp_pct           18.1%
fdi_inflow_gdp_pct        14.2%
gdp_real_growth_pct        6.3%
gdp_current_usd            5.6%
gdp_per_capita_usd         5.6%
dtype: object

[各国家平均覆盖率Top/Bottom]
iso3
WSM    0.988889
PHL    0.988889
MYS    0.988889
THA    0.988889
LKA    0.988889
dtype: float64
iso3
PRK    0.111111
TUV    0.400000
ASM    0.444444
MNP    0.444444
NCL    0.522222
dtype: float64


In [10]:
#knn imputation
from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=5)
df_imputed = df.copy()
df_imputed[value_cols] = imputer.fit_transform(df[value_cols])
df_imputed

Unnamed: 0,iso3,country_name,region,incomeLevel,year,gdp_current_usd,gdp_real_growth_pct,gdp_per_capita_usd,inflation_cpi_pct,unemployment_pct,exports_gdp_pct,imports_gdp_pct,fiscal_balance_gdp_pct,fdi_inflow_gdp_pct
429,ASM,American Samoa,EAS,HIC,2015,6.730000e+08,3.149606,12727.410265,4.132337,4.7656,63.595840,98.811293,1.998238,2.689566
428,ASM,American Samoa,EAS,HIC,2016,6.710000e+08,-1.679389,12843.334290,4.132337,4.7656,63.934426,95.230999,1.998238,2.689566
427,ASM,American Samoa,EAS,HIC,2017,6.120000e+08,-6.987578,11863.683945,4.758562,2.3062,59.150327,102.287582,2.594881,0.222758
426,ASM,American Samoa,EAS,HIC,2018,6.390000e+08,2.671119,12552.054687,5.019308,2.9256,68.231612,103.599374,3.574335,1.286643
425,ASM,American Samoa,EAS,HIC,2019,6.470000e+08,-0.487805,12886.135952,5.019308,2.9256,61.669243,94.899536,3.574335,1.286643
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,WSM,Samoa,EAS,UMC,2020,8.688984e+08,-3.108444,4099.660091,-1.568912,7.4350,29.195960,48.287640,7.638424,0.509390
3,WSM,Samoa,EAS,UMC,2021,8.439236e+08,-7.078831,3947.645179,3.133205,6.6120,11.685690,49.203196,0.983685,1.060395
2,WSM,Samoa,EAS,UMC,2022,8.329452e+08,-5.306291,3869.466395,10.961882,5.0480,12.266383,52.716798,6.559183,0.576323
1,WSM,Samoa,EAS,UMC,2023,9.381894e+08,9.208649,4330.178405,7.921647,4.9770,28.753800,61.903681,4.885785,0.255376


In [11]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist
import numpy as np
import pandas as pd


def kmeans_by_year_stable(df, value_cols, k=4, random_state=42, align='overlap'):
    """
    稳定版：全局标准化 → 每年KMeans（若可用则用上一年质心做 init）→ 逐年对齐
    align: 'overlap'（推荐，更稳）或 'dist'（用质心欧氏距离）
    """
    base = df.copy()

    # 1) 全局标准化（关键：把所有年份放到同一坐标系）
    scaler = StandardScaler().fit(base[value_cols].values)
    base_std = base.copy()
    base_std[value_cols] = scaler.transform(base[value_cols].values)

    years = sorted(base_std['year'].unique())
    n = len(base_std)
    labels_raw = np.full(n, -1, dtype=int)
    labels_aln = np.full(n, -1, dtype=int)

    prev_centers = None
    prev_year = None

    for y in years:
        idx = (base_std['year'] == y).values
        Xy  = base_std.loc[idx, value_cols].values
        iso_y = base_std.loc[idx, 'iso3'].astype(str).values

        k_eff = min(k, max(1, Xy.shape[0]))

        # 2) 以“上一年质心”作为初始化（若簇数不一致则退化为默认初始化）
        use_init = False
        init_centers = None
        if prev_centers is not None and prev_centers.shape[0] == k_eff:
            init_centers = prev_centers
            use_init = True

        km = KMeans(
            n_clusters=k_eff,
            init=init_centers if use_init else 'k-means++',
            n_init=1 if use_init else 10,
            random_state=random_state
        )
        raw_y = km.fit_predict(Xy)
        centers_y = km.cluster_centers_
        labels_raw[idx] = raw_y

        # 3) 对齐
        if prev_year is None:
            # 基准年：aligned = raw
            labels_aln[idx] = raw_y
        else:
            if align == 'overlap':
                # —— 成员重叠最大化（推荐）——
                prev_idx = (base_std['year'] == prev_year).values
                iso_prev = base_std.loc[prev_idx, 'iso3'].astype(str).values
                prev_aln = labels_aln[prev_idx].astype(int)

                # 只在两年都出现的 iso 上统计交叉表
                inter = np.intersect1d(iso_prev, iso_y)
                if len(inter) == 0:
                    # 若没有交集，退化为距离对齐
                    C = cdist(prev_centers, centers_y, metric='euclidean')
                    row_ind, col_ind = linear_sum_assignment(C)
                    mapping = {int(col_ind[j]): int(row_ind[j]) for j in range(len(row_ind))}
                else:
                    # 把 label 值映射到 [0..a-1]，避免标签不连续导致矩阵越界
                    prev_series = pd.Series(prev_aln, index=iso_prev).loc[inter]
                    cur_series  = pd.Series(raw_y,   index=iso_y ).loc[inter]
                    prev_codes, prev_uniques = pd.factorize(prev_series.values)
                    cur_codes,  cur_uniques  = pd.factorize(cur_series.values)

                    C_count = np.zeros((len(prev_uniques), len(cur_uniques)), dtype=int)
                    for i, j in zip(prev_codes, cur_codes):
                        C_count[i, j] += 1

                    # 匈牙利：最大化重叠 => 最小化 -count
                    row_ind, col_ind = linear_sum_assignment(-C_count)
                    # 把“curr唯一标签值”映射到“prev唯一标签值”
                    mapping = {int(cur_uniques[col_ind[j]]): int(prev_uniques[row_ind[j]])
                               for j in range(len(row_ind))}
            else:
                # —— 质心距离对齐（修复：不再用 prev_idx）——
                # 直接用上一年的质心矩阵与今年质心矩阵做 cdist
                C = cdist(prev_centers, centers_y, metric='euclidean')
                row_ind, col_ind = linear_sum_assignment(C)
                mapping = {int(col_ind[j]): int(row_ind[j]) for j in range(len(row_ind))}

            # 应用映射；若有未匹配的簇 id，则保持原样
            aligned_y = np.array([mapping.get(int(c), int(c)) for c in raw_y], dtype=int)
            labels_aln[idx] = aligned_y

        # 4) 更新 prev 信息
        prev_centers = centers_y
        prev_year = y

    out = df.copy()
    out['cluster_raw'] = labels_raw
    out['cluster_aligned'] = labels_aln

    
    return out

In [12]:
def add_taiwan(df):
    for y in sorted(df['year'].unique()):
        has_twn = ((df['iso3'] == 'TWN') & (df['year'] == y)).any()
        if not has_twn:
            chn_row = df[(df['iso3']=='CHN') & (df['year']==y)]
            if not chn_row.empty:
                taiwan_row = chn_row.copy()
                taiwan_row['iso3'] = 'TWN'
                df = pd.concat([df, taiwan_row], ignore_index=True)
    return df


In [13]:

def kmeans_by_all_years(df, k = 4):
    df_clustered = df.copy()
    X = StandardScaler().fit_transform(df[value_cols].values)
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10).fit(X)
    df_clustered['cluster'] = kmeans.labels_
    return df_clustered


df_clustered_year = kmeans_by_year_stable(
    df_imputed, value_cols=value_cols, k=4, random_state=42, align='overlap'
)
df_clustered_year.drop(columns=['cluster_raw'], inplace=True)
df_clustered_year.to_csv('apac_wdi_10y_kmeans_by_year_stable_k4.csv', index=False)

df_clustered_year = add_taiwan(df_clustered_year)

for y in sorted(df_clustered_year['year'].unique()):
    has_twn = ((df_clustered_year['iso3'] == 'TWN') & (df_clustered_year['year'] == y)).any()
    if not has_twn:
        chn_row = df_clustered_year[(df_clustered_year['iso3']=='CHN') & (df_clustered_year['year']==y)]
        if not chn_row.empty:
            taiwan_row = chn_row.copy()
            taiwan_row['iso3'] = 'TWN'
            df_clustered_year = pd.concat([df_clustered_year, taiwan_row], ignore_index=True)


In [14]:
import plotly.express as px


In [15]:
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd

# ========= 0) 准备数据：对齐后簇标签转字符串、固定颜色映射 =========
df_plot = df_clustered_year.copy()

# 强制为 int -> str，避免出现 float 标签
df_plot['cluster_aligned'] = df_plot['cluster_aligned'].astype(int).astype(str)

# 固定簇的类别顺序与配色（Set2）
category_order = ['0', '1', '2', '3']
palette = px.colors.qualitative.Set2
color_discrete_map = {c: palette[i % len(palette)] for i, c in enumerate(category_order)}

# ========= 1) 主底图：choropleth，含逐年动画 =========
fig = px.choropleth(
    df_plot,
    locations='iso3',
    locationmode='ISO-3',
    color='cluster_aligned',
    category_orders={'cluster_aligned': category_order},
    color_discrete_map=color_discrete_map,
    hover_name='country_name',
    animation_frame='year',
    title='KMeans Clustering by Year (k=4, aligned labels)'
)

# ========= 2) 亚太区域裁剪范围（可按需微调）=========
LON_RANGE = [0, 230]     # 经度：中东~西太平洋
LAT_RANGE = [-70, 60]    # 纬度：印尼~日本
CENTER    = dict(lon=110, lat=5)

geo_cfg = dict(
    projection=dict(type="equirectangular", scale=1.2),
    center=CENTER,
    lonaxis=dict(range=LON_RANGE),
    lataxis=dict(range=LAT_RANGE),
    showcoastlines=True,
    showcountries=True,
)

fig.update_layout(geo=geo_cfg)

# ========= 3) 叠加微型地区（SGP/HKG）为点，并与动画帧同步 =========
MICRO_ISO3 = ["SGP", "HKG"]
MICRO_LATLON = {
    "SGP": (1.3521, 103.8198),   # (lat, lon)
    "HKG": (22.3193, 114.1694),
}

# 初始帧：取动画的第一个年份（如果有）
years_sorted = sorted(df_plot['year'].unique().tolist())
if len(years_sorted) > 0:
    first_year = int(years_sorted[0])
    pts0 = df_plot[(df_plot['year'] == first_year) & (df_plot['iso3'].isin(MICRO_ISO3))].copy()
    if not pts0.empty:
        pts0['lat'] = pts0['iso3'].map(lambda c: MICRO_LATLON[c][0])
        pts0['lon'] = pts0['iso3'].map(lambda c: MICRO_LATLON[c][1])
        fig.add_trace(go.Scattergeo(
            lon=pts0['lon'],
            lat=pts0['lat'],
            mode="markers",
            marker=dict(
                size=10,
                color=[color_discrete_map.get(v, palette[0]) for v in pts0['cluster_aligned']],
                line=dict(width=0.6, color="rgba(0,0,0,0.6)")
            ),
            text=pts0['country_name'] + " | group " + pts0['cluster_aligned'],
            hoverinfo="text",
            showlegend=False
        ))

# 每一帧：追加对应年份的微型地区点
for fr in fig.frames:
    # 帧名就是该年的字符串
    try:
        y = int(fr.name)
    except Exception:
        # 应对某些环境帧名是字符串且不可转 int
        y = int(str(fr.name))
    pts = df_plot[(df_plot['year'] == y) & (df_plot['iso3'].isin(MICRO_ISO3))].copy()
    if not pts.empty:
        pts['lat'] = pts['iso3'].map(lambda c: MICRO_LATLON[c][0])
        pts['lon'] = pts['iso3'].map(lambda c: MICRO_LATLON[c][1])
        scatter = go.Scattergeo(
            lon=pts['lon'],
            lat=pts['lat'],
            mode="markers",
            marker=dict(
                size=10,
                color=[color_discrete_map.get(v, palette[0]) for v in pts['cluster_aligned']],
                line=dict(width=0.6, color="rgba(0,0,0,0.6)")
            ),
            text=pts['country_name'] + " | group " + pts['cluster_aligned'],
            hoverinfo="text",
            showlegend=False
        )
        fr.data = fr.data + (scatter,)

    # 同步每帧的 geo 配置，避免动画重置范围/投影
    if not hasattr(fr, 'layout') or fr.layout is None:
        fr.layout = {}
    # 复制 geo 到帧
    fr.layout.update(geo=geo_cfg)

# ========= 4) 交互后保持缩放/拖拽状态（帧切换不重置）=========
fig.update_layout(
    height=700, width=1100,
    margin=dict(l=0, r=0, t=60, b=0),
    geo_uirevision=True  # 用户交互后的缩放范围在不同帧间保持
)

fig.show()


In [None]:
df_clustered_year

In [None]:
df_clustered_year[df_clustered_year['iso3']!='TWN'  ]

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import plotly.express as px

# 1) 构建按年训练集（使用“同一年”的横截面；为避免规模偏置，对该年做 z-score）
def rf_importance_by_year(df_clustered_year, value_cols, seed=42):
    value_cols_scale = StandardScaler().fit_transform(df_clustered_year[value_cols])
    df_scale = df_clustered_year.copy()
    df_scale[value_cols] = value_cols_scale
    records = []
    for y, g in df_scale.groupby('year'):
        X = g[value_cols].to_numpy()
        ylab = g['cluster_aligned'].astype(int).to_numpy()
        if len(np.unique(ylab)) < 2 or len(g) < 10:
            continue

        rf = RandomForestClassifier(random_state=seed, n_jobs=-1)
        rf.fit(X, ylab)
        for f, w in zip(value_cols, rf.feature_importances_):
            records.append({'year': int(y), 'feature': f, 'importance': float(w)})
    return pd.DataFrame(records)

imp_df = rf_importance_by_year(df_clustered_year[df_clustered_year['iso3']!='TWN'], value_cols)

# 排序 + 动画
fig_imp = px.bar(
    imp_df.sort_values(['year','importance']),
    x='feature', y='importance',
    animation_frame='year',
    title='Which attributes define the groupings? (RandomForest feature importance by year)'
)
fig_imp.update_layout(xaxis_title='Indicator', yaxis_title='Importance')
fig_imp.show()


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go
import plotly.express as px

def build_cluster_radar_dropdown(df_clustered_year: pd.DataFrame, value_cols, title="Cluster Profiles (z-scored)"):
    df = df_clustered_year.copy()

    # 1) 强制标签为 int，避免 '0'/'1' 的字符串导致聚合问题
    df['cluster_aligned'] = df['cluster_aligned'].astype(int)

    # 2) 年份排序 + 颜色
    years = sorted(df['year'].unique().tolist())
    groups_all = sorted(df['cluster_aligned'].unique().tolist())  # 全局可能的组（如 0..3）
    palette = px.colors.qualitative.Set2
    color_map = {g: palette[g % len(palette)] for g in groups_all}

    # 3) 预先计算：每年、每组的 z-score 画像（均值）
    scaler = StandardScaler().fit(df[value_cols])
    df[value_cols] = scaler.transform(df[value_cols])


    profiles_by_year = {}  # year -> DataFrame(index=cluster, columns=value_cols(z))
    for y, g in df.groupby('year'):
        g = g.copy()
        # 仅使用当年的列做 z-score，避免量纲混淆
        #Xz = StandardScaler().fit_transform(g[value_cols].to_numpy())
        #g[value_cols] = Xz
        prof = g.groupby('cluster_aligned')[value_cols].mean().sort_index()
        profiles_by_year[int(y)] = prof

    # 4) 生成图：为每个年中的实际存在的组各建一条 trace
    fig = go.Figure()
    theta = list(value_cols) + [value_cols[0]]  # 闭合
    vis_map = []  # 每个年份对应一个可见性布尔列表
    trace_year_index = []  # 记录每条 trace 属于哪个 year（用于构造 visibility）

    for y in years:
        prof = profiles_by_year.get(int(y))
        if prof is None or prof.empty:
            continue
        for grp in prof.index.tolist():
            r = prof.loc[grp, value_cols].values
            r_closed = np.r_[r, r[0]]  # 闭合
            fig.add_trace(go.Scatterpolar(
                r=r_closed, theta=theta,
                mode='lines+markers',
                name=f'Year {y} — Group {grp}',
                line=dict(width=2, color=color_map.get(int(grp), 'gray')),
                marker=dict(size=5),
                showlegend=True  # 保留图例，方便确认
            ))
            trace_year_index.append(int(y))

    # 5) 构造每个年份按钮的 visibility
    n_traces = len(fig.data)
    for y in years:
        vis = [ (trace_year_index[i] == int(y)) for i in range(n_traces) ]
        vis_map.append(vis)

    # 6) 初始显示第一个年份
    initial_vis = vis_map[0] if vis_map else [True]*n_traces
    for i, visible in enumerate(initial_vis):
        fig.data[i].visible = visible

    # 7) 下拉菜单（切换年份）
    buttons = []
    for i, y in enumerate(years):
        buttons.append(dict(
            label=str(y),
            method='update',
            args=[
                {'visible': vis_map[i]},
                {'title': f"{title} — {y}",
                 'polar': dict(radialaxis=dict(visible=True))}
            ]
        ))

    fig.update_layout(
        title=f"{title} — {years[0] if years else ''}",
        height=650,
        width=1000,
        polar=dict(radialaxis=dict(visible=True)),
        updatemenus=[dict(
            type="dropdown",
            buttons=buttons,
            showactive=True,
            x=1.02, y=1.1,            # ← 位置
            xanchor="left",         # ← 锚点
            yanchor="top",
            pad={"t": 6, "r": 6}    # ← 与边缘留白
        )],
        margin=dict(l=40, r=40, t=80, b=40)
    )

    return fig

# ==== 调用 ====
fig_radar = build_cluster_radar_dropdown(df_clustered_year, value_cols,
                                         title="Cluster Profiles (z-scored)")
fig_radar.show()


In [None]:
import plotly.graph_objects as go

def early_late_sankey(df_clustered_year):
    years = sorted(df_clustered_year['year'].unique())
    early, late = years[:len(years)//2], years[len(years)//2:]
    e_mode = (df_clustered_year[df_clustered_year['year'].isin(early)]
              .groupby('iso3')['cluster_aligned'].agg(lambda s: s.astype(int).mode().iloc[0]))
    l_mode = (df_clustered_year[df_clustered_year['year'].isin(late)]
              .groupby('iso3')['cluster_aligned'].agg(lambda s: s.astype(int).mode().iloc[0]))
    flows = (pd.DataFrame({'e':e_mode, 'l':l_mode})
             .dropna().astype(int).value_counts().reset_index(name='count'))
    flows['e'] = 'E'+flows['e'].astype(str); flows['l'] = 'L'+flows['l'].astype(str)

    nodes = sorted(set(flows['e'])|set(flows['l']))
    idx = {n:i for i,n in enumerate(nodes)}
    fig = go.Figure(data=[go.Sankey(
        node=dict(label=nodes),
        link=dict(
            source=[idx[s] for s in flows['e']],
            target=[idx[t] for t in flows['l']],
            value=flows['count'].tolist()
        )
    )])
    fig.update_layout(title='Early → Late Group Transitions (mode per country)')
    return fig

fig_sankey = early_late_sankey(df_clustered_year)
fig_sankey.show()
