In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.cluster.hierarchy import linkage

# -------------------------------------------------------
# 1. Load data: OWID calorie supply by food group
# -------------------------------------------------------
url = "https://ourworldindata.org/grapher/calorie-supply-by-food-group.csv"

# Send an explicit User-Agent header with the request. OWID's own download
# snippet does this; NOTE(review): requests carrying only the default urllib
# agent may be refused by the server, so keep the header unless confirmed
# unnecessary.
df_raw = pd.read_csv(
    url,
    storage_options={"User-Agent": "Our World In Data data fetch/1.0"},
)

# Keep country-level rows only, identified by a 3-character "Code".
# Rows whose Code is missing do not compare equal to 3 and are dropped too.
is_country = df_raw["Code"].str.len() == 3

# Sort chronologically within each entity so the last row is the latest year.
df_raw = df_raw[is_country].sort_values(["Entity", "Year"])

# One row per country: the most recent year available, indexed by country name.
df_latest = df_raw.groupby("Entity").tail(1).set_index("Entity")

# -------------------------------------------------------
# 2. Food-group columns: swap the long dataset column names for short keys
# -------------------------------------------------------
# Short key -> full column name as it appears in the downloaded CSV.
food_cols_long = {
    "vegetal": "Vegetal Products | 00002903 || Food available for consumption | 0664pc || kilocalories per day per capita",
    "meat": "Meat, total | 00002943 || Food available for consumption | 0664pc || kilocalories per day per capita",
    "dairy": "Milk - Excluding Butter | 00002848 || Food available for consumption | 0664pc || kilocalories per day per capita",
    "eggs": "All egg products | 00002744 || Food available for consumption | 0664pc || kilocalories per day per capita",
    "animal_fats": "Animal fats group | 00002946 || Food available for consumption | 0664pc || kilocalories per day per capita",
    "fish_seafood": "Fish and seafood | 00002960 || Food available for consumption | 0664pc || kilocalories per day per capita",
}

# Food groups actually used as clustering features ("eggs" is defined above
# but not selected here).
selected_keys = ["vegetal", "meat", "dairy", "animal_fats", "fish_seafood"]
feature_cols_long = [food_cols_long[key] for key in selected_keys]

# Fail early, naming the first expected column that the dataset lacks.
missing_col = next(
    (name for name in feature_cols_long if name not in df_latest.columns),
    None,
)
if missing_col is not None:
    raise ValueError(f"Column not found in df_latest: {missing_col}")

# Take the feature columns, relabel them with the short keys (same order as
# feature_cols_long), and drop every country with a missing value.
df_features = (
    df_latest[feature_cols_long]
    .set_axis(selected_keys, axis=1)
    .dropna()
)

# Show the resulting table
df_features

In [None]:
# -------------------------------------------------------
# 3. Select Japan plus n_random randomly chosen countries
# -------------------------------------------------------
np.random.seed(5)  # fixed seed so the same countries are drawn on every run

n_random = 49  # number of countries to draw in addition to Japan

all_countries = list(df_features.index)
if "Japan" not in all_countries:
    raise ValueError("Japan not found in the dataset; check the dataset format.")

# Sampling pool: every country with complete data except Japan.
other_countries = [name for name in all_countries if name != "Japan"]

if n_random > len(other_countries):
    raise ValueError("Not enough countries with complete data to sample from.")

# Draw without replacement, then put Japan first in the final selection.
sampled_countries = np.random.choice(other_countries, n_random, replace=False)
selected_countries = ["Japan", *sampled_countries]

df_selected = df_features.loc[selected_countries]

# Sanity check: report how many and which countries were actually selected
print("\nNumber of selected countries:", df_selected.shape[0])
print("Selected countries:")
print(list(df_selected.index))

# -------------------------------------------------------
# 4. z-score standardization -> wrap the result in a DataFrame
# -------------------------------------------------------
X = df_selected.to_numpy(dtype=float)
means = X.mean(axis=0)
stds = X.std(axis=0, ddof=0)  # per-column population standard deviation

# A column that is constant across the selected countries has std == 0, and
# dividing by it would turn the whole column into NaN (0/0). Divide such
# columns by 1 instead so they standardize to all zeros and X_std stays finite
# for the clustering step. Columns with non-zero std are unaffected.
safe_stds = np.where(stds == 0, 1.0, stds)
X_std = (X - means) / safe_stds

# Same rows (countries) and short column keys as the unstandardized data.
df_std = pd.DataFrame(
    X_std,
    index=df_selected.index,
    columns=selected_keys,
)

# -------------------------------------------------------
# 5. Row-wise linkage (Ward's method)
# -------------------------------------------------------
# Each row of X_std is one country's standardized feature vector; the metric
# is spelled out here but is the function's default.
row_linkage = linkage(X_std, "ward", metric="euclidean")

# -------------------------------------------------------
# 6. Draw the clustermap (figure height scales with the number of rows)
# -------------------------------------------------------
sns.set(context="notebook", style="white")

# Row annotation colours: Japan in red, every other country in gray.
row_colors = np.where(df_std.index == "Japan", "red", "gray").tolist()

# Grow the figure with the number of rows, but never below 6 inches tall.
n_rows = len(df_std)
fig_height = max(6, 0.3 * n_rows)

# Rows are ordered by the precomputed linkage; columns keep their given order.
g = sns.clustermap(
    data=df_std,
    figsize=(8, fig_height),
    row_colors=row_colors,
    row_linkage=row_linkage,
    col_cluster=False,
)

g.ax_heatmap.set(
    title="Ward Clustering of Countries\n(Dietary Calorie Supply, standardized)",
    xlabel="Food groups (standardized)",
    ylabel="Countries",
)

plt.show()