In [2]:
import pandas as pd
import plotly.express as px
import numpy as np

In [3]:
# Load data
df = pd.read_csv("gdp_per_capita.csv")

# Use 2024 if available, else 2023
df["gdp_per_capita"] = df["2024"].fillna(df["2023"])

# Drop regional aggregates (non-countries)
df = df[df["Country Code"].str.len() == 3]  # usually real countries have 3-letter ISO codes

# compute the burden
df["LLM_cost_burden_percent"] = (20 / df["gdp_per_capita"]) * 100
df["LLM_cost_burden_log"] = np.log10(df["LLM_cost_burden_percent"] + 1e-6)

In [4]:
df.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,2022,2023,2024,gdp_per_capita,LLM_cost_burden_percent,LLM_cost_burden_log
0,Aruba,ABW,"GDP per capita, PPP (constant 2021 internation...",38877.67444,40516.80856,,40516.80856,0.049362,-1.306596
1,Africa Eastern and Southern,AFE,"GDP per capita, PPP (constant 2021 internation...",3974.244214,3948.142721,3968.963751,3968.963751,0.50391,-0.297646
2,Afghanistan,AFG,"GDP per capita, PPP (constant 2021 internation...",1981.710168,1983.81262,,1983.81262,1.00816,0.00353
3,Africa Western and Central,AFW,"GDP per capita, PPP (constant 2021 internation...",4825.13824,4872.768001,4961.663996,4961.663996,0.403091,-0.394596
4,Angola,AGO,"GDP per capita, PPP (constant 2021 internation...",7397.486427,7250.402699,7344.145379,7344.145379,0.272326,-0.56491


In [15]:
# --- Plot ---
fig = px.choropleth(
    df,
    locations="Country Code",
    color="LLM_cost_burden_log",
    hover_name="Country Name",
    hover_data={"LLM_cost_burden_percent": ":.2f"},
    color_continuous_scale="Viridis",   # good for log data
    title="Log-scaled LLM Cost Burden (% of GDP per Capita for $20)",
    width=1800,
    height=1080,
)

fig.update_layout(
    geo=dict(showframe=False, showcoastlines=True, projection_type="equirectangular")
)

fig.show()