In [1]:
import pandas as pd
import numpy as np
import altair as alt
import eco_style
alt.themes.enable('report')
import pycountry

### Theme Config:

In [2]:
def e4e_basic():
    return {
        'config': {
            'axisX': {
                'labelPadding': 6,
                'tickOpacity': 0,
                'title': None,
                'grid': False,
                'ticks': False,
                'labelSeparation': 10
            },
            'axisY': {
                'title': None,
                'grid': False,
                'tickOpacity': 0,
            },
            'title': {
                'fontSize': 16,
                'subtitleFontSize': 12,
                'subtitleFontStyle': 'italic',
                'anchor': 'start',
                'color': 'black'
            },
            'view': {
                'stroke': 'transparent'
            }
        }
    }

# Register the custom theme under a chosen name
alt.themes.register('e4e_basic', e4e_basic)

# Enable the theme
alt.themes.enable('e4e_basic')

ThemeRegistry.enable('e4e_basic')

### debt/gdp data - from IMF
data from https://www.imf.org/external/datamapper/GGXWDG_NGDP@WEO/OEMDC/ADVEC/WEOWORLD

In [3]:
debt_gdp_df = pd.read_csv("data/imf_central_gov_debt_gdp.csv")

debt_gdp_df = debt_gdp_df.iloc[1:]

debt_gdp_df = debt_gdp_df.rename(columns={"General government gross debt (Percent of GDP)": "country"})
debt_gdp_df = debt_gdp_df.melt(id_vars=["country"], var_name="year", value_name="debt_gdp")
debt_gdp_df["debt_gdp"] = pd.to_numeric(debt_gdp_df["debt_gdp"], errors="coerce")
debt_gdp_df = debt_gdp_df.dropna()
debt_gdp_df["debt_gdp"] = debt_gdp_df["debt_gdp"] / 100

### GDP - from imf
https://www.imf.org/external/datamapper/NGDPD@WEO/WEOWORLD

In [4]:
gdp_df = pd.read_csv("data/imf_gdp_nominal.csv")
gdp_df = gdp_df.iloc[1:]

gdp_df = gdp_df.rename(columns={"GDP, current prices (Billions of U.S. dollars)": "country"})
gdp_df = gdp_df.melt(id_vars=["country"], var_name="year", value_name="gdp")
gdp_df["gdp"] = pd.to_numeric(gdp_df["gdp"], errors="coerce")
gdp_df = gdp_df.dropna()
gdp_df

Unnamed: 0,country,year,gdp
1,Albania,1980,1.946
2,Algeria,1980,42.346
4,Angola,1980,6.639
5,Antigua and Barbuda,1980,0.131
6,Argentina,1980,233.696
...,...,...,...
11263,Major advanced economies (G7),2028,56789.445
11264,Middle East and Central Asia,2028,6066.951
11265,Other advanced economies,2028,10975.395
11266,Sub-Saharan Africa,2028,2875.895


### Merge

In [5]:
df = pd.merge(debt_gdp_df, gdp_df, on=["country", "year"])
df["debt"] = df["debt_gdp"] * df["gdp"]

df['year'] = pd.to_numeric(df['year'])
df.query("year == 2023").sort_values("debt", ascending=False).head(10)

df['country'] = df['country'].str.strip()

countries = df["country"].unique()

iso3s = {}

manual_iso3s = {
    "Congo, Republic of": "COG",
    "China, People's Republic of": "CHN",
    "Bahamas, The": "BHS",
    "Taiwan Province of China": "TWN",
    "Congo, Dem. Rep. of the": "COD",
    "Gambia, The": "GMB",
    "Türkiye, Republic of": "TUR",
    "South Sudan, Republic of": "SSD",
    "Hong Kong SAR": "HKG",
    "Macao SAR": "MAC",
    "Nigeria": "NGA",
    "Niger": "NER",
}





for country in countries:
    try:
        if country in manual_iso3s:
            iso3s[country] = manual_iso3s[country.strip()]
        else:
            iso3s[country] = pycountry.countries.search_fuzzy(country)[0].alpha_3
    except:
        print(f"'{country}' not found", end=", ")

df["iso3"] = df["country"].map(iso3s)
df = df.dropna(subset=["iso3"])

df['short_country'] = df.iso3.apply(lambda x: pycountry.countries.get(alpha_3=x).name)

short_manual = {
    "IRN": "Iran",
    "COD": "DR Congo",
    "RUS" : "Russia",
    
}

df['short_country'] = df.apply(lambda x: short_manual.get(x['iso3'], x['short_country']), axis=1)

'Australia and New Zealand' not found, 'Western Europe' not found, 'Pacific Islands' not found, 'South Asia' not found, 'Micronesia, Fed. States of' not found, 'East Asia' not found, 'Euro area' not found, 'European Union' not found, 'Europe' not found, 'Caribbean' not found, 'Eastern Europe' not found, 'Middle East and Central Asia' not found, 'West Bank and Gaza' not found, 'Africa (Region)' not found, 'Asia and Pacific' not found, 'Central America' not found, 'Middle East (Region)' not found, 'South America' not found, 'Southeast Asia' not found, 'Sub-Saharan Africa (Region)' not found, 'ASEAN-5' not found, 'Emerging and Developing Asia' not found, 'Emerging and Developing Europe' not found, 'Emerging market and developing economies' not found, 'Latin America and the Caribbean' not found, 'Other advanced economies' not found, 'Sub-Saharan Africa' not found, 'Lao P.D.R.' not found, 'North America' not found, 'Western Hemisphere (Region)' not found, 'Advanced economies' not found, 'Ma

#### Chart: Top 10 Debt by GDP, 2023

In [6]:
temp_df = df.query("year == 2023").sort_values("debt", ascending=False).head(10)

temp_df["debt"] = temp_df["debt"] / 1_000

base = alt.Chart(temp_df).encode(
    x=alt.X("debt:Q", title="", axis=alt.Axis(labelExpr=" '$'+datum.value+'T'")),
    y=alt.Y("short_country:N", title="Country", sort="-x", axis=alt.Axis(title=None)),
)    

bars = base.mark_bar(color="rgb(9, 32, 95)")

chart = bars

chart = chart.properties(
    title = alt.TitleParams(
            "Government Debt",
    subtitle=["Central Government Debt, Current USD", "Source: IMF", ""],
    ),

    width=400,
    height=300
)

chart.display()

chart.save("charts/nominal_debt/10_countries_highest_debt_2023.json")
chart.save("charts/nominal_debt/10_countries_highest_debt_2023.png", scale_factor=3.0)
chart.save("charts/nominal_debt/10_countries_highest_debt_2023.svg")



  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


### P21

In [9]:
p21_iso3s = ["BGD", "BRA", "CHN", "COD", "EGY", "ETH", "GBR", "DEU", "IND", "IDN", "IRN", "JPN", "MEX", "NGA", "PAK", "PHL", "RUS", "THA", "TUR", "USA", "VNM"]

temp_df = df.query("iso3 in @p21_iso3s")
temp_df["debt"] = temp_df["debt"] / 1_000

# add a rank column by 2023 debt
rank_df = temp_df.query("year == 2023").sort_values("debt", ascending=False).reset_index(drop=True)
rank_df["rank"] = rank_df.index + 1

temp_df = pd.merge(temp_df, rank_df[["iso3", "rank"]], on="iso3")

slider = alt.binding_range(min=2003, max=2023, step=1, name='Year:')
op_year = alt.param(value=2023, bind=slider)

base = alt.Chart(temp_df).encode(
    x=alt.X("debt:Q", title="", axis=alt.Axis(labelExpr=" '$'+datum.value+'T'")),
    y=alt.Y("short_country:N", title="Country", axis=alt.Axis(title=None), sort=alt.SortField(field="rank")),
).transform_filter(
    alt.datum.year == op_year
).add_params(op_year)

bars = base.mark_bar(color="rgb(9, 32, 95)")

# add a large text in the middle with the year

labels = base.mark_text(
    align='left',
    baseline='middle',
    dx=10,
    dy=0,
    fontSize=12,
    fontWeight="normal",
    text=alt.expr("'$' + format(datum.debt, '.1f') + 'T'"),
    color="rgb(9, 32, 95)",
    opacity=1
).encode(
    size=alt.value(12)
)

year_label_df = pd.DataFrame([{"year": y} for y in range(2003, 2024)])



text = alt.Chart(year_label_df).mark_text(
    align='center',
    baseline='middle',
    dx=0,
    dy=0,
    fontSize=64,
    fontWeight="bold",
    color="black",
    opacity=0.3
).encode(
    text="year:N"
).transform_filter(
    alt.datum.year == op_year
)



chart = bars + text + labels

chart = chart.properties(
    title = alt.TitleParams(
            "Government Debt",
    subtitle=["P21 Countries, Current USD", "Source: IMF", ""],
    ),
    width=400,
    height=300
)

chart.display()

chart.save("charts/nominal_debt/p21_countries_debt_2023_slider.json")
chart.save("charts/nominal_debt/p21_countries_debt_2023_slider.png", scale_factor=3.0)
chart.save("charts/nominal_debt/p21_countries_debt_2023_slider.svg")

# and saving without the text
textless = (bars + labels).properties(
    title = alt.TitleParams(
            "Government Debt",
    subtitle=["2023, P21 Countries, Current USD", "Source: IMF", ""],
    ),
    width=400,
    height=300
)

textless.save("charts/nominal_debt/p21_countries_debt_2023.json")
textless.save("charts/nominal_debt/p21_countries_debt_2023.png", scale_factor=3.0)
textless.save("charts/nominal_debt/p21_countries_debt_2023.svg")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df["debt"] = temp_df["debt"] / 1_000
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
WARN Domains that should be unioned has conflicting sort properties. Sort will be set to true.
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
WARN Domains that should be unioned has conflicting sort properties. Sort will be set to true.
  col = df[col_nam

In [25]:
p21_iso3s = ["BGD", "BRA", "CHN", "COD", "EGY", "ETH", "GBR", "DEU", "IND", "IDN", "IRN", "JPN", "MEX", "NGA", "PAK", "PHL", "RUS", "THA", "TUR", "USA", "VNM"]

temp_df = df.query("iso3 in @p21_iso3s")
temp_df["debt"] = temp_df["debt"] / 1_000

# add a rank column by 2023 debt
rank_df = temp_df.query("year == 2023").sort_values("debt", ascending=False).reset_index(drop=True)
rank_df["rank"] = rank_df.index + 1

temp_df = pd.merge(temp_df, rank_df[["iso3", "rank"]], on="iso3")

slider = alt.binding_range(min=2003, max=2023, step=1, name='Year:')
op_year = alt.param(value=2023, bind=slider)

base = alt.Chart(temp_df).encode(
    x=alt.X("debt:Q", title="", axis=alt.Axis(labelExpr=" '$'+datum.value+'T'")),
    y=alt.Y("short_country:N", title="Country", axis=alt.Axis(title=None, format=""), sort=alt.SortField(field="rank")),
).transform_filter(
    alt.datum.year == op_year
).add_params(op_year)

bars = base.mark_bar(color="#36B7B4")

# add a large text in the middle with the year

labels = base.mark_text(
    align='left',
    baseline='middle',
    dx=10,
    dy=0,
    fontSize=12,
    fontWeight="normal",
    text=alt.expr("'$' + format(datum.debt, '.1f') + 'T'"),
    color="#36B7B4",
    opacity=1
).encode(
    size=alt.value(12)
)

year_label_df = pd.DataFrame([{"year": y} for y in range(2003, 2024)])



text = alt.Chart(year_label_df).mark_text(
    align='center',
    baseline='middle',
    dx=0,
    dy=0,
    fontSize=64,
    fontWeight="bold",
    color="#36B7B4",
    opacity=0.3
).encode(
    text="year:N"
).transform_filter(
    alt.datum.year == op_year
)



chart = bars + text + labels

chart = chart.properties(
    title = alt.TitleParams(
            "Government Debt",
    subtitle=["P21 Countries, Current USD", "Source: IMF", ""],
    subtitleColor="rgb(133, 150, 154)",
    align="left",
    anchor="start",
    fontSize=16,
    subtitleFontSize=12,
    ),
    width=400,
    height=300
)

chart.display()

chart.save("charts/nominal_debt/p21_countries_debt_2023_slider_e4e_dark.json")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df["debt"] = temp_df["debt"] / 1_000
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [128]:
temp_df

Unnamed: 0,country,year,debt_gdp,gdp,debt,iso3,short_country,rank
0,Japan,1980,0.478,1127.876,0.539125,JPN,Japan,3
1,Japan,1981,0.529,1243.790,0.657965,JPN,Japan,3
2,Japan,1982,0.578,1157.601,0.669093,JPN,Japan,3
3,Japan,1983,0.636,1268.620,0.806842,JPN,Japan,3
4,Japan,1984,0.656,1345.204,0.882454,JPN,Japan,3
...,...,...,...,...,...,...,...,...
706,Bangladesh,2024,0.397,455.162,0.180699,BGD,Bangladesh,16
707,Bangladesh,2025,0.399,511.790,0.204204,BGD,Bangladesh,16
708,Bangladesh,2026,0.405,576.490,0.233478,BGD,Bangladesh,16
709,Bangladesh,2027,0.412,651.691,0.268497,BGD,Bangladesh,16


### Data for Cartogram

In [144]:
temp_df = df.query("year == 2023")
# get numeric iso3
temp_df["id"] = temp_df["iso3"].apply(lambda x: int(pycountry.countries.get(alpha_3=x).numeric))
temp_df.to_csv("data/nominal_debt_2023_with_ids.csv", index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df["id"] = temp_df["iso3"].apply(lambda x: int(pycountry.countries.get(alpha_3=x).numeric))


In [145]:
def get_iso3(country):
    manual_iso3s = {
    "Congo, Republic of": "COG",
    "China, People's Republic of": "CHN",
    "Bahamas, The": "BHS",
    "Taiwan Province of China": "TWN",
    "Congo, Dem. Rep. of the": "COD",
    "Gambia, The": "GMB",
    "Türkiye, Republic of": "TUR",
    "South Sudan, Republic of": "SSD",
    "Hong Kong SAR": "HKG",
    "Macao SAR": "MAC",
    "Nigeria": "NGA",
    "Niger": "NER",
    "Antigua and Barb.": "ATG",
    "Bosnia and Herz.": "BIH",
    "Cayman Is.": "CYM",
    "Central African Rep.": "CAF",
    "Dem. Rep. Congo": "COD",
    "Dominican Rep.": "DOM",
    "Eq. Guinea": "GNQ",
    "Laos": "LAO",
    "N. Cyprus": "CYP",
    "S. Sudan": "SSD",
    "Solomon Is.": "SLB",
}
    if country in manual_iso3s:
        return manual_iso3s[country]
    try:
        return pycountry.countries.search_fuzzy(country)[0].alpha_3
    except:
        print(f"Could not find iso3 for {country}", end=", ")
        return None

carto_df = pd.read_csv("data/cartogram_template.csv")
carto_df['iso3'] = carto_df.Country.apply(lambda x: get_iso3(x))

#carto_df = pd.merge(carto_df, df.query("year == 2023")[["iso3", "debt"]], on="iso3", how="left")

Could not find iso3 for Br. Indian Ocean Ter., Could not find iso3 for British Virgin Is., Could not find iso3 for Cook Is., Could not find iso3 for Faeroe Is., Could not find iso3 for Falkland Is., Could not find iso3 for Fr. Polynesia, Could not find iso3 for Fr. S. Antarctic Lands, Could not find iso3 for Heard I. and McDonald Is., Could not find iso3 for Marshall Is., Could not find iso3 for N. Mariana Is., Could not find iso3 for Pitcairn Is., Could not find iso3 for S. Geo. and the Is., Could not find iso3 for Siachen Glacier, Could not find iso3 for Somaliland, Could not find iso3 for St-Barthelemy, Could not find iso3 for St-Martin, Could not find iso3 for St. Kitts and Nevis, Could not find iso3 for St. Pierre and Miquelon, Could not find iso3 for St. Vin. and Gren., Could not find iso3 for Turks and Caicos Is., Could not find iso3 for U.S. Minor Outlying Is., Could not find iso3 for U.S. Virgin Is., Could not find iso3 for W. Sahara, Could not find iso3 for Wallis and Futuna 

In [146]:
pd.merge(df.query("year == 2023")[["iso3", "debt"]], carto_df, on="iso3", how="right").dropna(subset=["debt"])[["Country", "debt", "Population", "Colour"]].dropna(subset="debt").to_csv("data/cartogram_debt.csv", index=False)

In [111]:
carto_df

Unnamed: 0,Country,Population,GDP,Colour,iso3
0,Afghanistan,34124811,2.068200e+10,#7570b3,AFG
1,Aland,27153,,#d95f02,ALA
2,Albania,3047987,1.721000e+10,#1b9e77,ALB
3,Algeria,40969443,1.930000e+11,#1b9e77,DZA
4,American Samoa,51504,,#e6ab02,ASM
...,...,...,...,...,...
237,W. Sahara,603253,,#e6ab02,
238,Wallis and Futuna Is.,15714,,#1b9e77,
239,Yemen,28036829,3.138500e+10,#e7298a,YEM
240,Zambia,15972000,2.527200e+10,#7570b3,ZMB


In [70]:
temp_df.query("year == 2023").sort_values("iso3")

Unnamed: 0,country,year,debt_gdp,gdp,debt,iso3,short_country
6146,Brazil,2023,0.881,2126.809,1.873719,BRA,Brazil
6154,Canada,2023,1.064,2117.805,2.253345,CAN,Canada
6158,"China, People's Republic of",2023,0.83,17700.899,14.691746,CHN,China
6185,Germany,2023,0.659,4429.838,2.919263,DEU,Germany
6181,France,2023,1.1,3049.016,3.353918,FRA,France
6301,United Kingdom,2023,1.041,3332.059,3.468673,GBR,United Kingdom
6198,India,2023,0.819,3732.224,3.056691,IND,India
6204,Italy,2023,1.437,2186.082,3.1414,ITA,Italy
6206,Japan,2023,2.552,4230.862,10.79716,JPN,Japan
6302,United States,2023,1.233,26949.643,33.22891,USA,United States


In [72]:
temp_df.groupby("year").agg({"debt": "count"})

Unnamed: 0_level_0,debt
year,Unnamed: 1_level_1
1980,2
1981,2
1982,2
1983,2
1984,2
1985,2
1986,2
1987,2
1988,2
1989,2


In [18]:
df.query("year == 2023").sort_values(by="debt", ascending=False).head(10)

Unnamed: 0,country,year,debt_gdp,gdp,debt,iso3
6302,United States,2023,123.3,26949.643,3322891.0,USA
6158,"China, People's Republic of",2023,83.0,17700.899,1469175.0,CHN
6206,Japan,2023,255.2,4230.862,1079716.0,JPN
6301,United Kingdom,2023,104.1,3332.059,346867.3,GBR
6181,France,2023,110.0,3049.016,335391.8,FRA
6204,Italy,2023,143.7,2186.082,314140.0,ITA
6198,India,2023,81.9,3732.224,305669.1,IND
6185,Germany,2023,65.9,4429.838,291926.3,DEU
6154,Canada,2023,106.4,2117.805,225334.5,CAN
6146,Brazil,2023,88.1,2126.809,187371.9,BRA


In [15]:
gdp_df

Unnamed: 0,"GDP, current prices (Billions of U.S. dollars)",1980,1981,1982,1983,1984,1985,1986,1987,1988,...,2019,2020,2021,2022,2023,2024,2025,2026,2027,2028
1,Afghanistan,no data,no data,no data,no data,no data,no data,no data,no data,no data,...,18.876,20.136,14.941,no data,no data,no data,no data,no data,no data,no data
2,Albania,1.946,2.229,2.296,2.319,2.29,2.339,2.587,2.566,2.53,...,15.399,15.192,17.984,19.083,23.032,25.297,26.361,27.795,29.532,31.436
3,Algeria,42.346,44.372,44.78,47.529,51.513,61.132,61.535,63.3,51.664,...,171.673,145.656,163.138,195.06,224.107,239.209,247.742,255.315,259.347,262.804
4,Andorra,no data,no data,no data,no data,no data,no data,no data,no data,no data,...,3.155,2.885,3.325,3.352,3.692,3.919,4.07,4.205,4.329,4.461
5,Angola,6.639,6.214,6.214,6.476,6.864,8.457,7.918,9.05,9.818,...,84.516,57.139,74.861,122.781,93.796,92.925,96.895,100.785,106.43,111.822
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226,Other advanced economies,775.274,790.86,785.664,784.097,821.924,819.463,999.432,1230.294,1450.5,...,7451.982,7404.808,8568.574,8581.514,8800.731,9147.297,9605.214,10061.637,10511.283,10975.395
227,Sub-Saharan Africa,317.456,335.793,338.583,311.806,247.335,225.926,257.466,279.882,301.615,...,1768.224,1672.529,1886.949,2013.032,1957.231,2084.818,2271.584,2458.237,2654.768,2875.895
228,World,11232.069,11526.241,11310.303,11610.283,12025.117,12553.732,14793.158,17032.786,19156.083,...,87325.54,84960.903,96487.666,100135.361,104476.432,109734.27,115598.839,121580.994,127499.667,133782.888
229,,,,,,,,,,,...,,,,,,,,,,
