In [None]:
import pandas as pd
import country_converter as coco


from dfpp.transformation.column_name_template import (
    CANONICAL_COLUMN_NAMES,
    DIMENSION_COLUMN_PREFIX,
    SERIES_PROPERTY_PREFIX,
    sort_columns_canonically,
    ensure_canonical_columns,
)
from dfpp.transformation.value_handler import handle_value
from dfpp.publishing import publish_series

# Single shared converter instance; the transform cell uses it to map the
# sheet's country names to ISO3 codes.
cc = coco.CountryConverter()

In [None]:
# Download the SIPRI workbook and parse the sheet holding military spending as
# a share of government spending. The raw frame is kept untouched in
# `df_source` so later cells can be re-run without downloading again.
URL = "https://www.sipri.org/sites/default/files/SIPRI-Milex-data-1948-2023.xlsx"
df_source = pd.read_excel(
    URL,
    sheet_name="Share of Govt. spending",
    header=7,  # 0-indexed row used for column labels; rows above it are skipped
)

In [None]:
# Work on an independent copy so the downloaded frame stays pristine.
df = df_source.copy(deep=True)

In [None]:
# --- Clean the raw sheet ------------------------------------------------------
# Placeholder strings found in the sheet are treated as missing values, then
# rows that are empty across every column are dropped. Reassigning (instead of
# `inplace=True`) keeps each step an explicit, chainable transformation.
remap_values = {". .": pd.NA, "xxx": pd.NA, "...": pd.NA}
df = df.replace(remap_values).dropna(axis=0, how="all")

# Resolve country names to ISO3 codes and discard rows whose name could not be
# matched (flagged by the converter with the literal "not found").
df["alpha_3_code"] = cc.pandas_convert(df["Country"], to="ISO3")
df = df[df["alpha_3_code"] != "not found"].reset_index(drop=True)

# --- Reshape wide -> long -----------------------------------------------------
# Every non-id column becomes a (year, value) pair.
df = df.melt(
    id_vars=["alpha_3_code", "Notes", "Country"], var_name="year", value_name="value"
)
# "Reporting year" is a descriptive column of the sheet, not a year of data, so
# the rows it produced in the melt are removed.
df = df[df["year"] != "Reporting year"].reset_index(drop=True)

# Convert to a real float column. The previous
# `astype("float", errors="ignore")` silently left the column as object dtype
# whenever it contained pd.NA (which the replacement above introduces), so the
# cast never took effect. Anything non-numeric becomes NaN.
df["value"] = pd.to_numeric(df["value"], errors="coerce")

# --- Series metadata ----------------------------------------------------------
df[SERIES_PROPERTY_PREFIX + "unit"] = "percentage of government spending"
# The copyright sign was previously stored as the mojibake "Â©".
df["series_name"] = (
    "Military expenditure by country as percentage of government spending, "
    "1948-2023 © SIPRI 2023"
)
df["source"] = "https://www.sipri.org/"
series_id = "countrywisemilitaryexpenditure_sipri"
df["series_id"] = series_id

# --- Column selection ---------------------------------------------------------
# Keep any dimension / series-property columns that are not already part of the
# canonical set so they are carried through alongside the canonical columns.
to_select_columns = [
    col
    for col in df.columns
    if any(
        col.startswith(prefix)
        for prefix in (DIMENSION_COLUMN_PREFIX, SERIES_PROPERTY_PREFIX)
    )
    and col not in CANONICAL_COLUMN_NAMES
]

# handle_value is applied row by row; its two results are written back as the
# value and its accompanying label.
# NOTE(review): the value_label column is created after `to_select_columns` was
# computed, so it only survives the selection below if it is listed in
# CANONICAL_COLUMN_NAMES - confirm.
df[["value", SERIES_PROPERTY_PREFIX + "value_label"]] = df.apply(
    handle_value, axis=1, result_type="expand"
)

# Make sure every canonical column exists, then narrow to and order the final
# column set.
df = ensure_canonical_columns(df)
df = df[CANONICAL_COLUMN_NAMES + to_select_columns]
df = sort_columns_canonically(df)

In [None]:
await publish_series(series_id=series_id, df_series=df, source_folder="sipri_org")