In [None]:
import pandas as pd
import country_converter as coco


from dfpp.transformation.column_name_template import (
    CANONICAL_COLUMN_NAMES,
    DIMENSION_COLUMN_PREFIX,
    SERIES_PROPERTY_PREFIX,
    sort_columns_canonically,
    ensure_canonical_columns,
)
from dfpp.publishing import publish_series
from dfpp.transformation.value_handler import handle_value

# Single converter instance for the notebook; used further down to map the
# country names in the sheet to ISO3 codes.
cc = coco.CountryConverter()

In [None]:
# Location of the capacity workbook on energydata.info.
URL = "https://energydata.info/dataset/b33e5af4-bd51-4ee0-a062-29438471db27/resource/6938ec3a-f7bb-4493-86ba-f28faa62f139/download/eleccap_20220404-201215.xlsx"

# Download and parse the workbook; the column labels are taken from the second
# row of the sheet (header=1 is zero-based). The result is kept as the raw,
# unmodified frame that later cells copy from.
df_source = pd.read_excel(io=URL, header=1)

In [None]:
# Take an independent (deep) copy so the downloaded frame stays untouched while
# the transformation below reshapes `df`.
df = df_source.copy(deep=True)

In [None]:
# Transform the raw sheet into the canonical series layout.
#
# Start from the untouched raw frame so this cell can be re-run on its own:
# the rename below expects exactly five columns, which is no longer true for
# `df` once this cell has executed.
df = df_source.copy()

technology_column = DIMENSION_COLUMN_PREFIX + "energy_technology"
grid_column = DIMENSION_COLUMN_PREFIX + "grid_connection"
df.columns = ["country", technology_column, grid_column, "year", "value"]

# Rows with no content at all carry no observation. Drop them before forward
# filling, otherwise they would inherit the labels of the row above and show up
# as extra rows for that country.
df = df.dropna(how="all").reset_index(drop=True)

# Forward-fill the label columns only.
# NOTE(review): presumably the sheet leaves label cells blank on continuation
# rows (merged cells) - confirm against the workbook.
# "value" is deliberately excluded: forward-filling it would copy the previous
# row's measurement into a cell that is actually empty.
label_columns = ["country", technology_column, grid_column, "year"]
df[label_columns] = df[label_columns].ffill()

# ".." is treated as a missing value.
df["value"] = df["value"].replace({"..": None})

# Resolve country names to ISO3 codes and keep only rows that could be resolved.
df["alpha_3_code"] = cc.pandas_convert(df["country"], to="ISO3")
df = df[df["alpha_3_code"] != "not found"].reset_index(drop=True)

# Cast only after the filter above, so rows that are dropped anyway cannot make
# the conversion fail.
df["year"] = df["year"].astype(int)
df[SERIES_PROPERTY_PREFIX + "unit"] = "Megawatt"
df["value"] = df["value"].astype("float")

# Series metadata; `series_id` is also used by the publishing cell.
series_id = "irena_eleccap"
df["source"] = "https://energydata.info/"
df["series_id"] = series_id
df["series_name"] = (
    "Installed electricity capacity by country/area (MW) by Country/area, Technology, Grid connection and Year"
)

# Dimension / series-property columns present at this point that are not part
# of the canonical set. Computed before the value label is added, so a
# non-canonical value-label column is not selected here.
to_select_columns = [
    col
    for col in df.columns
    if col.startswith((DIMENSION_COLUMN_PREFIX, SERIES_PROPERTY_PREFIX))
    and col not in CANONICAL_COLUMN_NAMES
]

# handle_value is applied row by row; its two results are stored as the value
# and its label.
df[["value", SERIES_PROPERTY_PREFIX + "value_label"]] = df.apply(handle_value, axis=1, result_type="expand")

# NOTE(review): ensure_canonical_columns presumably adds any canonical columns
# that are still missing, which the selection below relies on - confirm in dfpp.
df = ensure_canonical_columns(df)
df = df[CANONICAL_COLUMN_NAMES + to_select_columns]
df = sort_columns_canonically(df)

In [None]:
await publish_series(series_id=series_id, df_series=df, source_folder="energydata_info")