In [5]:
import pandas as pd
import json

In [6]:
def clean_data(df):
    """Build the tidy mortality table from the raw census frame.

    The input frame is left untouched: all derived columns and the column
    renaming are applied to a new frame, so callers do not need to pass a
    defensive copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table. It must contain the lowercase columns ``idep``, ``iprov``,
        ``imun`` and ``i00``, plus the columns whose upper-cased names are
        ``M212A_MES``, ``M212B_AN``, ``M213_EDAD``, ``M214_COV``,
        ``M215_SEXO`` and ``M216_PARTO``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``DEPAR``, ``PROV``, ``MUN``, ``N_VIV``,
        ``M212A_MES``, ``M212B_AN``, ``M213_EDAD``, ``M214_COV``,
        ``M215_SEXO``, ``M216_PARTO`` (in that order), sorted by ``DEPAR``,
        ``PROV``, ``MUN`` and ``N_VIV``. ``PROV`` is ``idep + iprov`` and
        ``MUN`` is ``PROV + imun`` (string concatenation when the columns
        hold text, e.g. when the file was read with ``dtype=str``).

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    prov = df["idep"] + df["iprov"]

    # assign() returns a new frame, so the caller's `df` keeps its original
    # columns and labels.
    result = df.assign(PROV=prov, MUN=prov + df["imun"])
    result.columns = result.columns.str.upper()
    result = result.rename(columns={"IDEP": "DEPAR", "I00": "N_VIV"})

    # Selecting the output columns also discards IPROV, IMUN and every other
    # column that is not part of the published table.
    result = result[
        [
            "DEPAR",
            "PROV",
            "MUN",
            "N_VIV",
            "M212A_MES",
            "M212B_AN",
            "M213_EDAD",
            "M214_COV",
            "M215_SEXO",
            "M216_PARTO",
        ]
    ]
    return result.sort_values(by=["DEPAR", "PROV", "MUN", "N_VIV"])


# Load the raw ";"-separated file, reading every column as str so pandas
# performs no numeric type inference on it.
df = pd.read_csv(
    "Mortalidad_CPV-2024.csv",
    sep=";",
    dtype=str,
    low_memory=False,
)

# clean_data receives a copy, so the raw frame `df` stays as loaded.
df_clean = clean_data(df.copy())

# Export the cleaned table without the index column.
df_clean.to_csv("mortalidad.csv", index=False)

In [7]:
# Path to the JSON file with the field definitions; it is opened and parsed
# with json.load when the datapackage schema is assembled. Being a relative
# path, it is resolved against the current working directory.
fields_path = "../diccionario/fields.json"

In [8]:
# Load the field definitions from the dictionary file.
with open(fields_path, "r", encoding="utf-8") as f:
    fields = json.load(f)

# Keep only the definitions whose "name" is a column of the cleaned table.
# Columns that have no definition in the file get no entry in the schema.
necessary_columns = df_clean.columns.tolist()
fields = [field for field in fields if field.get("name") in necessary_columns]

# Table Schema expects the fields in the same order as the columns of the
# data file, so order them by column position instead of by their position
# in the dictionary file. list.sort is stable, so entries sharing a name keep
# their relative order.
column_position = {column: index for index, column in enumerate(necessary_columns)}
fields.sort(key=lambda field: column_position[field["name"]])

# Build the datapackage descriptor.
# NOTE(review): the Data Package spec asks for resource names made of
# lowercase alphanumerics plus ".", "-" and "_"; a human-readable label like
# this one would normally go in "title". Left unchanged so existing consumers
# of the descriptor see the same resource name - confirm before changing.
datapackage = {
    "name": "mortalidad",
    "resources": [
        {
            "name": "Base de Datos de Mortalidad del Censo de Población y Vivienda",
            "path": "mortalidad.csv",
            "schema": {"fields": fields},
        }
    ],
}

# Write the descriptor to a JSON file, keeping non-ASCII characters readable.
with open("datapackage.json", "w", encoding="utf-8") as f:
    json.dump(datapackage, f, ensure_ascii=False, indent=4)