In [1]:
# Reload the autoreload extension and switch it to mode 2 so that edits to
# imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import os

import pandas as pd
from loguru import logger
from pyprojroot import here

# Switch the working directory to whatever pyprojroot's here() returns
# (presumably the project root -- confirm against pyprojroot's docs).
os.chdir(here())

In [37]:
from municipal_performance_scraping.extractors import get_performance_data
from municipal_performance_scraping.transformers import json_to_df, transform_level, transform_performance_response

In [4]:
# Obtain the raw performance payload used by every cell below.
# NOTE(review): 1313 and 2018 are unexplained literals -- presumably a
# municipality id and a year; confirm against get_performance_data's signature.
perf_json = get_performance_data(1313, 2018)

In [5]:
# Convert the payload into tabular form. The cells below index the result by
# the "criterion_id" and "sub_domain_id" columns and call .pipe() on it.
json_as_df = json_to_df(perf_json)

In [6]:
# Boolean row selectors for each level, derived from which id columns are
# populated on a row.
has_sub_domain_id = json_as_df["sub_domain_id"].notna()
lacks_criterion_id = json_as_df["criterion_id"].isna()

# NOTE(review): criterion_mask only checks sub_domain_id, so it selects every
# row that sub_domain_mask selects as well. If criterion rows are meant to be
# exclusive, this probably wants json_as_df["criterion_id"].notna() -- confirm
# against what transform_level expects before changing it.
criterion_mask = has_sub_domain_id
sub_domain_mask = lacks_criterion_id & has_sub_domain_id
domain_mask = lacks_criterion_id & ~has_sub_domain_id

In [7]:
# Criterion level: rows picked by criterion_mask, with sub_domain_id passed as
# the parent id. The raw column names are then mapped to the shared
# name_ar / name_fr / score / max_score names.
criterion_column_names = {
    "nom_ar": "name_ar",
    "nom_fr": "name_fr",
    "notecrit": "score",
    "crmaxnote": "max_score",
}
criteria_df = transform_level(
    json_as_df,
    criterion_mask,
    id_var="criterion_id",
    parent_id_var="sub_domain_id",
    var_name_column="criterion_var_name",
).rename(columns=criterion_column_names)

In [8]:
# Sub-domain level: rows picked by sub_domain_mask, with domain_id passed as
# the parent id. The raw column names are then mapped to the shared
# name_ar / name_fr / score / max_score names.
sub_domain_column_names = {
    "maxnote": "max_score",
    "nom_ar": "name_ar",
    "nom_fr": "name_fr",
    "note": "score",
}
sub_domains_df = transform_level(
    json_as_df,
    sub_domain_mask,
    id_var="sub_domain_id",
    parent_id_var="domain_id",
    var_name_column="sub_domain_var_name",
).rename(columns=sub_domain_column_names)

In [32]:
# Domain level: rows picked by domain_mask. No parent id is passed for this
# level, and no column renaming happens in this cell.
domains_df = transform_level(
    json_as_df,
    domain_mask,
    id_var="domain_id",
    var_name_column="domain_var_name",
)

In [35]:
# Every raw column name seen across the three levels, grouped by the shared
# name it maps to.
standardized_column_names = {
    # -> name_ar
    "nom_ar": "name_ar",
    "ar": "name_ar",
    # -> name_fr
    "nom_fr": "name_fr",
    "fr": "name_fr",
    # -> score
    "notecrit": "score",
    "note": "score",
    "notedom": "score",
    # -> max_score
    "crmaxnote": "max_score",
    "maxnote": "max_score",
    "maxdom": "max_score",
}

# Re-binds domains_df to its renamed version. Keys that are not present as
# columns are ignored by rename, and no key collides with a target name, so
# re-running this cell leaves the frame unchanged.
domains_df = domains_df.rename(columns=standardized_column_names)

In [36]:
# Stack the three levels, domains first, into one frame. Each frame keeps its
# own row labels, so labels repeat across levels in the displayed index.
# NOTE(review): the displayed output shows a trailing "\r\n" on some name_ar /
# name_fr values -- consider stripping whitespace upstream.
level_frames = [domains_df, sub_domains_df, criteria_df]
final = pd.concat(level_frames)
final

Unnamed: 0,criterion_id,name_ar,name_fr,max_score,score,parent_id
0,10,الحوكمة,Gouvernance,30,21,
1,11,التصرف,Gestion,30,20,
2,12,الديمومة,Pérennité,40,40,
0,101,تطبيق المقاربة التشاركية,Approche Participative,12,8,10.0
1,102,الشفافية والنفاذ إلى المعلومة,Transparence et acces a l'information,8,8,10.0
2,103,آليات التصرف في الشكاوى,Mécanisme de gestion des plaintes,10,5,10.0
3,114,التصرف في الموارد البشرية,Ressources Humaines,10,10,11.0
4,115,التصرف في الموراد المالية\r\n,Gestion Financière,10,2,11.0
5,116,التصرف في الصفقات العمومية,Commandes Publiques\r\n,10,8,11.0
6,1210,التصرف البيئي والاجتماعي,Sauvegardes Sociales et Environnementales\r\n,8,8,12.0


In [39]:
# Run the packaged transform on the same payload. Its displayed output matches
# the `final` frame assembled step by step in the cells above.
transform_performance_response(perf_json)

Unnamed: 0,criterion_id,name_ar,name_fr,max_score,score,parent_id
0,10,الحوكمة,Gouvernance,30,21,
1,11,التصرف,Gestion,30,20,
2,12,الديمومة,Pérennité,40,40,
0,101,تطبيق المقاربة التشاركية,Approche Participative,12,8,10.0
1,102,الشفافية والنفاذ إلى المعلومة,Transparence et acces a l'information,8,8,10.0
2,103,آليات التصرف في الشكاوى,Mécanisme de gestion des plaintes,10,5,10.0
3,114,التصرف في الموارد البشرية,Ressources Humaines,10,10,11.0
4,115,التصرف في الموراد المالية\r\n,Gestion Financière,10,2,11.0
5,116,التصرف في الصفقات العمومية,Commandes Publiques\r\n,10,8,11.0
6,1210,التصرف البيئي والاجتماعي,Sauvegardes Sociales et Environnementales\r\n,8,8,12.0
