In [1]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Project:           EU-S A2J Codebook
##
## Script:            Codebook in Python (Jupyter Notebook)
##
## Author(s):         A. Santiago Pardo G.        (spardo@worldjusticeproject.org)
##
## Dependencies:      World Justice Project
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# 
# 1. Required Packages                                                                  
# 
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

import os
import pandas as pd
from IPython.display import display, Markdown
import numpy as np
from copy import deepcopy

# Show the project title as a rendered Markdown heading
display(Markdown("## 📌 EU-S A2J Codebook"))

def avg_estimation(df, info_col):
    """
    Estimate a region-weighted, country-level average of `info_col`.

    The estimate is built in three steps:
      1. Within each (`country_name_ltn`, `nuts_id`) pair, take the NaN-ignoring mean
         of `info_col` and of the weight column `regionpoppct`.
      2. Multiply each regional mean by its regional weight.
      3. Within each country, sum the weighted regional means, skipping regions whose
         weighted value is missing.

    Note: weights of skipped regions are NOT redistributed, so the result is only a
    proper weighted average when every region contributes a value.
    (Presumably `regionpoppct` is each region's share of the country population and
    sums to 1 per country -- TODO confirm against the weights file.)

    Parameters:
    - df (pd.DataFrame): Input data. Must contain the columns `country_name_ltn`,
      `nuts_id`, `regionpoppct` and the column named by `info_col`.
    - info_col (str): Name of the numeric column to average.

    Returns:
    - pd.DataFrame: One row per `country_name_ltn` with the estimate stored in the
      column `final_value`. The value is NaN when no region of the country has a
      valid weighted value (a plain `np.nansum` would misleadingly report 0).
    """

    def _nan_mean(values):
        # NaN-ignoring mean; returns NaN directly for an all-missing group so NumPy
        # does not emit its "Mean of empty slice" RuntimeWarning.
        valid = values.dropna()
        return np.nanmean(valid) if len(valid) else np.nan

    def _nan_sum(values):
        # NaN-ignoring sum; NaN (not 0) when there is nothing valid to add up.
        valid = values.dropna()
        return np.nansum(valid) if len(valid) else np.nan

    # Step 1: regional means of the weight and of the indicator.
    # The aggregated indicator column is literally named "info_col".
    grouped_df = (
        df
        .groupby(["country_name_ltn", "nuts_id"], as_index=False)
        .agg(
            pop_weight=("regionpoppct", _nan_mean),
            info_col=(info_col, _nan_mean),
        )
    )

    # Step 2: weight each regional mean by the regional weight
    grouped_df["info_col"] = grouped_df["info_col"] * grouped_df["pop_weight"]

    # Step 3: add the weighted regional means up to the country level
    final_df = (
        grouped_df
        .groupby("country_name_ltn", as_index=False)
        .agg(final_value=("info_col", _nan_sum))
    )

    return final_df


## 📌 EU-S A2J Codebook

In [None]:
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# 2. SharePoint Path
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Current system user: read from the USER environment variable, falling back to USERNAME
user = os.environ.get("USER") or os.environ.get("USERNAME")

# Local SharePoint (OneDrive) data folder for each known user
eu_paths = dict(
    santiagopardo="/Users/santiagopardo/Library/CloudStorage/OneDrive-WorldJusticeProject/EU Subnational/EU-S Data",
)

# Path for the current user; None when the user has no entry above
path2eu = eu_paths.get(user)

# Report the resolved path, or an error message when the user is not configured
display(Markdown(
    f"🔹 **EU Subnational Path:** `{path2eu}`"
    if path2eu
    else "🚨 **Error:** Ruta de SharePoint no encontrada. Revisa la configuración del usuario."
))



In [None]:
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# 3. Load the database
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Sub-folder and file name of the survey data (Stata .dta)
sub_path = "eu-gpp/1. Data/3. Merge"
GPP_file = "EU_GPP_2024.dta"

# Sub-folder and file name of the regional weights (Excel)
weights_path = "reports/eu-thematic-reports/data-viz/inputs"
weigths_file = "region_labels.xlsx"  # (sic) spelling kept in case other cells use this name

# Build the absolute paths; both stay None when the SharePoint root is unknown
file_path = os.path.abspath(os.path.join(path2eu, sub_path, GPP_file)) if path2eu else None
file_weights_path = os.path.abspath(os.path.join(path2eu, weights_path, weigths_file)) if path2eu else None

# Show the generated paths
if file_path:
    display(Markdown(f"🔹 **Ruta generada:** `{file_path}`, `{file_weights_path}`"))
else:
    display(Markdown("🚨 **Error:** No se pudo construir la ruta del archivo."))


def _load_table(path, reader):
    """
    Load one input table, show a short preview, and return it.

    Parameters:
    - path (str | None): Absolute path of the file to read.
    - reader (callable): Function that takes the path and returns a DataFrame
      (e.g. `pd.read_stata`, `pd.read_excel`).

    Returns:
    - pd.DataFrame: The loaded table.

    Raises:
    - FileNotFoundError: If `path` is None or does not exist.
    - Exception: Whatever `reader` raises is shown and then re-raised, so the cell
      stops here instead of failing later with a NameError (or silently reusing a
      stale variable left in the kernel).
    """
    if not (path and os.path.exists(path)):
        display(Markdown("🚨 **Error:** No se encontró el archivo en la ruta especificada."))
        raise FileNotFoundError(f"Input file not found: {path!r}")

    display(Markdown("✅ **Archivo encontrado. Cargando datos...**"))

    try:
        table = reader(path)
    except Exception as e:
        display(Markdown(f"❌ **Error al cargar el archivo:** `{e}`"))
        raise

    # Preview: first rows and dimensions of the table
    display(Markdown("### 📊 Primeras filas del dataset:"))
    display(table.head())

    display(Markdown("### 📋 Información del dataset:"))
    display(Markdown(f"- **Número de filas:** {table.shape[0]}"))
    display(Markdown(f"- **Número de columnas:** {table.shape[1]}"))

    return table


# Survey data (Stata) and regional weights (Excel); each is checked against its OWN path
df = _load_table(file_path, pd.read_stata)
region_weights = _load_table(file_weights_path, pd.read_excel)

# Attach the regional weights to every survey row. The weights file names the country
# column "country"; it is renamed so both key columns match the survey data.
df = df.merge(
    region_weights.rename(columns = {"country": "country_name_ltn"}),
    on = ["country_name_ltn", "nuts_id"],
    how = "left"
)

# Dimensions after the merge and the distinct (nuts_id, regionpop) pairs that were attached
display(Markdown("### 📋 Información del dataset:"))
display(Markdown(f"- **Número de filas:** {df.shape[0]}"))
display(Markdown(f"- **Número de columnas:** {df.shape[1]}"))
display(Markdown(f"- **Columnas pegadas:** {df[['nuts_id', 'regionpop']].drop_duplicates().to_string(index=False)}"))

In [None]:
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# 
# 4. Legal needs survey analysis
# 
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Bare last expression: renders the merged survey DataFrame as the cell output
df


In [None]:
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# Problem prevalence
#
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Work on a copy so the merged survey frame `df` is left untouched
master_data = df.copy()

# Codes of the legal problems covered by the survey
legal_problems = [
    "A1", "A2", "A3",
    "B1", "B2", "B3", "B4",
    "C1", "C2", "C3", "C4",
    "D1", "D2", "D3", "D4", "D5", "D6",
    "E1", "E2", "E3",
    "F1", "F2",
    "G1", "G2", "G3",
    "H1", "H2", "H3",
    "I1",
    "J1", "J2", "J3", "J4",
    "K1", "K2", "K3",
    "L1", "L2"
]

# Per-problem incidence and severity columns.
# The severity list has its own name so it does not collide with the `legprob_sev`
# result table created at the end of this cell.
legprob_bin = [f"AJP_{lp}_bin" for lp in legal_problems]
legprob_sev_cols = [f"AJP_{lp}_sev" for lp in legal_problems]


def _map_cells(frame, func):
    """Apply `func` to every cell of `frame` and return the resulting DataFrame.

    `DataFrame.applymap` is deprecated since pandas 2.1 in favour of `DataFrame.map`;
    use whichever one the installed pandas provides.
    """
    if hasattr(pd.DataFrame, "map"):
        return frame.map(func)
    return frame.applymap(func)


def _recode_incidence(value):
    """Recode one incidence answer: 'Yes' or 1 -> 1, 'No' or 2 -> 0, anything else -> NaN."""
    if value in ("Yes", 1):
        return 1
    if value in ("No", 2):
        return 0
    return np.nan


# Step 1: Recode the incidence columns to 1 / 0 / NaN.
# A single recode accepts either value labels ("Yes"/"No") or numeric codes (1/2).
# The previous two-pass version first turned "No" into 0 and then turned every 0 into
# NaN, so "No" answers ended up missing. Casting to object first makes the recode act
# on the raw cell values even when the column is categorical.

master_data[legprob_bin] = _map_cells(master_data[legprob_bin].astype(object), _recode_incidence)

# Step 2: Create 'legprob' column: 1 if any incidence column is 1, otherwise 0
# (row sums skip NaN, so missing answers never count as a problem)

master_data["legprob"] = (master_data[legprob_bin].sum(axis=1) > 0).astype(int)

# Step 3: Recode the severity columns:
# - 1 if value is at least 4 and below 98
# - 0 if value is less than 4
# - NaN otherwise (including missing values)

master_data[legprob_sev_cols] = _map_cells(
    master_data[legprob_sev_cols],
    lambda x: 1 if 4 <= x < 98 else (0 if x < 4 else np.nan)
)

# Step 4: Create 'legprob_sev' column: 1 if any severity column is 1, otherwise 0

master_data["legprob_sev"] = (master_data[legprob_sev_cols].sum(axis=1) > 0).astype(int)

# Step 5: Apply the estimation function to get the region-weighted country estimate

legprob_sev = avg_estimation(master_data, "legprob_sev")

display(legprob_sev)


In [None]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
##
## Access to proper information and advice
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Create a copy of the original dataset to work on, preventing accidental modifications to `master_data`

A2J_df       = master_data.copy()

# Step 1: Filter for people with a non-trivial legal problem

# Keep only individuals who reported at least one legal problem (legprob == 1)
# AND at least one severe legal problem (legprob_sev == 1).
# `.copy()` makes the subset an independent DataFrame, so adding columns below does
# not raise pandas' SettingWithCopyWarning.

A2J_problems = A2J_df[(A2J_df["legprob"] == 1) & (A2J_df["legprob_sev"] == 1)].copy()

# Step 2: Recode 'AJE_infosource' into 'access2info'

# Create a new column `access2info` from the answer labels in `AJE_infosource`:
# - 1 if the answer is 'Agree' or 'Strongly agree'
# - 0 if the answer is 'Disagree', 'Strongly disagree' or "Don't know"
# - NaN (missing value) for all other cases

A2J_problems["access2info"] = A2J_problems["AJE_infosource"].apply(
    lambda x: 1 if x in ['Agree', 'Strongly agree']
    else (0 if x in ['Disagree', 'Strongly disagree', "Don't know"] else np.nan)
)

# Step 3: Apply the function of estimation

access2info = avg_estimation(A2J_problems, "access2info")

display(access2info)


In [None]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
##
## Access to proper representation
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Create a copy of the original dataset to work on, preventing accidental modifications to `master_data`

A2J_df       = master_data.copy()

# Step 1: Filter for people with a non-trivial legal problem

# Keep only individuals who reported at least one legal problem (legprob == 1)
# AND at least one severe legal problem (legprob_sev == 1).
# `.copy()` makes the subset an independent DataFrame, so adding columns below does
# not raise pandas' SettingWithCopyWarning.

A2J_problems = A2J_df[(A2J_df["legprob"] == 1) & (A2J_df["legprob_sev"] == 1)].copy()

# Step 2: Treat the "No answer" label in `AJD_noadvice_reason` as missing

A2J_problems["AJD_noadvice_reason"] = A2J_problems["AJD_noadvice_reason"].replace("No answer", np.nan)

# Step 3 : Recode the advice questions into 'access2rep'


def _recode_access2rep(row):
    """
    Classify one respondent: 1 = not in the justice gap, 0 = in the justice gap,
    NaN = none of the rules applies. Rules are checked in order; the first match wins.
    """
    formal_advisers = (
        "AJD_adviser_2", "AJD_adviser_3", "AJD_adviser_4",
        "AJD_adviser_5", "AJD_adviser_6", "AJD_adviser_8",
    )
    informal_advisers = (
        "AJD_adviser_1", "AJD_adviser_7", "AJD_adviser_9", "AJD_adviser_98",
    )
    not_needed_reasons = (
        "I thought the issue was not important or not difficult to resolve",
        "I did not think I needed advice",
    )
    barrier_reasons = (
        "Thought the other side was right",
        "I was concerned about the financial cost",
        "I had received help with a problem before and did not find it useful",
        "I did not know who to call or where to get advice",
        "I did not know I could get advice for this problem",
        "Was scared to get advice",
        "Advisers were too far away or it would take too much time",
        "Other",
        "Don't know",
    )

    # 1. Able to access advice from a formal source --> not in gap
    if any(row[col] == "Yes" for col in formal_advisers) and row["AJD_inst_advice"] == "Yes":
        return 1

    # 2. Did not access advice because problem was not important --> not in gap
    if row["AJD_noadvice_reason"] in not_needed_reasons and row["AJD_inst_advice"] == "No":
        return 1

    # 3. If friend or family has a legal background --> not in gap
    if (
        row["AJD_inst_advice"] == "Yes"
        and row["AJD_adviser_1"] == "Yes"
        and row["AJD_expert_adviser"] == "Yes"
    ):
        return 1

    # 4. Accessed advice from a friend, religious org, or other --> in the justice gap
    if any(row[col] == "Yes" for col in informal_advisers) and row["AJD_inst_advice"] == "Yes":
        return 0

    # 5. Reason for not seeking advice falls into one of the barrier categories --> in the justice gap
    if row["AJD_noadvice_reason"] in barrier_reasons and row["AJD_inst_advice"] == "No":
        return 0

    # 6. Does not know whether advice was obtained --> in the justice gap
    if row["AJD_inst_advice"] == "Don't know":
        return 0

    # Default: missing when no rule matches
    return np.nan


A2J_problems["access2rep"] = A2J_problems.apply(_recode_access2rep, axis=1)


# Step 4: Apply the function of estimation
access2rep = avg_estimation(A2J_problems, "access2rep")

display(access2rep)


In [None]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
##
## Process Barriers: Timeliness
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Create a copy of the original dataset to work on, preventing accidental modifications to `master_data`

A2J_df       = master_data.copy()

# Step 1: Filter for people with a non-trivial legal problem

# Keep only individuals who reported at least one legal problem (legprob == 1)
# AND at least one severe legal problem (legprob_sev == 1).
# `.copy()` makes the subset an independent DataFrame, so adding columns below does
# not raise pandas' SettingWithCopyWarning.

A2J_problems = A2J_df[(A2J_df["legprob"] == 1) & (A2J_df["legprob_sev"] == 1)].copy()

# Step 2: Recode AJR_solvingtime into 'rp_time'


def _recode_rp_time(row):
    """
    Classify one respondent by resolution time: 1 = problem done with in under 13
    months, 0 = done with after more than 12 months or solving time coded -8888,
    NaN otherwise. Rules are checked in order; the first match wins.
    """
    done_states = ("Done with, but problem persists", "Done with, problem fully resolved")

    def _is_done():
        # The problem counts as "done with" if either state column says so
        return row["AJR_state_noresol"] in done_states or row["AJR_state_resol"] in done_states

    # 1. Problem resolved in one year or less
    if (0 <= row["AJR_solvingtime"] < 13) and _is_done():
        return 1

    # 2. Problem solved in more than one year
    if (row["AJR_solvingtime"] > 12) and _is_done():
        return 0

    # 3. Solving time coded -8888 counts as untimely, whatever the problem state
    #    (presumably a "don't know" code -- TODO confirm against the codebook)
    if row["AJR_solvingtime"] == -8888:
        return 0

    # Everything else is missing: solving time coded -9999, problems that are
    # "Ongoing" / "Too early to say", and any other unmatched combination
    return np.nan


A2J_problems["rp_time"] = A2J_problems.apply(_recode_rp_time, axis=1)

# Step 3: Apply the function of estimation
rp_time = avg_estimation(A2J_problems, "rp_time")

display(rp_time)

In [None]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
##
## Process Barriers: Costliness
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Create a copy of the original dataset to work on, preventing accidental modifications to `master_data`

A2J_df       = master_data.copy()

# Step 1: Filter for people with a non-trivial legal problem

# Keep only individuals who reported at least one legal problem (legprob == 1)
# AND at least one severe legal problem (legprob_sev == 1).
# `.copy()` makes the subset an independent DataFrame, so adding columns below does
# not raise pandas' SettingWithCopyWarning.

A2J_problems = A2J_df[(A2J_df["legprob"] == 1) & (A2J_df["legprob_sev"] == 1)].copy()

# Step 2: Recode AJR_solvingcosts / AJR_costdiff into 'rp_cost'


def _recode_rp_cost(row):
    """
    Classify one respondent by cost burden: 1 = not in the justice gap, 0 = in the
    justice gap, NaN when the problem is not done with or no rule applies.
    """
    done_states = ("Done with, but problem persists", "Done with, problem fully resolved")
    easy_to_pay = ("Very easy", "Somewhat easy")
    # "Don't Know" is the spelling this cell used originally; the rest of the notebook
    # spells the label "Don't know". Both are accepted so the answer is matched
    # whichever form the data uses -- TODO confirm the actual label and drop the other.
    hard_to_pay = ("Difficult", "Nearly impossible", "Don't Know", "Don't know")

    # Every rule requires the problem to be "done with" in either state column
    if not (row["AJR_state_noresol"] in done_states or row["AJR_state_resol"] in done_states):
        return np.nan

    if row["AJR_solvingcosts"] == "Yes":
        # 1. Incurred costs, but they were easy to pay --> not in gap
        if row["AJR_costdiff"] in easy_to_pay:
            return 1
        # 2. Incurred costs that were difficult to pay --> in the justice gap
        if row["AJR_costdiff"] in hard_to_pay:
            return 0
        return np.nan

    # 3. Did not incur costs --> not in justice gap
    if row["AJR_solvingcosts"] == "No":
        return 1

    # Default: missing when no rule matches
    return np.nan


A2J_problems["rp_cost"] = A2J_problems.apply(_recode_rp_cost, axis=1)

# Step 3: Apply the function of estimation
rp_cost = avg_estimation(A2J_problems, "rp_cost")

display(rp_cost)

In [None]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
##
## Process Barriers: Fairness
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Create a copy of the original dataset to work on, preventing accidental modifications to `master_data`

A2J_df       = master_data.copy()

# Step 1: Filter for people with a non-trivial legal problem

# Keep only individuals who reported at least one legal problem (legprob == 1)
# AND at least one severe legal problem (legprob_sev == 1).
# `.copy()` makes the subset an independent DataFrame, so adding columns below does
# not raise pandas' SettingWithCopyWarning.

A2J_problems = A2J_df[(A2J_df["legprob"] == 1) & (A2J_df["legprob_sev"] == 1)].copy()

# Step 2: Recode AJR_fair into 'rp_fair'


def _recode_rp_fair(row):
    """
    Classify one respondent by the `AJR_fair` answer: 1 = 'Yes', 0 = 'No' or
    "Don't know", NaN when the problem is not done with or the answer is anything else.
    """
    done_states = ("Done with, but problem persists", "Done with, problem fully resolved")

    # Both rules require the problem to be "done with" in either state column
    if not (row["AJR_state_noresol"] in done_states or row["AJR_state_resol"] in done_states):
        return np.nan

    # 1. AJR_fair answered 'Yes' --> not in gap
    if row["AJR_fair"] in ("Yes",):
        return 1

    # 2. AJR_fair answered 'No' or "Don't know" --> in the justice gap
    if row["AJR_fair"] in ("No", "Don't know"):
        return 0

    # Default: missing when no rule matches
    return np.nan


A2J_problems["rp_fair"] = A2J_problems.apply(_recode_rp_fair, axis=1)

# Step 3: Apply the function of estimation
rp_fair = avg_estimation(A2J_problems, "rp_fair")

display(rp_fair)

In [None]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
##
## Status of Legal Problem: Outcomes
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Create a copy of the original dataset to work on, preventing accidental modifications to `master_data`

A2J_df       = master_data.copy()

# Step 1: Filter for people with a non-trivial legal problem

# Keep only individuals who reported at least one legal problem (legprob == 1)
# AND at least one severe legal problem (legprob_sev == 1).
# `.copy()` makes the subset an independent DataFrame, so adding columns below does
# not raise pandas' SettingWithCopyWarning.

A2J_problems = A2J_df[(A2J_df["legprob"] == 1) & (A2J_df["legprob_sev"] == 1)].copy()

# Step 2: Recode the problem state columns into 'rp_outcome'


def _recode_rp_outcome(row):
    """
    Classify one respondent by problem outcome: 1 = fully resolved, 0 = done with but
    the problem persists, NaN otherwise. "Fully resolved" in either state column wins
    over "persists" in the other.
    """
    # 1. Problem is done with and fully resolved --> not in gap
    if (
        row["AJR_state_noresol"] in ("Done with, problem fully resolved",)
        or row["AJR_state_resol"] in ("Done with, problem fully resolved",)
    ):
        return 1

    # 2. Problem is done with but persists --> in the justice gap
    if (
        row["AJR_state_noresol"] in ("Done with, but problem persists",)
        or row["AJR_state_resol"] in ("Done with, but problem persists",)
    ):
        return 0

    # Default: missing when no rule matches
    return np.nan


A2J_problems["rp_outcome"] = A2J_problems.apply(_recode_rp_outcome, axis=1)

# Step 3: Apply the function of estimation
rp_outcome = avg_estimation(A2J_problems, "rp_outcome")

display(rp_outcome)

In [None]:
# Inspection helper: list the distinct values of `AJR_fair` in the current `A2J_problems`
# subset (the output column is labelled "Unique Reasons" although it holds AJR_fair values).
# NOTE(review): relies on `A2J_problems` left over from whichever section cell ran last.
unique_reasons = pd.DataFrame(A2J_problems["AJR_fair"].unique(), columns=["Unique Reasons"])
display(unique_reasons)
