In [1]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Project:           EU-S A2J Codebook
##
## Script:            Codebook in Python (Jupyter Notebook)
##
## Author(s):         A. Santiago Pardo G.        (spardo@worldjusticeproject.org)
##
## Dependencies:      World Justice Project
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# 
# 1. Required Packages                                                                  
# 
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

import os
import pandas as pd
from IPython.display import display, Markdown
import numpy as np
from copy import deepcopy

# Render the project title as a Markdown heading in the cell output
project_title = Markdown("## 📌 EU-S A2J Codebook")
display(project_title)

def avg_estimation(df, info_col, weight_col="regionpoppct", renormalize=False):
    """
    Estimate a population-weighted, country-level average of an indicator.

    The indicator is averaged within every (country, region) pair, each regional
    mean is multiplied by the region's weight, and the weighted regional means
    are summed within each country. NaN values are ignored at every step.

    Parameters:
    - df (pd.DataFrame): Input data. Must contain `country_name_ltn`, `nuts_id`,
      the weight column and the indicator column.
    - info_col (str): Name of the indicator column to aggregate.
    - weight_col (str, optional): Name of the regional weight column.
      Defaults to "regionpoppct".
    - renormalize (bool, optional): When True, the country total is divided by the
      sum of the weights of the regions that actually have a regional mean, so a
      region without valid answers does not pull the estimate towards zero.
      Defaults to False, which reproduces the plain weighted sum.

    Returns:
    - pd.DataFrame: One row per country with the columns `country_name_ltn`
      and `final_value`.

    Raises:
    - KeyError: If any required column is missing from `df`.
    """
    required_cols = ["country_name_ltn", "nuts_id", weight_col, info_col]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(f"avg_estimation: missing required column(s): {missing_cols}")

    def _nan_mean(values):
        # np.nanmean emits "Mean of empty slice" on all-NaN input; return NaN quietly instead.
        arr = np.asarray(values, dtype=float)
        if np.isnan(arr).all():
            return np.nan
        return np.nanmean(arr)

    def _nan_sum(values):
        # Sum that skips NaN (an all-NaN group sums to 0).
        return np.nansum(values)

    # Step 1: Regional means of the weight and of the indicator
    regional = (
        df
        .groupby(["country_name_ltn", "nuts_id"], as_index=False)
        .agg(
            pop_weight=(weight_col, _nan_mean),
            regional_mean=(info_col, _nan_mean),
        )
    )

    # Step 2: Weight every regional mean; keep the weight only where a mean exists
    regional["weighted_mean"] = regional["regional_mean"] * regional["pop_weight"]
    regional["used_weight"] = regional["pop_weight"].where(regional["regional_mean"].notna())

    # Step 3: Sum the weighted means (and the weights actually used) per country
    country = (
        regional
        .groupby("country_name_ltn", as_index=False)
        .agg(
            final_value=("weighted_mean", _nan_sum),
            used_weight=("used_weight", _nan_sum),
        )
    )

    if renormalize:
        # Countries without any usable region get NaN instead of a division by zero
        valid_weight = country["used_weight"].where(country["used_weight"] > 0)
        country["final_value"] = country["final_value"] / valid_weight

    final_df = country.drop(columns="used_weight")

    return final_df


## 📌 EU-S A2J Codebook

In [2]:
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# 
# 2. SharePoint Path                                                                    
# 
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Current system user, read from the USER or USERNAME environment variable
user = os.getenv("USER") or os.getenv("USERNAME")

# SharePoint root folder for each known user
eu_paths = {
    "santiagopardo": "/Users/santiagopardo/Library/CloudStorage/OneDrive-WorldJusticeProject/EU Subnational/EU-S Data",
}

# Root folder for the current user; None when the user is not listed above
path2eu = eu_paths.get(user)

# Report either the resolved path or a configuration error
path_message = (
    f"🔹 **EU Subnational Path:** `{path2eu}`"
    if path2eu
    else "🚨 **Error:** Ruta de SharePoint no encontrada. Revisa la configuración del usuario."
)
display(Markdown(path_message))



🔹 **EU Subnational Path:** `/Users/santiagopardo/Library/CloudStorage/OneDrive-WorldJusticeProject/EU Subnational/EU-S Data`

In [3]:
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# 
# 3. Cargar Base de Datos                                                               
# 
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Sub-folder and file name of the merged GPP survey data (Stata format)
sub_path = "eu-gpp/1. Data/3. Merge"
GPP_file = "EU_GPP_2024.dta"

# Sub-folder and file name of the regional population weights (Excel format)
weights_path = "reports/eu-thematic-reports/data-viz/inputs"
weigths_file = "region_labels.xlsx"  # (sic) variable name kept so existing references keep working

# Build the absolute paths; both stay None when the SharePoint root is unknown
file_path = os.path.abspath(os.path.join(path2eu, sub_path, GPP_file)) if path2eu else None
file_weights_path = os.path.abspath(os.path.join(path2eu, weights_path, weigths_file)) if path2eu else None

# Show the generated paths
if file_path:
    display(Markdown(f"🔹 **Ruta generada:** `{file_path}`, `{file_weights_path}`"))
else:
    display(Markdown("🚨 **Error:** No se pudo construir la ruta del archivo."))


def _load_and_preview(path, reader):
    """
    Read `path` with `reader`, display a short preview and return the DataFrame.

    Parameters:
    - path (str or None): Absolute path of the file to read.
    - reader (callable): Function that takes the path and returns a DataFrame
      (e.g. `pd.read_stata`, `pd.read_excel`).

    Returns:
    - pd.DataFrame: The loaded data.

    Raises:
    - FileNotFoundError: If `path` is None or does not exist.
    - Exception: Whatever `reader` raises is displayed and then re-raised, so the
      cell stops at the real cause instead of failing later with a NameError.
    """
    if not (path and os.path.exists(path)):
        display(Markdown("🚨 **Error:** No se encontró el archivo en la ruta especificada."))
        raise FileNotFoundError(f"Input file not found: {path!r}")

    display(Markdown("✅ **Archivo encontrado. Cargando datos...**"))

    try:
        data = reader(path)
    except Exception as e:
        display(Markdown(f"❌ **Error al cargar el archivo:** `{e}`"))
        raise

    # First rows and basic structure of the dataset
    display(Markdown("### 📊 Primeras filas del dataset:"))
    display(data.head())

    display(Markdown("### 📋 Información del dataset:"))
    display(Markdown(f"- **Número de filas:** {data.shape[0]}"))
    display(Markdown(f"- **Número de columnas:** {data.shape[1]}"))

    return data


# Survey microdata (Stata .dta) and regional population weights (Excel .xlsx)
df = _load_and_preview(file_path, pd.read_stata)
region_weights = _load_and_preview(file_weights_path, pd.read_excel)

# Attach the regional weights to every respondent. `validate` makes the merge fail
# loudly if a (country, region) key is duplicated in the weights table, which would
# otherwise silently duplicate survey rows.
df = df.merge(
    region_weights.rename(columns = {"country": "country_name_ltn"}),
    on = ["country_name_ltn", "nuts_id"],
    how = "left",
    validate = "many_to_one"
)

display(Markdown("### 📋 Información del dataset:"))
display(Markdown(f"- **Número de filas:** {df.shape[0]}"))
display(Markdown(f"- **Número de columnas:** {df.shape[1]}"))
display(Markdown(f"- **Columnas pegadas:** {df[['nuts_id', 'regionpop']].drop_duplicates().to_string(index=False)}"))

🔹 **Ruta generada:** `/Users/santiagopardo/Library/CloudStorage/OneDrive-WorldJusticeProject/EU Subnational/EU-S Data/eu-gpp/1. Data/3. Merge/EU_GPP_2024.dta`, `/Users/santiagopardo/Library/CloudStorage/OneDrive-WorldJusticeProject/EU Subnational/EU-S Data/reports/eu-thematic-reports/data-viz/inputs/region_labels.xlsx`

✅ **Archivo encontrado. Cargando datos...**

### 📊 Primeras filas del dataset:

Unnamed: 0,country_year_id,country_name_ltn,country_name_off,country_code_nuts,country_code_iso,nuts_ltn,nuts_id,year,id,gend,...,qpi2d,qpi2e,qpi2f,qpi3a,qpi3b,qpi3c,qpi3d,COLOR,dweight,Strata
0,Austria_2024_36,Austria,Österreich,AT,AUT,Ostoesterreich,AT1,2024.0,36.0,Male,...,,,,,,,,,1.188589,
1,Austria_2024_108,Austria,Österreich,AT,AUT,Ostoesterreich,AT1,2024.0,108.0,Male,...,,,,,,,,,1.13418,
2,Austria_2024_161,Austria,Österreich,AT,AUT,Ostoesterreich,AT1,2024.0,161.0,Male,...,,,,,,,,,1.5e-05,
3,Austria_2024_208,Austria,Österreich,AT,AUT,Ostoesterreich,AT1,2024.0,208.0,Female,...,,,,,,,,,0.838305,
4,Austria_2024_243,Austria,Österreich,AT,AUT,Ostoesterreich,AT1,2024.0,243.0,Female,...,,,,,,,,,1.092063,


### 📋 Información del dataset:

- **Número de filas:** 64089

- **Número de columnas:** 444

✅ **Archivo encontrado. Cargando datos...**

### 📊 Primeras filas del dataset:

Unnamed: 0,country,nuts_level,nuts_id,nameOFF,nameENG,nameSHORT,countrypop,regionpop,regionpoppct,border,label
0,Austria,1,AT1,Ostösterreich,Eastern Austria,East Austria,9104772,4001720,0.439519,#A3A9A6,#21222E
1,Austria,1,AT2,Südösterreich,Southern Austria,South Austria,9104772,1834182,0.201453,#A4ABA6,#221E30
2,Austria,1,AT3,Westösterreich,Western Austria,West Austria,9104772,3268870,0.359028,#A5A6A4,#1A1C2F
3,Belgium,1,BE1,Région de Bruxelles-Capitale/Brussels Hoofdste...,Brussels Capital Region,Brussels Region,11742796,1253178,0.106719,#A1A8A6,#1A212B
4,Belgium,1,BE2,Vlaams Gewest,Flemish Region,Flemish Region,11742796,6787969,0.578054,#A3ABA6,#23222C


### 📋 Información del dataset:

- **Número de filas:** 110

- **Número de columnas:** 11

### 📋 Información del dataset:

- **Número de filas:** 64089

- **Número de columnas:** 453

- **Columnas pegadas:**  nuts_id  regionpop
     AT1    4001720
     AT2    1834182
     AT3    3268870
     BE1    1253178
     BE2    6787969
     BE3    3701649
     BG3    3130130
     BG4    3317580
    HR02     999106
    HR03    1297987
    HR05     768624
    HR06     785177
     CY0     920701
    CZ01    1357326
CZ020304    3803014
  CZ0506    3265182
  CZ0708    2402007
    DK01    1891871
    DK02     849857
    DK03    1237413
    DK04    1358879
    DK05     594634
     EE0    1365884
    FI19    1384761
    FI1B    1733033
  FI1C20    1174832
    FI1D    1271344
     FR1   12388388
     FRB    2572735
     FRC    2793968
     FRD    3326575
     FRE    5986464
     FRF    5565282
     FRG    3902115
     FRH    3432901
     FRI    6125089
     FRJ    6110099
     FRK    8195542
     FRL    5173952
     FRM     352851
     DE1   11280257
     DE2   13369393
     DE3    3755251
     DE4    2573135
     DE5     684864
     DE6    1892122
     DE7    6391360
     DE8    1628378
     DE9    8140242
     DEA   18139116
     DEB    4159150
     DEC     992666
     DED    4086152
     DEE    2186643
     DEF    2953270
     DEG    2126846
     EL3    3790842
     EL4    1145363
     EL5    2911703
     EL6    2566074
     HU1    2999794
     HU2    2900998
     HU3    3698952
    IE04     930208
    IE05    1741180
    IE06    2600007
     ITC   15858626
     ITF   13464669
     ITG    6392162
     ITH   11558522
     ITI   11723222
    LV00    1883008
    LT01     848724
    LT02    2008555
    LU00     660809
    MT00     542051
     NL1    1757677
     NL2    3762742
     NL3    8536295
     NL4    3754577
     PL2    7564588
     PL4    5986070
     PL5    3710289
     PL6    5542322
     PL7    3476118
     PL8    4994586
     PL9    5479763
     PT1    9974165
     PT2     239942
     PT3     253259
     RO1    4812769
     RO2    5577375
     RO3    5131264
     RO4    3533140
    SK01     728370
    SK02    1806944
    SK03    1305883
    SK04    1587595
    SI03    1107885
    SI04    1009087
     ES1    4293871
     ES2    4552028
     ES3    6871903
     ES4    5522095
     ES5   14328064
     ES6   10304384
     ES7    2213016
     SE1    4203672
     SE2    4556857
     SE3    1761027

In [4]:
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# 
# 4. Legal needs survey analysis
# 
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Bare expression: the notebook renders the merged survey DataFrame `df` as this cell's output
df


Unnamed: 0,country_year_id,country_name_ltn,country_name_off,country_code_nuts,country_code_iso,nuts_ltn,nuts_id,year,id,gend,...,Strata,nuts_level,nameOFF,nameENG,nameSHORT,countrypop,regionpop,regionpoppct,border,label
0,Austria_2024_36,Austria,Österreich,AT,AUT,Ostoesterreich,AT1,2024.0,36.0,Male,...,,1,Ostösterreich,Eastern Austria,East Austria,9104772,4001720,0.439519,#A3A9A6,#21222E
1,Austria_2024_108,Austria,Österreich,AT,AUT,Ostoesterreich,AT1,2024.0,108.0,Male,...,,1,Ostösterreich,Eastern Austria,East Austria,9104772,4001720,0.439519,#A3A9A6,#21222E
2,Austria_2024_161,Austria,Österreich,AT,AUT,Ostoesterreich,AT1,2024.0,161.0,Male,...,,1,Ostösterreich,Eastern Austria,East Austria,9104772,4001720,0.439519,#A3A9A6,#21222E
3,Austria_2024_208,Austria,Österreich,AT,AUT,Ostoesterreich,AT1,2024.0,208.0,Female,...,,1,Ostösterreich,Eastern Austria,East Austria,9104772,4001720,0.439519,#A3A9A6,#21222E
4,Austria_2024_243,Austria,Österreich,AT,AUT,Ostoesterreich,AT1,2024.0,243.0,Female,...,,1,Ostösterreich,Eastern Austria,East Austria,9104772,4001720,0.439519,#A3A9A6,#21222E
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64084,Sweden_2024_17575,Sweden,Sverige,SE,SWE,Norra Sverige,SE3,2024.0,17575.0,Male,...,,1,Norra Sverige,North Sweden,Northern Sweden,10521556,1761027,0.167373,#A8A9A9,#1C2529
64085,Sweden_2024_17583,Sweden,Sverige,SE,SWE,Norra Sverige,SE3,2024.0,17583.0,Male,...,,1,Norra Sverige,North Sweden,Northern Sweden,10521556,1761027,0.167373,#A8A9A9,#1C2529
64086,Sweden_2024_17585,Sweden,Sverige,SE,SWE,Norra Sverige,SE3,2024.0,17585.0,Male,...,,1,Norra Sverige,North Sweden,Northern Sweden,10521556,1761027,0.167373,#A8A9A9,#1C2529
64087,Sweden_2024_17588,Sweden,Sverige,SE,SWE,Norra Sverige,SE3,2024.0,17588.0,Male,...,,1,Norra Sverige,North Sweden,Northern Sweden,10521556,1761027,0.167373,#A8A9A9,#1C2529


In [5]:
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# 
# Problem prevalence                                                            
# 
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Work on a copy so the merged survey data in `df` is left untouched
master_data = df.copy()

# Identifiers of the legal problems; they are combined with the `AJP_` prefix below
legal_problems = [
    "A1", "A2", "A3", 
    "B1", "B2", "B3", "B4", 
    "C1", "C2", "C3", "C4", 
    "D1", "D2", "D3", "D4", "D5", "D6", 
    "E1", "E2", "E3", 
    "F1", "F2", 
    "G1", "G2", "G3", 
    "H1", "H2", "H3", 
    "I1", 
    "J1", "J2", "J3", "J4", 
    "K1", "K2", "K3",
    "L1", "L2"
]

# Column names of the incidence (`_bin`) and severity (`_sev`) variables
legprob_bin = [f"AJP_{lp}_bin" for lp in legal_problems]
legprob_sev = [f"AJP_{lp}_sev" for lp in legal_problems]

# Step 1: Recode the incidence columns to 1 / 0 / NaN.
# The columns are first turned into integer codes ("Yes" -> 1, "No" -> 0, missing -> 0);
# then code 1 becomes 1, code 2 becomes 0 and every other code becomes NaN.
# NOTE(review): under this two-stage recode a "No" label ends up as NaN while a
# numeric 2 ends up as 0 -- confirm the raw coding of the AJP_*_bin variables.
# `legprob` below is unaffected because it only counts the 1s.
bin_codes = master_data[legprob_bin].replace({"Yes": 1, "No": 0}).fillna(0).astype(int)

# Vectorised equivalent of the element-wise recode (DataFrame.applymap is deprecated)
master_data[legprob_bin] = bin_codes.eq(1).astype(float).where(bin_codes.isin([1, 2]))

# Step 2: Create 'legprob' column: 1 if any legprob_bin column is 1, otherwise 0
master_data["legprob"] = (master_data[legprob_bin].sum(axis=1) > 0).astype(int)

# Step 3: Convert legprob_sev columns:
# - 1 if value is between 4 and 97
# - 0 if value is less than 4
# - NaN otherwise
sev_scores = master_data[legprob_sev]
is_severe = sev_scores.ge(4) & sev_scores.lt(98)
is_minor = sev_scores.lt(4)
master_data[legprob_sev] = is_severe.astype(float).where(is_severe | is_minor)

# Step 4: Create 'legprob_sev' column: 1 if any legprob_sev column is 1, otherwise 0
# (the string "legprob_sev" is a new column; the list `legprob_sev` holds the per-problem columns)
master_data["legprob_sev"] = (master_data[legprob_sev].sum(axis=1) > 0).astype(int)

# Display the proportion of unique values for legprob
prop_legprob = master_data["legprob"].value_counts(normalize=True).reset_index()
prop_legprob.columns = ["legprob", "Proportion"]
display(prop_legprob)

# Display the proportion of unique values for legprob_sev
prop_legprob_sev = master_data["legprob_sev"].value_counts(normalize=True).reset_index()
prop_legprob_sev.columns = ["legprob_sev", "Proportion"]
display(prop_legprob_sev)

  master_data[legprob_bin] = master_data[legprob_bin].applymap(
  master_data[legprob_sev] = master_data[legprob_sev].applymap(


Unnamed: 0,legprob,Proportion
0,1,0.581317
1,0,0.418683


Unnamed: 0,legprob_sev,Proportion
0,0,0.506686
1,1,0.493314


In [6]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
##
## Access to proper information and advice
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Create a copy of the original dataset to work on, preventing accidental modifications to `master_data`

A2J_df       = master_data.copy()

# Step 1: Filter for people with a non-trivial legal problem

# Keep only individuals who reported at least one legal problem (legprob == 1)
# AND at least one severe legal problem (legprob_sev == 1).
# The explicit .copy() makes the subset an independent DataFrame, so adding columns
# below does not raise pandas' SettingWithCopyWarning.

A2J_problems = A2J_df[(A2J_df["legprob"] == 1) & (A2J_df["legprob_sev"] == 1)].copy()

# Step 2: Recode 'AJE_infosource' into 'access2info'

# Create a new column `access2info` based on the response label in `AJE_infosource`:
# - Assign 1 for 'Agree' or 'Strongly agree'
# - Assign 0 for 'Disagree', 'Strongly disagree' or "Don't know"
# - Assign NaN (missing value) for all other cases

A2J_problems["access2info"] = A2J_problems["AJE_infosource"].apply(
    lambda x: 1 if x in ['Agree', 'Strongly agree'] 
    else (0 if x in ['Disagree', 'Strongly disagree', "Don't know"] else np.nan)
)

# Step 3: Apply the function of estimation

access2info = avg_estimation(A2J_problems, "access2info")

display(access2info)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  A2J_problems["access2info"] = A2J_problems["AJE_infosource"].apply(


Unnamed: 0,country_name_ltn,final_value
0,Austria,0.608969
1,Belgium,0.521998
2,Bulgaria,0.643497
3,Croatia,0.539886
4,Cyprus,0.528061
5,Czechia,0.618204
6,Denmark,0.519293
7,Estonia,0.533113
8,Finland,0.663475
9,France,0.549409


In [7]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
##
## Access to adequate representation
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Create a copy of the original dataset to work on, preventing accidental modifications to `master_data`

A2J_df       = master_data.copy()

# Step 1: Filter for people with a non-trivial legal problem

# Keep only individuals who reported at least one legal problem (legprob == 1)
# AND at least one severe legal problem (legprob_sev == 1).
# The explicit .copy() makes the subset an independent DataFrame, so the column
# assignments below do not raise pandas' SettingWithCopyWarning.

A2J_problems = A2J_df[(A2J_df["legprob"] == 1) & (A2J_df["legprob_sev"] == 1)].copy()

# Step 2: Treat the "No answer" label in `AJD_noadvice_reason` as missing

A2J_problems["AJD_noadvice_reason"] = A2J_problems["AJD_noadvice_reason"].replace("No answer", np.nan)

# Step 3 : Recode the advice variables into 'access2rep'

# Adviser columns counted as formal sources of advice
access2rep_formal_advisers = [
    "AJD_adviser_2", "AJD_adviser_3", "AJD_adviser_4",
    "AJD_adviser_5", "AJD_adviser_6", "AJD_adviser_8",
]

# Adviser columns counted as informal sources (friend, religious org, or other)
access2rep_informal_advisers = [
    "AJD_adviser_1", "AJD_adviser_7", "AJD_adviser_9", "AJD_adviser_98",
]

# Reasons for not seeking advice that mean no advice was needed
access2rep_reasons_not_needed = [
    "I thought the issue was not important or not difficult to resolve",
    "I did not think I needed advice",
]

# Reasons for not seeking advice that place the respondent in the justice gap
access2rep_reasons_barrier = [
    "Thought the other side was right",
    "I was concerned about the financial cost",
    "I had received help with a problem before and did not find it useful",
    "I did not know who to call or where to get advice",
    "I did not know I could get advice for this problem",
    "Was scared to get advice",
    "Advisers were too far away or it would take too much time",
    "Other",
    "Don't know",
]


def _recode_access2rep(row):
    """
    Classify one respondent: 1 = not in the justice gap, 0 = in the gap, NaN = undetermined.

    The rules are checked in order and the first one that matches wins.
    """
    got_advice = row["AJD_inst_advice"]
    reason = row["AJD_noadvice_reason"]

    # 1. Able to access advice from a formal source --> not in gap
    if got_advice == "Yes" and any(row[col] == "Yes" for col in access2rep_formal_advisers):
        return 1

    # 2. Did not access advice because problem was not important --> not in gap
    if got_advice == "No" and reason in access2rep_reasons_not_needed:
        return 1

    # 3. If friend or family has a legal background --> not in gap
    if got_advice == "Yes" and row["AJD_adviser_1"] == "Yes" and row["AJD_expert_adviser"] == "Yes":
        return 1

    # 4. Accessed advice from a friend, religious org, or other --> in the justice gap
    if got_advice == "Yes" and any(row[col] == "Yes" for col in access2rep_informal_advisers):
        return 0

    # 5. If reason for not seeking advice falls into specific categories --> in the justice gap
    if got_advice == "No" and reason in access2rep_reasons_barrier:
        return 0

    # 6. If AJD_inst_advice is "Don't know" --> in the justice gap
    if got_advice == "Don't know":
        return 0

    # Default: NaN if no condition matches
    return np.nan


# Only the columns the classifier reads are passed to the row-wise apply
access2rep_inputs = (
    access2rep_formal_advisers
    + access2rep_informal_advisers
    + ["AJD_expert_adviser", "AJD_inst_advice", "AJD_noadvice_reason"]
)

A2J_problems["access2rep"] = A2J_problems[access2rep_inputs].apply(_recode_access2rep, axis=1)


# Step 4: Apply the function of estimation
access2rep = avg_estimation(A2J_problems, "access2rep")

display(access2rep)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  A2J_problems["AJD_noadvice_reason"] = A2J_problems["AJD_noadvice_reason"].replace("No answer", np.nan)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  A2J_problems["access2rep"] = A2J_problems.apply(


Unnamed: 0,country_name_ltn,final_value
0,Austria,0.492543
1,Belgium,0.562305
2,Bulgaria,0.615435
3,Croatia,0.46777
4,Cyprus,0.571795
5,Czechia,0.452796
6,Denmark,0.51986
7,Estonia,0.39726
8,Finland,0.539533
9,France,0.431686


In [8]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
##
## Resolution process: timeliness
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Create a copy of the original dataset to work on, preventing accidental modifications to `master_data`

A2J_df       = master_data.copy()

# Step 1: Filter for people with a non-trivial legal problem

# Keep only individuals who reported at least one legal problem (legprob == 1)
# AND at least one severe legal problem (legprob_sev == 1).
# The explicit .copy() makes the subset an independent DataFrame, so adding the
# column below does not raise pandas' SettingWithCopyWarning.

A2J_problems = A2J_df[(A2J_df["legprob"] == 1) & (A2J_df["legprob_sev"] == 1)].copy()

# Step 2: Recode AJR_solvingtime into 'rp_time'

# Status labels that mean the resolution process is over
rp_time_done_states = ["Done with, but problem persists", "Done with, problem fully resolved"]


def _recode_rp_time(row):
    """
    Classify one respondent's resolution time: 1 = timely, 0 = not timely, NaN = undetermined.

    The rules are checked in order and the first one that matches wins.
    """
    solving_time = row["AJR_solvingtime"]
    is_done = (
        row["AJR_state_noresol"] in rp_time_done_states
        or row["AJR_state_resol"] in rp_time_done_states
    )

    # 1. Problem resolved in one year or less (solving time from 0 up to, not including, 13)
    if is_done and 0 <= solving_time < 13:
        return 1

    # 2. Problem solved in more than one year
    if is_done and solving_time > 12:
        return 0

    # 3. Special code -8888 counts as not timely, whatever the problem status
    # NOTE(review): presumably a "don't know" code -- confirm against the questionnaire
    if solving_time == -8888:
        return 0

    # Everything else is NaN: special code -9999, problems that are ongoing or
    # too early to say, and any combination not covered above
    return np.nan


# Only the columns the classifier reads are passed to the row-wise apply
A2J_problems["rp_time"] = A2J_problems[
    ["AJR_solvingtime", "AJR_state_noresol", "AJR_state_resol"]
].apply(_recode_rp_time, axis=1)

# Step 3: Apply the function of estimation
rp_time = avg_estimation(A2J_problems, "rp_time")

display(rp_time)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  A2J_problems["rp_time"] = A2J_problems.apply(


Unnamed: 0,country_name_ltn,final_value
0,Austria,0.890486
1,Belgium,0.875714
2,Bulgaria,0.808687
3,Croatia,0.667635
4,Cyprus,0.888889
5,Czechia,0.572254
6,Denmark,0.874798
7,Estonia,0.558491
8,Finland,0.46642
9,France,0.60267


In [9]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
##
## Resolution process: costliness
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Create a copy of the original dataset to work on, preventing accidental modifications to `master_data`

A2J_df       = master_data.copy()

# Step 1: Filter for people with a non-trivial legal problem

# Keep only individuals who reported at least one legal problem (legprob == 1)
# AND at least one severe legal problem (legprob_sev == 1).
# The explicit .copy() makes the subset an independent DataFrame, so adding the
# column below does not raise pandas' SettingWithCopyWarning.

A2J_problems = A2J_df[(A2J_df["legprob"] == 1) & (A2J_df["legprob_sev"] == 1)].copy()

# Step 2: Recode AJR_solvingcosts / AJR_costdiff into 'rp_cost'

# Status labels that mean the resolution process is over
rp_cost_done_states = ["Done with, but problem persists", "Done with, problem fully resolved"]

# Cost-difficulty labels
rp_cost_easy = ["Very easy", "Somewhat easy"]
# NOTE(review): the other label lists in this notebook spell the label "Don't know";
# both spellings are accepted here so the answer is coded 0 whichever casing the
# value label of AJR_costdiff uses -- confirm against the data.
rp_cost_hard = ["Difficult", "Nearly impossible", "Don't Know", "Don't know"]


def _recode_rp_cost(row):
    """
    Classify one respondent's resolution costs: 1 = not in the justice gap,
    0 = in the gap, NaN = undetermined (including problems that are not done).
    """
    is_done = (
        row["AJR_state_noresol"] in rp_cost_done_states
        or row["AJR_state_resol"] in rp_cost_done_states
    )
    if not is_done:
        return np.nan

    incurred_costs = row["AJR_solvingcosts"]
    cost_difficulty = row["AJR_costdiff"]

    # 1. If you incurred costs, but they were easy to pay --> not in gap
    if incurred_costs == "Yes" and cost_difficulty in rp_cost_easy:
        return 1

    # 2. If you incurred costs that were difficult to pay --> in the justice gap
    if incurred_costs == "Yes" and cost_difficulty in rp_cost_hard:
        return 0

    # 3. You did not incur costs --> not in justice gap
    if incurred_costs == "No":
        return 1

    # Default: NaN if no condition matches
    return np.nan


# Only the columns the classifier reads are passed to the row-wise apply
A2J_problems["rp_cost"] = A2J_problems[
    ["AJR_state_noresol", "AJR_state_resol", "AJR_costdiff", "AJR_solvingcosts"]
].apply(_recode_rp_cost, axis=1)

# Step 3: Apply the function of estimation
rp_cost = avg_estimation(A2J_problems, "rp_cost")

display(rp_cost)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  A2J_problems["rp_cost"] = A2J_problems.apply(


Unnamed: 0,country_name_ltn,final_value
0,Austria,0.824379
1,Belgium,0.819379
2,Bulgaria,0.779664
3,Croatia,0.786547
4,Cyprus,0.815686
5,Czechia,0.834618
6,Denmark,0.841205
7,Estonia,0.854599
8,Finland,0.83208
9,France,0.85299


In [10]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
##
## Resolution process: fairness
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Create a copy of the original dataset to work on, preventing accidental modifications to `master_data`

A2J_df       = master_data.copy()

# Step 1: Filter for people with a non-trivial legal problem

# Keep only individuals who reported at least one legal problem (legprob == 1)
# AND at least one severe legal problem (legprob_sev == 1).
# The explicit .copy() makes the subset an independent DataFrame, so adding the
# column below does not raise pandas' SettingWithCopyWarning.

A2J_problems = A2J_df[(A2J_df["legprob"] == 1) & (A2J_df["legprob_sev"] == 1)].copy()

# Step 2: Recode AJR_fair into 'rp_fair'

# Status labels that mean the resolution process is over
rp_fair_done_states = ["Done with, but problem persists", "Done with, problem fully resolved"]


def _recode_rp_fair(row):
    """
    Classify the fairness of one respondent's resolution process: 1 = fair,
    0 = not fair or don't know, NaN = undetermined (including problems that are not done).
    """
    is_done = (
        row["AJR_state_noresol"] in rp_fair_done_states
        or row["AJR_state_resol"] in rp_fair_done_states
    )
    if not is_done:
        return np.nan

    fair_answer = row["AJR_fair"]

    # 1. The process is over and was described as fair --> not in gap
    if fair_answer in ["Yes"]:
        return 1

    # 2. The process is over and was not described as fair (or don't know) --> in the justice gap
    if fair_answer in ["No", "Don't know"]:
        return 0

    # Default: NaN if no condition matches
    return np.nan


# Only the columns the classifier reads are passed to the row-wise apply
A2J_problems["rp_fair"] = A2J_problems[
    ["AJR_state_noresol", "AJR_state_resol", "AJR_fair"]
].apply(_recode_rp_fair, axis=1)

# Step 3: Apply the function of estimation
rp_fair = avg_estimation(A2J_problems, "rp_fair")

display(rp_fair)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  A2J_problems["rp_fair"] = A2J_problems.apply(


Unnamed: 0,country_name_ltn,final_value
0,Austria,0.44634
1,Belgium,0.431244
2,Bulgaria,0.676635
3,Croatia,0.562164
4,Cyprus,0.574219
5,Czechia,0.464257
6,Denmark,0.50797
7,Estonia,0.531073
8,Finland,0.452136
9,France,0.48748


In [None]:
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
##
## Resolution process: outcome
##
## +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Create a copy of the original dataset to work on, preventing accidental modifications to `master_data`

A2J_df       = master_data.copy()

# Step 1: Filter for people with a non-trivial legal problem

# Keep only individuals who reported at least one legal problem (legprob == 1)
# AND at least one severe legal problem (legprob_sev == 1).
# The explicit .copy() makes the subset an independent DataFrame, so adding the
# column below does not raise pandas' SettingWithCopyWarning.

A2J_problems = A2J_df[(A2J_df["legprob"] == 1) & (A2J_df["legprob_sev"] == 1)].copy()

# Step 2: Recode the problem status (AJR_state_noresol / AJR_state_resol) into 'rp_outcome'


def _recode_rp_outcome(row):
    """
    Classify the outcome of one respondent's problem: 1 = fully resolved,
    0 = done but the problem persists, NaN = any other status.

    "Fully resolved" in either status column takes precedence over "persists".
    """
    # 1. Problem is done with and fully resolved --> not in gap
    if (row["AJR_state_noresol"] in ["Done with, problem fully resolved"] or
            row["AJR_state_resol"] in ["Done with, problem fully resolved"]):
        return 1

    # 2. Problem is done with but persists --> in the justice gap
    if (row["AJR_state_noresol"] in ["Done with, but problem persists"] or
            row["AJR_state_resol"] in ["Done with, but problem persists"]):
        return 0

    # Default: NaN if no condition matches
    return np.nan


# Only the columns the classifier reads are passed to the row-wise apply
A2J_problems["rp_outcome"] = A2J_problems[
    ["AJR_state_noresol", "AJR_state_resol"]
].apply(_recode_rp_outcome, axis=1)

# Step 3: Apply the function of estimation
rp_outcome = avg_estimation(A2J_problems, "rp_outcome")

display(rp_outcome)

In [206]:
# List the distinct values found in `AJR_fair` for the filtered respondents
fair_labels = A2J_problems["AJR_fair"].unique()
unique_reasons = pd.DataFrame(fair_labels, columns=["Unique Reasons"])
display(unique_reasons)


Unnamed: 0,Unique Reasons
0,
1,Don't know
2,No
3,Yes
4,No answer
