In [241]:
import json
import pandas as pd
import re

In [242]:
def remove_html(text):
    """Return ``text`` with every ``<...>`` tag-like span removed.

    The match is non-greedy, so each tag is stripped individually and the
    text between tags is kept. ``.`` does not match newlines here, so a
    ``<`` and ``>`` separated by a line break are left untouched.
    """
    return re.sub(r"<.*?>", "", text)

def get_legislation_label(item):
    """Build a label such as ``Article 2(1)(b)`` from ``item["itemsBase"]``.

    The first entry's ``"value"`` is rendered after the word ``Article``;
    each following entry is appended in parentheses, skipping entries whose
    ``"value"`` is the empty string.
    """
    mentions = item["itemsBase"]
    label = f"Article {mentions[0]['value']}"
    for mention in mentions[1:]:
        value = mention["value"]
        if value != "":
            label += f"({value})"
    return label

def get_national_provisions_info(implementation):
    """Reduce one national provision record to the three fields used downstream.

    Returns a dict with:
      * ``"label"`` - the article label built from ``implementation["link"]``
      * ``"textEn"`` - ``implementation["textEn"]`` with HTML tags stripped
      * ``"implementationType"`` - the ``"label"`` of ``implementation["implementationType"]``
    """
    return {
        "label": get_legislation_label(implementation["link"]),
        "textEn": remove_html(implementation["textEn"]),
        "implementationType": implementation["implementationType"]["label"],
    }

def convert_to_id_first(transposition_json):
    """Index a sequence of transposition items by their ``"id"``.

    Each item is reduced to a dict with the keys ``id``, ``text`` (HTML
    stripped), ``nationalActs`` (each with its summarised ``provisions``,
    ``title`` and ``titleEn``), ``hasTransposition`` and ``transp``.
    ``transp`` holds the ``"label"`` of the original value when that value is
    truthy, otherwise the original (falsy) value is kept unchanged.

    Returns a dict mapping item id -> reduced item. If two items share an id,
    the later one wins.
    """
    items_by_id = {}
    for item in transposition_json:
        entry = {"id": item["id"], "text": remove_html(item["text"])}

        national_acts = []
        for act in item["nationalActs"]:
            national_acts.append(
                {
                    "provisions": [get_national_provisions_info(p) for p in act["provisions"]],
                    "title": act["title"],
                    "titleEn": act["titleEn"],
                }
            )
        entry["nationalActs"] = national_acts

        entry["hasTransposition"] = item["hasTransposition"]

        # Only a truthy "transp" value is unwrapped to its label.
        transp_value = item["transp"]
        entry["transp"] = transp_value["label"] if transp_value else transp_value

        items_by_id[item["id"]] = entry

    return items_by_id

# Inputs for this run: the EU instrument identifier and the jurisdiction whose
# transposition data is loaded.
eu_legislation = "32014L0041"
jurisdiction = "Italy"
file_name = f"../data/transposition_data/{eu_legislation}-{jurisdiction}.json"

# Open the files in `with` blocks so the handles are closed as soon as parsing
# finishes, and read them as UTF-8 explicitly so non-ASCII text (e.g. national
# act titles) does not depend on the platform's default encoding.
with open(file_name, "r", encoding="utf-8") as transposition_file:
    raw_transpositions = json.load(transposition_file)

# Re-key the raw "items" list by item id; see convert_to_id_first.
transpositions = convert_to_id_first(raw_transpositions["items"])

# Per-item connection data, looked up by the same item ids.
with open(f"joint_items_{eu_legislation}.json", "r", encoding="utf-8") as connections_file:
    connections = json.load(connections_file)

In [243]:
# Build grouped_transps: each entry is either a single transposition dict or,
# for a transposable item with linked elements, a list holding that item's
# transposition followed by the transpositions of its linked elements.
grouped_transps = []
added_ids = set()  # ids already absorbed into an earlier group
for eu_instrument_id, transposition in transpositions.items():
    # Skip items with no connection record and items already pulled into a group.
    if eu_instrument_id not in connections or eu_instrument_id in added_ids:
        continue

    connection = connections[eu_instrument_id]
    if not (connection["transposable"] and len(connection["linkedElementsIds"]) > 0):
        grouped_transps.append(transposition)
        continue

    connected_eu_ids = connection["linkedElementsIds"]
    added_ids.update(connected_eu_ids)
    group = [transposition]
    group.extend(transpositions[linked_eu_id] for linked_eu_id in connected_eu_ids)
    grouped_transps.append(group)

In [244]:
# Spot-check a single entry of grouped_transps. Each entry is either one
# transposition dict or a list of them (a grouped item plus its linked elements).
grouped_transps[9]

[{'id': 'art_2__let_c',
  'text': '(c)  ‘issuing authority’ means:',
  'nationalActs': [{'provisions': [{'label': 'Article 2(1)(b)',
      'textEn': '1. For the purposes of this Decree:(b) issuing authority: the competent authority of a Member State of the Union, which issues the investigation order ordering the acquisition of evidence in criminal proceedings, or validates a request for the acquisition of evidence from an administrative authority;',
      'implementationType': 'Fully implemented'},
     {'label': 'Article 27(1)',
      'textEn': 'In criminal proceedings or proceedings for the application of a measure of asset prevention, the public prosecutor and the proceeding judge may, within their respective powers, issue an investigation order and transmit it directly to the enforcement authority. The judge issues the investigation order after hearing the parties.',
      'implementationType': 'Fully implemented'}],
    'title': 'Decreto Legislativo 21 giugno 2017, n. 108',
    't

In [245]:
# Flatten grouped_transps into column lists, one row per entry.
concatenated_transps = {"id": [], "text": [], "transpType": [], "provisions": [], "provision_ids": []}

for transp in grouped_transps:
    if type(transp) is not list:
        text = transp["text"]
    else:
        # A group contributes the texts of all its members joined by newlines;
        # every other field is taken from the first member only.
        text = "\n".join(member["text"] for member in transp)
        transp = transp[0]

    # One list of provision labels per national act.
    nat_acts = [
        [prov["label"] for prov in nat_act["provisions"]]
        for nat_act in transp["nationalActs"]
    ]

    concatenated_transps["id"].append(transp["id"])
    concatenated_transps["transpType"].append(transp["transp"])
    concatenated_transps["text"].append(text)
    concatenated_transps["provisions"].append(transp["nationalActs"])
    concatenated_transps["provision_ids"].append(nat_acts)


df = pd.DataFrame(concatenated_transps)
# Rows without any national act get the string "None" in both list columns.
for list_column in ("provisions", "provision_ids"):
    df[list_column] = df[list_column].apply(lambda x: "None" if len(x) < 1 else x)
df.to_json(f"compiled_transpositions_{eu_legislation}_{jurisdiction}.json", orient = "records", indent = 1)
df.head()

Unnamed: 0,id,text,transpType,provisions,provision_ids
0,art_1,,,,
1,art_1__par_1,1. A European Investigation Order (EIO) is a j...,,,
2,art_1__parMD5_acafcff5747b169b7af8a20c228069e4,The EIO may also be issued for obtaining evide...,,,
3,art_1__par_2,2. Member States shall execute an EIO on the b...,,,
4,art_1__par_3,3. The issuing of an EIO may be requested by a...,Explicitly transposed,"[{'provisions': [{'label': 'Article 31', 'text...",[[Article 31]]
