# Universal EUR-Lex parser


In [3]:
import sys
from pathlib import Path

# Go up two levels: sandbox → ingestion → crss
project_root = Path.cwd().parents[1]

# Notebook cells get re-run; only add the root once so sys.path does not
# accumulate duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))


In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON

def fetch_ai_act_robust(celex_id="32017R0745", limit=50):
    """Print the CELEX-identified documents linked from an EU legal act.

    Queries the Publications Office SPARQL endpoint for the work whose CELEX
    identifier is ``celex_id`` and prints one
    ``Relation: <predicate> --> <CELEX>`` line for every returned property
    whose target also carries a CELEX identifier.

    NOTE(review): despite the function name, the default ``32017R0745``
    appears to be the Medical Devices Regulation rather than the AI Act --
    confirm which act is intended.

    Args:
        celex_id: CELEX identifier of the act to look up.
        limit: Maximum number of rows requested from the endpoint; must be a
            non-negative integer. The cap counts every (predicate, target)
            row, including rows without a CELEX target, which are not
            printed.

    Returns:
        None. Results and errors are reported on standard output.
    """
    endpoint_url = "https://publications.europa.eu/webapi/rdf/sparql"

    # Escape the characters that would terminate or break a double-quoted
    # SPARQL string literal, so an unusual ID cannot alter the query.
    safe_id = (
        str(celex_id)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    row_limit = int(limit)

    # We find the 'Work' that has the CELEX ID, then look for its properties
    query = f"""
    PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>

    SELECT DISTINCT ?p ?target_celex WHERE {{
        ?work cdm:resource_legal_id_celex "{safe_id}"^^<http://www.w3.org/2001/XMLSchema#string> .
        ?work ?p ?target .

        # If the target is another legal document, get its CELEX ID too
        OPTIONAL {{ ?target cdm:resource_legal_id_celex ?target_celex . }}
    }} LIMIT {row_limit}
    """

    sparql = SPARQLWrapper(endpoint_url)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    # Exploratory notebook helper: any failure (network, endpoint, malformed
    # response) is reported on stdout instead of raising.
    try:
        results = sparql.query().convert()
        bindings = results["results"]["bindings"]

        # Check for emptiness first so the "Data found" header is never
        # printed together with the "No data found" message.
        if not bindings:
            print("No data found. The CELEX ID might not be indexed in the RDF store yet.")
            return

        print(f"--- Data found for {celex_id} ---")

        printed = 0
        for row in bindings:
            target = row.get("target_celex", {}).get("value")
            if target is None:
                # Property whose target has no CELEX ID; nothing to report.
                continue
            predicate = row["p"]["value"].split("#")[-1]  # Simplifies the URL to a name
            print(f"Relation: {predicate} --> {target}")
            printed += 1

        if not printed:
            # Make the "rows returned, but nothing printable" case visible.
            print("No linked documents with a CELEX ID in the returned rows.")

    except Exception as e:
        print(f"Error: {e}")

# Run the lookup with the default CELEX ID; results are printed to stdout.
fetch_ai_act_robust()

--- Data found for 32017R0745 ---
Relation: work_cites_work --> 52013XX1207(01)
Relation: work_cites_work --> 52012AE2185
Relation: work_cites_work --> 32002L0098
Relation: resource_legal_amends_resource_legal --> 32001L0083
Relation: work_cites_work --> 31995L0046
Relation: work_cites_work --> 31985L0374
Relation: work_cites_work --> 32011R0182
Relation: work_cites_work --> 32016Q0512(01)
Relation: work_cites_work --> 32001R0045
Relation: resource_legal_amends_resource_legal --> 32002R0178
Relation: work_cites_work --> 12016P/TXT
Relation: resource_legal_based_on_resource_legal --> 12016E114
Relation: resource_legal_repeals_resource_legal --> 31993L0042
Relation: work_cites_work --> 32011H0696
Relation: resource_legal_repeals_resource_legal --> 31990L0385
Relation: resource_legal_implicitly_repeals_resource_legal --> 32000L0070
Relation: work_cites_work --> 31998L0079
Relation: work_cites_work --> 32004R0726
Relation: work_cites_work --> 62009CJ0400
Relation: work_cites_work --> 32008