# BUNDESDATA RAG 


In [28]:
import json, requests
from typing import Any, List 
from langchain_core.documents import Document

## Daten laden und vorbereiten 

In [39]:
# Root endpoint of the API; the road list and all per-road service URLs are built from it.
BASEURL = "https://verkehr.autobahn.de/o/autobahn"

# Service endpoints that are queried for every road.
SERVICES = [
    "roadworks", "parking_lorry", "warning", "closure", "electric_charging_station",
]

**stringify_all_fields:**
Serializes a Python object (typically a dict) into a JSON string. LangChain documents store their content as text, so each API item must be converted to a string before it can be indexed and processed.

In [None]:
def stringify_all_fields(obj: Any) -> str:
    """Serialize ``obj`` to pretty-printed JSON text.

    Keys are sorted and a two-space indent is used. Non-ASCII characters
    (e.g. umlauts) are written verbatim instead of as ``\\uXXXX`` escapes.
    """
    dump_options = {"ensure_ascii": False, "indent": 2, "sort_keys": True}
    return json.dumps(obj, **dump_options)

In [None]:
def get_items(payload: Any, service: str) -> List[Any]:
    """Return the list of items contained in a service response.

    * A list payload is returned unchanged.
    * A dict payload whose ``service`` key holds a list yields that list.
    * Anything else is wrapped in a single-element list.
    """
    if isinstance(payload, list):
        return payload
    if isinstance(payload, dict):
        candidate = payload.get(service)
        if isinstance(candidate, list):
            return candidate
    # Fallback: treat the whole payload as one item.
    return [payload]

**to_documents:**
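Helper that converts the items of a JSON response into LangChain documents, using stringify_all_fields() to produce a text representation of all fields.
Additionally, road_id, service, title, and description are written at the top of each document's text, and road_id, service, source URL, item index, and title are stored as metadata so that every document can be traced back to its road and endpoint.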

In [None]:
def _as_text(value: Any) -> str:
    """Flatten an optional item field into one display string.

    Strings are returned unchanged, missing/empty values become ``""``, and
    lists or tuples are joined line by line (blank entries are dropped).
    """
    if not value:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        parts = (_as_text(part).strip() for part in value)
        return "\n".join(part for part in parts if part)
    return str(value)


def to_documents(road_id: str, service: str, items: List[Any], source_url: str) -> List[Document]:
    """Convert the items of one service response into LangChain documents.

    Each document's text starts with a short summary (road, service and, when
    present, title and description) followed by the complete item as JSON.

    Args:
        road_id: Identifier of the road the items belong to.
        service: Name of the service endpoint the items came from.
        items: Items to convert; non-dict items get no title/description.
        source_url: URL the items were fetched from (stored in the metadata).

    Returns:
        One ``Document`` per item, in input order. The metadata holds
        ``road_id``, ``service``, ``source_url``, ``item_index`` and ``title``.
    """
    docs: List[Document] = []
    for i, item in enumerate(items):
        title = ""
        description = ""
        if isinstance(item, dict):
            # Fall back to "name"/"text" when the primary field is empty.
            # _as_text keeps list-valued fields (e.g. a description delivered
            # as a list of lines) from being rendered as a Python list repr
            # and guarantees that the title stored in the metadata is a str.
            title = _as_text(item.get("title")) or _as_text(item.get("name"))
            description = _as_text(item.get("description")) or _as_text(item.get("text"))

        page_content = "\n".join(
            [
                f"road: {road_id}",
                f"service: {service}",
                (f"title: {title}" if title else ""),
                (f"description: {description}" if description else ""),
                "",
                "ALL_FIELDS_JSON:",
                # Full item, so no field is lost for retrieval.
                stringify_all_fields(item),
            ]
        ).strip()
        docs.append(
            Document(
                page_content=page_content,
                metadata={
                    "road_id": road_id,
                    "service": service,
                    "source_url": source_url,
                    "item_index": i,
                    "title": title,
                },
            )
        )
    return docs

**build_documents():**
This function performs the API calls. It iterates over all available road IDs and the configured services (endpoints) and converts the responses into a list of LangChain documents.

In [None]:
def build_documents() -> List[Document]:
    """Fetch every configured service for every road and return the documents.

    The road list is read from ``BASEURL``; afterwards each road/service
    combination is requested in turn over one shared HTTP session. Any HTTP
    error status raises immediately via ``raise_for_status()``.
    """
    collected: List[Document] = []
    with requests.Session() as session:
        # The base endpoint lists all road ids under the "roads" key.
        index_response = session.get(BASEURL, timeout=20)
        index_response.raise_for_status()
        road_ids = index_response.json()["roads"]

        # Same visiting order as a nested loop: roads outer, services inner.
        targets = [(road_id, service) for road_id in road_ids for service in SERVICES]
        for road_id, service in targets:
            endpoint = f"{BASEURL}/{road_id}/services/{service}"
            response = session.get(endpoint, timeout=20)
            response.raise_for_status()
            # Normalize the payload to a list of items, then wrap each as a document.
            items = get_items(response.json(), service)
            collected.extend(to_documents(road_id, service, items, endpoint))
    return collected