# BUNDESDATA RAG 


In [28]:
import json, requests
from typing import Any, List 
from langchain_core.documents import Document

## Daten laden und vorbereiten 

In [39]:
# Root URL of the API queried by build_documents(): it is requested as-is to
# obtain the list of roads, and extended with "/{road_id}/services/{service}"
# for every per-road request.
BASEURL = "https://verkehr.autobahn.de/o/autobahn"
# Service endpoint names requested for every road. Each name is also the key
# that get_items() looks for in the corresponding response payload.
SERVICES = [
    "roadworks",
    "parking_lorry",
    "warning",
    "closure",
    "electric_charging_station",
]

**stringify_all_fields:**
Serializes a Python object (typically the dict of one API item) into a pretty-printed JSON string with sorted keys, so that every field ends up in the text we index via LangChain documents.

In [40]:
def stringify_all_fields(obj: Any) -> str:
    """Return *obj* rendered as human-readable JSON text.

    Keys are sorted so the same data always yields the same string, the
    output is indented by two spaces, and non-ASCII characters (e.g. German
    umlauts) are kept verbatim instead of being escaped.
    """
    dump_options = {
        "ensure_ascii": False,
        "indent": 2,
        "sort_keys": True,
    }
    return json.dumps(obj, **dump_options)

In [41]:
def get_items(payload: Any, service: str) -> List[Any]:
    """Extract the list of items for *service* from a decoded response.

    Three payload shapes are handled:

    * a list is returned unchanged;
    * a dict whose *service* key holds a list yields that list;
    * anything else is wrapped in a single-element list.
    """
    if isinstance(payload, list):
        return payload

    if isinstance(payload, dict):
        nested = payload.get(service)
        if isinstance(nested, list):
            return nested

    # Fallback: treat the whole payload as one item.
    return [payload]

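**to_documents:**
Converts the list of items from one JSON response into LangChain documents (one document per item), using stringify_all_fields() to get a text representation of all fields.
Additionally, the road_id, service, title, and description are written at the top of each document's text, and the road_id, service, source URL, item index, and title are stored as metadata so that every document can be traced back to its road and endpoint.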

In [42]:
def _field_as_text(value: Any) -> str:
    """Return *value* as plain text suitable for the document header.

    Falsy values become an empty string, strings are returned unchanged,
    lists/tuples are joined line by line, and anything else is passed
    through ``str()``.
    """
    if not value:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        # Multi-line fields may arrive as a sequence of lines; join them
        # instead of embedding the Python list repr in the indexed text.
        return "\n".join(str(part) for part in value).strip()
    return str(value)


def to_documents(road_id: str, service: str, items: List[Any], source_url: str) -> List[Document]:
    """Convert the items of one service response into LangChain documents.

    One ``Document`` is created per item. Its ``page_content`` starts with a
    short header (road, service and, when available, title and description)
    followed by the full item serialized via ``stringify_all_fields``.

    Args:
        road_id: Identifier of the road the items belong to.
        service: Name of the service endpoint the items came from.
        items: Items extracted from the response; non-dict items are
            supported and simply get no title/description.
        source_url: URL the items were fetched from (stored in metadata).

    Returns:
        A list with one ``Document`` per element of *items*, in order. The
        metadata holds ``road_id``, ``service``, ``source_url``,
        ``item_index`` and ``title`` (always a string, possibly empty).
    """
    docs: List[Document] = []
    for i, item in enumerate(items):
        title = ""
        description = ""
        if isinstance(item, dict):
            # Fall back to the alternative key when the primary one is
            # missing or empty, then normalize to text so list-valued
            # fields do not end up as "['line 1', 'line 2']".
            title = _field_as_text(item.get("title") or item.get("name"))
            description = _field_as_text(item.get("description") or item.get("text"))

        page_content = "\n".join(
            [
                f"road: {road_id}",
                f"service: {service}",
                (f"title: {title}" if title else ""),
                (f"description: {description}" if description else ""),
                "",
                "ALL_FIELDS_JSON:",
                stringify_all_fields(item),
            ]
        ).strip()

        docs.append(
            Document(
                page_content=page_content,
                metadata={
                    "road_id": road_id,
                    "service": service,
                    "source_url": source_url,
                    "item_index": i,
                    "title": title,
                },
            )
        )
    return docs

**build_documents():**
This function performs the API calls. It first fetches the list of road IDs, then requests every service (endpoint) for every road, converts each response into LangChain documents, and returns all of them in a single list.

In [43]:
def build_documents() -> List[Document]:
    """Fetch every service of every road and return the resulting documents.

    The road list is read from the ``"roads"`` key of the response at
    ``BASEURL``. For each road and each entry of ``SERVICES`` the matching
    endpoint is requested, its items are extracted with ``get_items`` and
    converted with ``to_documents``. All requests share one HTTP session and
    use a 20-second timeout.

    Returns:
        All documents, ordered by road and then by service.

    Raises:
        requests.HTTPError: If any request returns an error status; the
            whole run is aborted in that case.
    """

    def fetch_json(session, url):
        # Single place for the request / status check / decode sequence.
        response = session.get(url, timeout=20)
        response.raise_for_status()
        return response.json()

    collected: List[Document] = []
    with requests.Session() as session:
        road_ids = fetch_json(session, BASEURL)["roads"]

        for road_id in road_ids:
            for service in SERVICES:
                endpoint = f"{BASEURL}/{road_id}/services/{service}"
                entries = get_items(fetch_json(session, endpoint), service)
                collected += to_documents(road_id, service, entries, endpoint)
    return collected