In [4]:
from datasets import load_dataset
import json

# Load the EU-ADR dataset from the "bigbio/euadr" repository, selecting the
# "euadr_bigbio_kb" configuration (second positional argument of load_dataset).
# NOTE(review): presumably fetched from the Hugging Face Hub on first run and
# cached afterwards — confirm for offline/CI execution.
dataset = load_dataset("bigbio/euadr", "euadr_bigbio_kb")

# Inspect dataset: prints the available splits with their features and row counts
print(dataset)

# Take one training sample (the first record of the "train" split).
# `train0` is consumed by the later cell that calls print_tree() and parse_to_json().
train0 = dataset["train"][0]


DatasetDict({
    train: Dataset({
        features: ['id', 'document_id', 'passages', 'entities', 'events', 'coreferences', 'relations'],
        num_rows: 300
    })
})


In [None]:
from datasets import load_dataset
import json

# -------------------------------
# Utility functions
# -------------------------------

def _format_leaf(value, max_len=60):
    """Return the one-line display text for a scalar value.

    Strings are wrapped in double quotes; any other value goes through ``str``.
    If ``max_len`` is not None and the text is longer than ``max_len``
    characters, it is cut and suffixed with "..." (for ``max_len >= 3`` the
    result is exactly ``max_len`` characters long). Note that a truncated
    string therefore loses its closing quote.
    """
    text = f'"{value}"' if isinstance(value, str) else str(value)
    if max_len is not None and len(text) > max_len:
        # Reserve three characters for the ellipsis.
        text = text[:max(max_len - 3, 0)] + "..."
    return text


def print_tree(data, prefix="", max_len=60):
    """
    Pretty-print a nested dict/list structure as a tree with box-drawing branches.

    - Scalar dict values are printed on the same line as their key (``key: value``).
    - Nested dicts/lists are printed under their key with a deeper prefix.
    - For a list, only the first element is expanded (labelled ``[0]``); every
      further element is printed as a ``|__ ...`` placeholder line.
    - An empty list is printed as ``[]``.
    - Strings are shown in double quotes; long values are truncated with "...".

    Args:
        data: The object to print (dict, list, or any scalar).
        prefix: Text prepended to every printed line; used by the recursion
            to draw the vertical guides of enclosing levels.
        max_len: Maximum width of a rendered scalar before it is truncated.
            Defaults to 60 (the previously hard-coded width); pass ``None``
            to disable truncation.

    Returns:
        None. The tree is written to standard output via ``print``.
    """
    if isinstance(data, dict):
        last = len(data) - 1
        for i, (key, value) in enumerate(data.items()):
            is_last = i == last
            connector = "└── " if is_last else "├── "
            if isinstance(value, (dict, list)):
                print(f"{prefix}{connector}{key}")
                # Below the last key there is nothing left to connect to,
                # so the guide column is blank instead of a vertical bar.
                print_tree(value, prefix + ("    " if is_last else "│   "), max_len)
            else:
                print(f"{prefix}{connector}{key}: {_format_leaf(value, max_len)}")

    elif isinstance(data, list):
        if not data:
            print(f"{prefix}└── []")
            return
        last = len(data) - 1
        for i, item in enumerate(data):
            is_last = i == last
            connector = "└── " if is_last else "├── "
            if i == 0:
                # Only the first element is expanded, as a sample of the list's shape.
                print(f"{prefix}{connector}[{i}]")
                print_tree(item, prefix + ("    " if is_last else "│   "), max_len)
            else:
                print(f"{prefix}{connector}|__ ...")

    else:
        print(f"{prefix}└── {_format_leaf(data, max_len)}")


def parse_to_json(data):
    """Serialize ``data`` to a JSON string indented by two spaces.

    Non-ASCII characters are written as-is rather than as ``\\uXXXX`` escapes.
    """
    dump_options = {"indent": 2, "ensure_ascii": False}
    formatted = json.dumps(data, **dump_options)
    return formatted


# -------------------------------
# Apply functions to the sample record
# -------------------------------
# Nothing is loaded in this cell: `train0` is the first training record taken
# from the dataset in the earlier loading cell, so that cell must be run first.
# NOTE(review): the saved output of this cell shows type annotations such as
# "(str)" and "[empty list]", which the current print_tree() does not emit —
# it looks stale; re-run the cell to refresh it.


print("\n--- Tree Structure ---")
print_tree(train0)

print("\n--- JSON Representation ---")
print(parse_to_json(train0))



--- Tree Structure ---
├── id (str)
│   └── "0" (str)
├── document_id (str)
│   └── "0" (str)
├── passages (list)
│   ├── [0] (dict)
│   │   ├── id (str)
│   │   │   └── "1" (str)
│   │   ├── type (str)
│   │   │   └── "title" (str)
│   │   ├── text (list)
│   │   │   └── [0] (str)
│   │   │       └── "A sequential procedure for monitoring clinical trials ag... (str)
│   │   └── offsets (list)
│   │       └── [0] (list)
│   │           ├── [0] (int)
│   │           │   └── 0 (int)
│   │           └──  ...
│   └──  ...
├── entities (list)
│   └── [empty list]
├── events (list)
│   └── [empty list]
├── coreferences (list)
│   └── [empty list]
└── relations (list)
    └── [empty list]

--- JSON Representation ---
{
  "id": "0",
  "document_id": "0",
  "passages": [
    {
      "id": "1",
      "type": "title",
      "text": [
        "A sequential procedure for monitoring clinical trials against historical controls."
      ],
      "offsets": [
        [
          0,
          82
       