### Step 5: Create the input for the [Recogito Studio](https://recogitostudio.org/)


Inputs:

- `GALAXY_INPUTS["contextfulMatchedDataset"]` - A JSONL Dataset with context-supported and matched placenames

Outputs:

- `outputs/collection/pausanias_book_{current_book}_chapter_{current_chapter}.xml` - An XML Dataset Collection with one file per book-chapter, ready for Recogito Studio

In [None]:
# Only run on Galaxy
!pip install srsly lxml

In [None]:
import srsly
from lxml import etree

In [None]:
from pathlib import Path

# Load every JSONL record up front; each record is one paragraph of text
# together with its tagged place-name mentions.
data = list(srsly.read_jsonl(GALAXY_INPUTS["contextfulMatchedDataset"][0]["path"]))
NS_TEI = "http://www.tei-c.org/ns/1.0"
NS_XML = "http://www.w3.org/XML/1998/namespace"
NSMAP = {None: NS_TEI}

# All per-chapter files go into the same collection directory.
OUTPUT_DIR = Path("outputs/collection")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


def _new_tei_document(book, chapter):
    """Create an empty TEI skeleton for one book-chapter pair.

    Returns the root element, the <listAnnotation> element that collects the
    stand-off annotations, and the <body> element that receives paragraphs.
    """
    tei = etree.Element("TEI", nsmap=NSMAP, version="3.3.0")
    standoff = etree.SubElement(tei, "standOff", type="recogito_studio_annotations")
    listannotation = etree.SubElement(standoff, "listAnnotation")
    text = etree.SubElement(tei, "text")
    body = etree.SubElement(text, "body")

    head = etree.SubElement(body, "head")
    head.text = f"Book {book}, Chapter {chapter}"
    return tei, listannotation, body


def _write_tei_document(tei, book, chapter):
    """Serialise one finished TEI document into the output collection."""
    etree.ElementTree(tei).write(
        str(OUTPUT_DIR / f"pausanias_book_{book}_chapter_{chapter}.xml"),
        xml_declaration=True,
        encoding="utf-8",
        pretty_print=True,
    )


counter = 1  # 1-based index of the current <p> inside the current document
uid_counter = 0  # annotation ids are unique across all generated files
current_book = None
current_chapter = None
tei = None

# Each book-chapter pair becomes its own XML file.
# NOTE(review): assumes records of the same book-chapter are contiguous in the
# input; a pair that reappears later would overwrite its earlier file.
for record in data:
    book = record.get("book")
    chapter = record.get("chapter")

    # Start a new document on the first record and whenever the book OR the
    # chapter changes (chapter numbers repeat across books).
    if tei is None or (book, chapter) != (current_book, current_chapter):
        if tei is not None:
            _write_tei_document(tei, current_book, current_chapter)
        tei, listannotation, body = _new_tei_document(book, chapter)
        counter = 1
        current_book = book
        current_chapter = chapter

    p = etree.SubElement(body, "p")
    p.text = record.get("text")

    # Both ends of an annotation target point into the same paragraph.
    p_path = f"/TEI[1]/text[1]/body[1]/p[{counter}]"

    # A record without tagged mentions still contributes its paragraph.
    for mention in record.get("mentions_tagged") or []:
        start = mention.get("start")
        end = mention.get("end")
        annotation = etree.SubElement(
            listannotation,
            "annotation",
            target=f"{p_path}::{start} {p_path}::{end}",
        )
        annotation.set(f"{{{NS_XML}}}id", f"UID-FAKE-{uid_counter}")

        # NOTE(review): assumes "vector_db" is a non-empty list whose first
        # entry starts with the ToposText place id - confirm against the
        # upstream matching step.
        topos_id = mention.get("vector_db")[0][0]

        etree.SubElement(annotation, "rs", ana=f"https://topostext.org/place/{topos_id}")
        uid_counter += 1

    counter += 1

# Flush the last document; nothing to write when the input was empty.
if tei is not None:
    _write_tei_document(tei, current_book, current_chapter)