# Testing

## Imports

In [1]:
# Install requirements if needed
# !pip install beautifulsoup4 lxml

# 1. Import your parser
import sys
import json
import os
from pprint import pprint
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

from semantic_html.parser import parse_note

### Load HTML

In [2]:
# 2. Define a small test HTML string
html = """
<!DOCTYPE html>
<html>
    <head>
        <meta charset="utf-8" />
    </head>
    <body>
        <div class="zotero-notes">
            <div class="zotero-note">
                <h1>Header1</h1>
                <h2>header2</h2>
                <h3>header3</h3>
                <p><code>p.44</code></p>
                <p>comment related to p. 44 and header 3</p>
                <blockquote>
                    <p style="margin-left: 30px;">
                        <h1>Quote H1</h1>
                        “<em>M@áan and Susan</em> <code>[p. 45]</code>is a <span style="font-style: italic;">concept</span> like #animal oder #human#oid. <strong><a href="wd:Paris" rel="noopener noreferrer nofollow">Paris</a></strong> is a <em>city</em>. that is
                        testified by <u>wikipedia</u>.”
                    </p>
                </blockquote>
                <p>
                    <span class="citation">(<a href="zotero://select/library/items/KUHYUVEH">“test”</a>)</span>
                </p>
                <h3>header4</h3>
                <p><code>p.54</code></p>
                <p>comment related to p. 54 and header 4</p>
                <blockquote>
                    <p style="margin-left: 30px;">
                        “<em>Woman</em> is a concept. <strong><a href="wd:Rome" rel="noopener noreferrer nofollow">Rome</a></strong> is a <em>city</em>. that is testified by <u>wikipedia</u>.”
                    </p>
                </blockquote>
                <pre>This will be ignored <em>Man22</em></pre>
            </div>
        </div>
    </body>
</html>

"""

In [2]:
html="""
<p>P1</p>
<blockquote><p>quote <em>Emphasis</em></p></blockquote>
<p>P2 <h1><a href="wd:Paris">Paris</a></h1></p>
"""

### Load Config

In [3]:
metadata = {"prov:wasAttributedTo":{"@id":"https://github.com/ch-sander"}, "prov:wasGeneratedBy": "https://github.com/ch-sander/semantic-html/blob/main/tests/Testing.ipynb"}


# 3. Define your mapping
mapping = {
    # Top-level mapping keys correspond to semantic classes used during parsing.
    # These must match expected parser categories (Note, Document, Annotation, etc.)
    "@type": ["ResearchNote"],  # Default type for the root Note
    "@id": "https://semantic-html.org/note1",

    "Document": {
        # Represents text blocks (e.g. sentences or paragraphs) in the note. Each doc is the reference doc for text positions in the WADM annotations. If no parent doc is found, 
        # the entire note is used.
        "xpath": ["//p"],  # Matches all <p> tags
        "types": ["Doc"]  # This type will be assigned to these nodes
    },

    "Annotation": {
        # Annotations are inline elements that can be nested in Document.
        # Each sub-entry defines a different category of annotation. Their names, scopes, or purposes
        # do not matter for the mapping.

        "Concept": {
            # Marks concepts, e.g. keywords or philosophical terms.
            'xpath': ['//em | //i | //span[contains(@style, "italic")]'],  # e.g. <em>term</em>
            'split': r"\s*(?:,|\band\b)\s*", # pattern to split element text (first)
            # Split at either a comma or the word "and", with optional surrounding whitespace (non-capturing group)
            # 'find': r"\w+", # pattern to extract relevant text (after splitting)
            "types": ["Annotation", "Concept"]
        },
        "Entity": {
            # Marks named entities, typically rendered as bold text.
            "xpath": ["//strong | //b"],  # e.g. <strong>Rome</strong>
            "types": ["Entity"]
        },

        "Entity2": {
            # Matches entities in plain text using regex, e.g. #human# or #animal
            "regex": [r"#\w+#?"],  # Applied before IGNORE rules! Matches '#' + word + optional trailing '#'
            "types": ["Entity_hash"]
        },

        "Reference1": {
            # Marks generic references, e.g. underlined text.
            "xpath": ["//u"],  # e.g. <u>source</u>
            "types": ["Reference"]
        },

        "Reference2": {
            # Marks Zotero-style citations with specific class.
            'xpath': ['//span[@class="citation"]'],
            "types": ["Reference Zotero"]
        }
    },

    "Locator": {
        # Represents location information (e.g. page numbers).
        "xpath": ["//code"],  # e.g. <code>p.44</code>
        "types": ["Page-Locator"]
    },

    "Structure": {
        # Represents document structure and hierarchy (e.g. headings).
        # Used to model logical organization (chapter, section, etc.).
        "xpath": ["//h1", "//h2", "//h3"],
        "types": ["Structure"]
    },

    "Quotation": {
        # Represents quoted text blocks (e.g. citations or sources).
        "xpath": ["//blockquote"],  # e.g. <blockquote>...</blockquote>
        "types": ["Quotation"]
    },

    "IGNORE": {
        # Defines content to be excluded from parsing, both at HTML and text level.

        "xpath": ["//pre"],  # Removes <pre> blocks entirely
        "regex": ["#"]    # Removes standalone '#' characters from visible text
                          # (use with care – affects all visible text nodes)
    },

    # You may define a JSON-LD context here to control URI mappings (there is a reasonable default as fallback)
    # "@context": {
    #     "@vocab": "https://semantic-html.org/vocab#",
    #     "same:as": {"@id": "owl:sameAs", "@type": "@id"},
    #     "locator": {"@id": "in_locator", "@type": "@id"},
    #     "structure": {"@id": "in_structure", "@type": "@id"},
    #     "note": {"@id": "in_note", "@type": "@id"}
    # },
    "metadata": { # Each key is added to each JSON object in the @graph
        "prov:wasAttributedTo": {
            "@id": "https://github.com/ch-sander"
        },
        "prov:wasGeneratedBy": "https://github.com/ch-sander/semantic-html/blob/main/tests/Testing.ipynb"
    },
    "wadm": { # additional metadata and context for the WADM JSON-LD
        "metadata": {
            "creator": "https://github.com/ch-sander"
        },
        "@context": "https://www.w3.org/ns/anno.jsonld"
    }        
}

## Parse

In [4]:
result = parse_note(html, 
                    mapping, 
                    rdfa=True,
                    wadm=True,
                    tei=True,
                    remove_empty_tags=True,
                    conll={
                            'blacklist': ['Doc', 'Structure'], 
                            'max_span_tokens':10,
                            'type_whitelist':['Doc']
                            }
                    # note_uri="https://semantic-html.org/note1", # here given as '@id' key in mapping
                    # metadata=metadata # here included in in mapping
                    )

## Results

### JSON-LD

In [8]:
print(json.dumps(result["JSON-LD"], indent=4, ensure_ascii=False))

{
    "@context": {
        "xsd": "http://www.w3.org/2001/XMLSchema#",
        "schema": "http://schema.org/",
        "doco": "http://purl.org/spar/doco/",
        "dcterms": "http://purl.org/dc/terms/",
        "prov": "http://www.w3.org/ns/prov#",
        "owl": "http://www.w3.org/2002/07/owl#",
        "@vocab": "https://semantic-html.org/vocab#",
        "Note": "doco:Document",
        "Structure": "doco:DiscourseElement",
        "Locator": "ex:Locator",
        "Doc": "doco:Section",
        "Annotation": "schema:Comment",
        "Quotation": "doco:BlockQuotation",
        "note": {
            "@id": "inNote",
            "@type": "@id"
        },
        "structure": {
            "@id": "inStructure",
            "@type": "@id"
        },
        "locator": {
            "@id": "hasLocator",
            "@type": "@id"
        },
        "quotation": {
            "@id": "inQuotation",
            "@type": "@id"
        },
        "sameAs": {
            "@id": "owl:sameAs"

#### WADM JSON-LD

In [11]:
print(json.dumps(result["WADM"], indent=4, ensure_ascii=False))

{
    "@context": "https://www.w3.org/ns/anno.jsonld",
    "@graph": [
        {
            "@type": "Annotation",
            "@id": "urn:uuid:0a371e18-2bbc-4238-bd1d-d57ede55dbc4",
            "created": "2025-10-05T13:48:40.650197",
            "motivation": "identifying",
            "target": {
                "source": "urn:uuid:5b876637-c244-46ab-939b-4c3afef24ecb",
                "selector": {
                    "type": "Choice",
                    "items": [
                        {
                            "type": "TextQuoteSelector",
                            "exact": "Header1",
                            "prefix": "",
                            "suffix": "\n\nheader2\n\nheader3\n\np.44\n\ncomm"
                        },
                        {
                            "type": "TextPositionSelector",
                            "start": 0,
                            "end": 7
                        },
                        {
                            "

### RDFa

In [13]:
print(result["RDFa"])

<html xmlns:xsd="http://www.w3.org/2001/XMLSchema#" xmlns:schema="http://schema.org/" xmlns:doco="http://purl.org/spar/doco/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:owl="http://www.w3.org/2002/07/owl#" vocab="https://semantic-html.org/vocab#">
    <head>
        </head>
    <body>
        <div class="zotero-notes">
            <div class="zotero-note">
                <h1 typeof="Structure">Header1</h1>
                <h2 typeof="Structure">header2</h2>
                <h3 typeof="Structure">header3</h3>
                <p typeof="Doc"><code typeof="Page-Locator">p.44</code></p>
                <p typeof="Doc">comment related to p. 44 and header 3</p>
                <blockquote typeof="Quotation">
                    <p style="margin-left: 30px;" typeof="Doc">
                        “<em typeof="Annotation Concept">M@áan and Susan</em> <code typeof="Page-Locator">[p. 45]</code>is a <span style="font-style: italic;" typeof="Annotation 

### TEI

In [14]:
print(result["TEI"])

<?xml version='1.0' encoding='utf-8'?>
<tei:TEI xmlns:tei="http://www.tei-c.org/ns/1.0">
  <tei:teiHeader/>
  <tei:text>
    <tei:body>
      <tei:div xml:id="urn:uuid:7937eef9-0352-4d90-b17d-a7bcbf70eca3">
        <tei:head xml:id="urn:uuid:7937eef9-0352-4d90-b17d-a7bcbf70eca3">Header1</tei:head>
        <tei:div xml:id="urn:uuid:9570e9d2-9465-47b1-8ffb-f7605624b6d5">
          <tei:head xml:id="urn:uuid:9570e9d2-9465-47b1-8ffb-f7605624b6d5">header2</tei:head>
          <tei:div xml:id="urn:uuid:6b8adb41-a90a-4f3f-8de3-f3cb75342026">
            <tei:head xml:id="urn:uuid:6b8adb41-a90a-4f3f-8de3-f3cb75342026">header3</tei:head>
            <tei:milestone unit="page" n="p.44" xml:id="urn:uuid:1df3a02d-f593-4806-ab75-cede64335e07"/>
            <tei:milestone unit="page" n="[p. 45]" xml:id="urn:uuid:3c75e2f3-300e-46ef-a80b-70004feb3912"/>
            <tei:quote xml:id="urn:uuid:217a4b87-1c23-4a79-9cb7-5279ba848ffa">“M@áan and Susan [p. 45]is a concept like animal oder humanoid. Paris is

### CoNLL

In [27]:
print(result["CoNLL"])

p	B-Page-Locator
.	I-Page-Locator
44	I-Page-Locator

“	O
M	B-Concept
@	I-Concept
áan	I-Concept
and	O
Susan	B-Concept
[	B-Page-Locator
p	I-Page-Locator
.	I-Page-Locator
45	I-Page-Locator
]	I-Page-Locator
is	O
a	O
concept	B-Concept
like	O
animal	B-Entity_hash
oder	O
humanoid	B-Entity_hash
.	O
Paris	B-Entity
is	O
a	O
city	B-Concept
.	O
that	O
is	O
testified	O
by	O
wikipedia	B-Reference
.	O
”	O

(	B-Reference Zotero
“	I-Reference Zotero
test	I-Reference Zotero
”	I-Reference Zotero
)	I-Reference Zotero

p	B-Page-Locator
.	I-Page-Locator
54	I-Page-Locator

“	O
Woman	B-Concept
is	O
a	O
concept	O
.	O
Rome	B-Entity
is	O
a	O
city	B-Concept
.	O
that	O
is	O
testified	O
by	O
wikipedia	B-Reference
.	O
”	O



# Demo

In [29]:
html = """

<!DOCTYPE html><html><head><meta charset="utf-8"></head><body><div class="zotero-notes"><div class="zotero-note"><p><code>MPSI III, 382-385</code></p>
<p><strong>censorship, Jesuit, class room, pedagogy, astronomy, conservatism, Eucharist, God</strong></p>
<p><code>MPSI III, 382</code></p>
<h1>Decretum R. P. N. Generalis Praepositi Francisci Borgiae in mense novembri 1565</h1>
<blockquote>
<p style="margin-left: 30px;">“1. Nihil defendatur vel doceatur quod adversetur vel deroget vel minus fidei faveat tam in philosophia quam in <strong>theolgia</strong>.</p>
<p style="margin-left: 30px;">2. Nihil defendatur quod sit contra <strong>axiomata</strong> recepta philosophorum; qualia sunt: tantum sunt <strong>quatuor genera causarum</strong>; tantum sunt <strong>quatuor elementa</strong>, et tria principia rerum naturalium. Ignis est <strong>calidus</strong> et <strong>siccus</strong>, aer <strong>humidus</strong> et <strong>calidus</strong> etc.</p>
<p style="margin-left: 30px;">3. Nihil defendatur contra communissimam <strong>philosophorum</strong> aut <strong>theologorum</strong> sententiam, ut quod agentia naturalia agant sine media, etc.</p>
<p style="margin-left: 30px;">4. Nulla opinio defendatur contra communem, inconsulto <strong>superiore</strong> aut praefecto.</p>
<p style="margin-left: 30px;">5. Nulla <strong>nova opinio</strong> in philosophia aut theologia introducatur, inconsulto superiore aut praefecto.”</p>
</blockquote>
<p><code>MPSI III, 382</code></p>
<h2>Opiniones sustenendae a nostrisque docendae ut verae ac tenendae</h2>
<blockquote>
<p style="margin-left: 30px;">“De <strong>Deo</strong>- 1. Deus est infinitae virtutis intensive, et agens liberum secundum veram philosophiam.</p>
<p style="margin-left: 30px;">2. Deus habet <strong>providentiam</strong> omnium inferiorum etiam singularium et humanarum rerum, et cognoscit omnia: <strong>praeterita</strong>, <strong>praesentia</strong> et <strong>futura</strong>, secundum veram philosophiam.</p>
<p style="margin-left: 30px;">De <strong>angelis</strong>- 3. Angeli ponuntur vere in praedicamento, et non sunt purus actus secundum veram philosophiam.</p>
<p style="margin-left: 30px;">4. Angeli sunt in <strong>loco</strong> et moventur localiter, ita ut non sit asserendum, in nullo loco esse aut non moveri, ita ut substantia sit praesens, modo aliquo uni, postea alteri loco.</p>
<p style="margin-left: 30px;">5. <strong>Anima intellectiva</strong> non est assistens, sed vere forma informans secundum <em>Aristotelem</em> et veram philosophiam.</p>
<p style="margin-left: 30px;">6. Anima intellectiva non est una numero in omnibus hominibus, sed in singulis hominibus distincta et propria secundum Aristotelem et veram philosophiam.</p>
<p style="margin-left: 30px;">7. Anima intellectiva est <strong>immortalis</strong> secundum Aristotelem et veram philosophiam.</p>
<p style="margin-left: 30px;">8. <strong>Non sunt plures animae in homine</strong> : intellectiva, sensitiva et vegetativa, secundum Aristotelem et veram philosophiam.</p>
<p style="margin-left: 30px;">9. Anima in homine aut in <strong>bruto</strong> non est <strong>pilis</strong> aut <strong>capillis</strong>.</p>
<p style="margin-left: 30px;">10. <strong>Potentiae sensitivae et vegetativae</strong> in homine aut in bruto non subiectantur in <strong>materia prima</strong> immediate.</p>
<p style="margin-left: 30px;">11. <strong>Humores</strong> aliquo modo sunt partes hominis seu animal is.</p>
<p style="margin-left: 30px;">12. Tota quidditas substantiae compositae non est sola <strong>forma</strong> sed <strong>materia</strong> et forma.[385]</p>
<p style="margin-left: 30px;">13. <strong>Praedicabilia</strong> sunt tantum quinque.</p>
<p style="margin-left: 30px;">14. <strong>Essentia divina</strong> non habet unam subsistentiam communem <strong>tribus personis</strong>, sed tantum tres subsistentias personales.</p>
<p style="margin-left: 30px;">15. <strong>Peccatum</strong> et malum formaliter est privatio et non positivum quid.</p>
<p style="margin-left: 30px;">16. <strong>Praedestinationis</strong> non datur causa ex parte nostra.</p>
<p style="margin-left: 30px;">[17. <strong>Caelum</strong> constare ex materia et forma, est communis, verius et conformius philosophis et theologis; et oppositum non tenetur secundum <em>Aristotelem</em>]</p>
<p style="margin-left: 30px;">Hic ordo praescriptus a praeceptoribus nostris omnino servetur, neque contra propositiones hic scriptas, neque publice neque privatim ullo modo loquantur, neque pietatis neque veritatis neque alterina rei praetextu aliter doceant quam constitutum et definitum est. Haec enim docenda a nostris non solum admonemus sed etiam statuimus.”</p>
</blockquote>
</div></div></body></html>

"""
context = {
    'prov': 'http://www.w3.org/ns/prov#',
    'owl': 'http://www.w3.org/2002/07/owl#',
    '@vocab': 'https://semantic-html.org/vocab#',
    'note': {
        '@id': 'inNote',
        '@type': '@id'

    },
    'structure': {
        '@id': 'inStructure',
        '@type': '@id'

    },
    'locator': {
        '@id': 'hasLocator',
        '@type': '@id'

    },
    'sameAs': {
        '@type': '@id'

    },
    'doc': {
        '@id': 'inDoc',
        '@type': '@id'

    },
    'level': {
        '@id': 'doco:hasLevel',
        '@type': 'xsd:int'

    },
    'generatedAtTime': {
        '@id': 'prov:generatedAtTime',
        '@type': 'xsd:dateTime'

    }

}

mapping = {
    "@type": ["ResearchNote"],
    "@id": "https://semantic-html.org/note1",
    "Document": {
        "xpath": ["//p"],
        "types": ["Paragraph"]
    },

    "Annotation": {

        "Entity": {
            'xpath': ['//em | //i | //span[contains(@style, "italic")]'],
            'split': r"\s*(?:,|\band\b|\+|\&)\s*",
            "types": ["Entity"]
        },
        "Concept": {
            "xpath": ["//strong"],
            "types": ["Concept"],
            'split': r"\s*(?:,|\band\b|\+|\&)\s*"
        },
        "Reference": {
            "xpath": ["//u"],
            "types": ["Reference"]
        }

    },

    "Locator": {
        "xpath": ["//code"],
        "types": ["Page-Locator"]
    },

    "Structure": {
        "xpath": ["//h1", "//h2", "//h3"],
        "types": ["Structure"]
    },

    "Quotation": {
        "xpath": ["//blockquote"],
        "types": ["Quotation"]
    },

    "IGNORE": {
        "xpath": ["//pre"],
        "regex": ["#"] 
    },
    "metadata": { 
        "prov:wasAttributedTo": {
            "@id": "https://github.com/ch-sander"
        },
        "prov:wasGeneratedBy": "https://github.com/ch-sander/semantic-html/blob/main/tests/Testing.ipynb#SCIGMA"
    },
    "wadm": {
        "metadata": {
            "creator": "https://github.com/ch-sander"
        },
        "@context": "https://www.w3.org/ns/anno.jsonld"
    },
    "@context": context
}

result = parse_note(html, 
                    mapping, 
                    rdfa=False,
                    wadm=False,
                    tei=False,
                    conll=False,
                    note_uri="https://semantic-html.org/note2"
                    )

print(json.dumps(result["MAP"], indent=4, ensure_ascii=False))


{
    "@type": [
        "ResearchNote"
    ],
    "@id": "https://semantic-html.org/note1",
    "Document": {
        "xpath": [
            "//p"
        ],
        "types": [
            "Paragraph"
        ]
    },
    "Annotation": {
        "Entity": {
            "xpath": [
                "//em | //i | //span[contains(@style, \"italic\")]"
            ],
            "split": "\\s*(?:,|\\band\\b|\\+|\\&)\\s*",
            "types": [
                "Entity"
            ]
        },
        "Concept": {
            "xpath": [
                "//strong"
            ],
            "types": [
                "Concept"
            ],
            "split": "\\s*(?:,|\\band\\b|\\+|\\&)\\s*"
        },
        "Reference": {
            "xpath": [
                "//u"
            ],
            "types": [
                "Reference"
            ]
        }
    },
    "Locator": {
        "xpath": [
            "//code"
        ],
        "types": [
            "Page-Locator"
        ]
 

In [30]:
print(json.dumps(result["JSON-LD"], indent=4, ensure_ascii=False))

{
    "@context": {
        "prov": "http://www.w3.org/ns/prov#",
        "owl": "http://www.w3.org/2002/07/owl#",
        "@vocab": "https://semantic-html.org/vocab#",
        "note": {
            "@id": "inNote",
            "@type": "@id"
        },
        "structure": {
            "@id": "inStructure",
            "@type": "@id"
        },
        "locator": {
            "@id": "hasLocator",
            "@type": "@id"
        },
        "sameAs": {
            "@type": "@id"
        },
        "doc": {
            "@id": "inDoc",
            "@type": "@id"
        },
        "level": {
            "@id": "doco:hasLevel",
            "@type": "xsd:int"
        },
        "generatedAtTime": {
            "@id": "prov:generatedAtTime",
            "@type": "xsd:dateTime"
        }
    },
    "@graph": [
        {
            "@type": [
                "ResearchNote"
            ],
            "@id": "https://semantic-html.org/note2",
            "generatedAtTime": "2025-10-05T12:2

In [31]:
from pyoxigraph import Store, RdfFormat

In [32]:
store = Store()
store.load(json.dumps(result["JSON-LD"]),format=RdfFormat.JSON_LD)

In [34]:
print(store.dump(format=RdfFormat.TRIG).decode("utf-8"))

<urn:uuid:6fe951cf-ae54-4c18-adfa-50588c1092a4> <http://www.w3.org/ns/prov#wasGeneratedBy> "https://github.com/ch-sander/semantic-html/blob/main/tests/Testing.ipynb#SCIGMA" ;
	<http://www.w3.org/ns/prov#wasAttributedTo> <https://github.com/ch-sander> ;
	<https://semantic-html.org/vocab#inDoc> <https://semantic-html.org/note2> ;
	<https://semantic-html.org/vocab#hasLocator> <urn:uuid:6da78a07-6162-4026-aac8-9296f797c805> ;
	<https://semantic-html.org/vocab#inStructure> <urn:uuid:cce76a6c-88ef-40bd-8b85-aa717fe3a81d> ;
	<https://semantic-html.org/vocab#inNote> <https://semantic-html.org/note2> ;
	<https://semantic-html.org/vocab#treeIndex> 77 ;
	<https://semantic-html.org/vocab#orderIndex> 80 ;
	<https://semantic-html.org/vocab#text> "Hic ordo praescriptus a praeceptoribus nostris omnino servetur, neque contra propositiones hic scriptas, neque publice neque privatim ullo modo loquantur, neque pietatis neque veritatis neque alterina rei praetextu aliter doceant quam constitutum et definit

# NER

In [None]:

from semantic_html.parser import project_entities_to_html

# Test HTML and entities
html_input = """
<div class="zotero-note">
    <p>Susan and Paris are mentioned together.</p>
</div>
"""

entities = [
    {"start": 0, "end": 5, "label": "PERSON", "text": "Susan"},
    {"start": 10, "end": 15, "label": "LOC", "text": "Paris"},
]

label2tag = {
    "PERSON": "i",
    "LOC": "strong"
}

output_html = project_entities_to_html(html_input, entities, label2tag)
output_html


In [None]:
from semantic_html.plugins import SpacyNERWrapper

In [None]:
wrapper = SpacyNERWrapper()

In [None]:
conll_data = """Susan   B-PERSON
and     O
Paris   B-LOC
are     O
mentioned   O
together    O
.       O
"""

wrapper.train(conll_data, n_iter=5, resume=True)

In [None]:
plain_text = "Susan and Paris are mentioned together."

label2md = {
    "PERSON": ("*", "*"),   # kursiv für Person
    "LOC": ("`", "`")       # code-style für Location
}

markdown_result = wrapper.predict_markdown(plain_text, label2md)
print(markdown_result)

In [None]:
html_input = """
<div class="zotero-note">
    <p>Susanna and Paris are mentioned together.</p>
</div>
"""

label2tag = {
    "PERSON": "i",
    "LOC": "span"
}

html_result = wrapper.predict_html(html_input, label2tag)
print(html_result)