# Testing

In [1]:
# Install requirements if needed (PyYAML provides the `yaml` module imported below)
# !pip install beautifulsoup4 lxml pyyaml

# 1. Import your parser
import sys
import yaml
import json
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

from HTML_rdf_parser.parser import parse_note

# 2. Define a small test HTML string
# Fixture document: wrapper divs with the classes "zotero-notes" and
# "zotero-note" that contain
#   - h1/h2/h3 headings,
#   - <code> page markers ("p.44", "p.54"), each followed by a comment paragraph,
#   - two blockquotes with <em>, <strong> (wrapping wd: links) and <u> markup,
#   - one citation span that links to a zotero:// item,
#   - a trailing <pre> element (the mapping defined below lists "pre" under
#     "IGNORE").
# The literal is handed to parse_note() as-is, so its exact whitespace and
# curly quotes are part of the test input -- do not reformat it.
html = """
<!DOCTYPE html><html><head><meta charset="utf-8"></head><body><div class="zotero-notes"><div class="zotero-note"><h1>Header1</h1>
<h2>header2</h2>
<h3>header3</h3>
<p><code>p.44</code></p>
<p>comment related to p. 44 and header 3</p>
<blockquote>
<p style="margin-left: 30px;">“<em>Man</em> is a concept. <strong><a href="wd:Paris" rel="noopener noreferrer nofollow">Paris</a></strong> is a <em>city</em>. that is testified by <u>wikipedia</u>.”</p>
</blockquote>
<p><span class="citation">(<a href="zotero://select/library/items/KUHYUVEH">“test”</a>)</span></p>
<h3>header4</h3>
<p><code>p.54</code></p>
<p>comment related to p. 54 and header 4</p>
<blockquote>
<p style="margin-left: 30px;">“<em>Woman </em>is a concept. <strong><a href="wd:Rome" rel="noopener noreferrer nofollow">Rome</a> </strong>is a <em>city</em>. that is testified by <u>wikipedia</u>.”</p>
</blockquote>
<pre>This will be ignored <em>Man22</em></pre>
</div></div></body></html>
"""

# 3. Define your mapping
# Each rule pairs HTML tag names ("tags") with the type labels ("types") that
# should be emitted for them; the exact matching semantics live in parse_note,
# which is not shown here. Key order is kept as written.
mapping = {
    # Type label for the note as a whole.
    "Note": {"types": ["Note"]},
    # Plain paragraphs.
    "Document": {"tags": ["p"], "types": ["Doc"]},
    # Inline annotations, grouped by kind.
    "Annotation": {
        # "styles" presumably matches inline CSS as an alternative to the tag
        # -- TODO confirm against parse_note.
        "Concept": {
            "tags": ["em"],
            "styles": ["font-style:italic"],
            "types": ["Annotation", "Concept"],
        },
        "Entity": {"tags": ["strong"], "types": ["Entity"]},
        "Reference": {"tags": ["u"], "types": ["Reference"]},
    },
    # Page markers such as <code>p.44</code>.
    "Locator": {"tags": ["code"], "types": ["Page-Locator"]},
    # Headings.
    "Structure": {"tags": ["h1", "h2", "h3"], "types": ["Structure"]},
    "Quotation": {"tags": ["blockquote"], "types": ["Quotation"]},
    # Elements listed here carry no "types" entry at all.
    "IGNORE": {"tags": ["pre"]},
    # JSON-LD context for the result.
    # NOTE(review): the "owl:" prefix is not declared in this context;
    # presumably parse_note supplies it -- confirm.
    "@context": {
        "@vocab": "https://semantic-html.org/vocab#",
        "same:as": {"@id": "owl:sameAs", "@type": "@id"},
        "locator": {"@id": "in_locator", "@type": "@id"},
        "structure": {"@id": "in_structure", "@type": "@id"},
        "note": {"@id": "in_note", "@type": "@id"},
    },
    "@type": ["ResearchNote"],
}

# 4. Run the parser on the fixture; return_annotated_html=True is requested
#    because the annotated markup is printed further down.
result = parse_note(
    html,
    mapping,
    return_annotated_html=True,
    note_uri="https://semantic-html.org/note1",
)

# 5. JSON-LD view (non-ASCII characters such as curly quotes stay unescaped)
print("=== JSON-LD ===")
jsonld_text = json.dumps(result["jsonld"], ensure_ascii=False, indent=4)
print(jsonld_text)

# 6. Annotated HTML view
print("\n=== Annotated HTML ===")
annotated_html = result["RDFa"]
print(annotated_html)

# 7. YAML view of the same JSON-LD data, in block style
print("\n=== YAML ===")
yaml_text = yaml.dump(
    result["jsonld"],
    Dumper=yaml.Dumper,
    default_flow_style=False,
    allow_unicode=True,
)
print(yaml_text)


=== JSON-LD ===
{
    "@graph": [
        {
            "@id": "https://semantic-html.org/note1",
            "@type": [
                "ResearchNote"
            ],
            "text": {
                "@value": "Header1\nheader2\nheader3\np.44\ncomment related to p. 44 and header 3\n\n“Man is a concept. Paris is a city. that is testified by wikipedia.”\n\n(“test”)\nheader4\np.54\ncomment related to p. 54 and header 4\n\n“Woman is a concept. Rome is a city. that is testified by wikipedia.”",
                "@type": "xsd:string"
            }
        },
        {
            "@id": "urn:uuid:40f26a37-2df9-4389-9bc0-c6fab832014a",
            "@type": [
                "Structure"
            ],
            "text": {
                "@value": "Header1",
                "@type": "xsd:string"
            },
            "note": {
                "@id": "https://semantic-html.org/note1"
            },
            "level": {
                "@value": 1,
                "@type": "xsd:int"
