-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
3 changed files
with
317 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
<!DOCTYPE html> | ||
<html lang="zh"> | ||
|
||
<head> | ||
<meta charset="utf-8"> | ||
<title>grid test</title> | ||
<style>
  /* Page defaults: no outer margin, Kaiti typeface with a serif fallback. */
  body {
    margin: 0;
    font-family: "Kaiti SC", serif;
    /* Vertical layout is switched off for this horizontal grid experiment. */
    /* writing-mode: vertical-rl; */
  }

  /*
   * Inline annotation: small red text set inline with the main text.
   * As an inline grid with no explicit columns, each child .col should be
   * auto-placed on its own row (NOTE(review): confirm in target browsers);
   * grid-auto-rows gives those implicit rows equal 1fr heights.
   */
  .anno {
    color: red;
    font-size: .5rem;
    display: inline-grid;
    grid-auto-rows: 1fr 1fr;
    vertical-align: middle;
  }

  /* One line of annotation characters, laid out as a flex container. */
  .col {
    display: flex;
  }

  /* A single character of main or annotation text. */
  .char {
    vertical-align: middle;
  }
</style>
</head> | ||
|
||
<body> | ||
<h1>Title</h1> | ||
<p> | ||
<span class="char">徐</span> | ||
<span class="char">无</span> | ||
<span class="char">鬼</span> | ||
<span class="char">第</span> | ||
<span class="char">二</span> | ||
<span class="char">十</span> | ||
<span class="anno"> | ||
<span class="col"> | ||
<span class="char">以</span> | ||
<span class="char">義</span> | ||
</span> | ||
<span class="col"> | ||
<span class="char">名</span> | ||
<span class="char">篇</span> | ||
</span> | ||
</span> | ||
<span class="char">四</span> | ||
<span class="char">徐</span> | ||
<span class="char">无</span> | ||
<span class="char">鬼</span> | ||
<span class="char">因</span> | ||
<span class="char">女</span> | ||
<span class="char">商</span> | ||
<span class="char">見</span> | ||
<span class="char">魏</span> | ||
<span class="char">武</span> | ||
<span class="char">侯</span> | ||
<span class="char">,</span> | ||
<span class="char">武</span> | ||
<span class="char">侯</span> | ||
<span class="char">勞</span> | ||
<span class="char">之</span> | ||
<span class="char">曰</span> | ||
<span class="char">:</span> | ||
<span class="char">「</span> | ||
<span class="char">先</span> | ||
<span class="char">生</span> | ||
<span class="char">病</span> | ||
<span class="char">矣</span> | ||
<span class="char">!</span> | ||
<span class="char">苦</span> | ||
<span class="char">於</span> | ||
<span class="char">山</span> | ||
<span class="char">林</span> | ||
<span class="char">之</span> | ||
<span class="char">勞</span> | ||
<span class="char">,</span> | ||
<span class="anno"> | ||
<span class="col"> | ||
<span class="char">音</span> | ||
</span> | ||
<span class="col"> | ||
<span class="char">烏</span> | ||
</span> | ||
</span> | ||
<span class="char">故</span> | ||
<span class="char">乃</span> | ||
<span class="char">肯</span> | ||
<span class="char">見</span> | ||
<span class="char">於</span> | ||
<span class="char">寡</span> | ||
<span class="char">人</span> | ||
<span class="char">。</span> | ||
<span class="char">」</span> | ||
<span class="char">徐</span> | ||
<span class="char">无</span> | ||
<span class="char">鬼</span> | ||
<span class="char">曰</span> | ||
<span class="char">:</span> | ||
<span class="char">「</span> | ||
<span class="char">我</span> | ||
<span class="char">則</span> | ||
<span class="char">勞</span> | ||
<span class="char">於</span> | ||
<span class="char">君</span> | ||
<span class="char">,</span> | ||
<span class="char">君</span> | ||
<span class="char">有</span> | ||
<span class="char">何</span> | ||
<span class="char">勞</span> | ||
<span class="char">於</span> | ||
<span class="char">我</span> | ||
<span class="char">!</span> | ||
<span class="char">君</span> | ||
<span class="char">將</span> | ||
<span class="char">盈</span> | ||
<span class="char">嗜</span> | ||
<span class="char">欲</span> | ||
<span class="char">,</span> | ||
<span class="char">長</span> | ||
<span class="char">好</span> | ||
<span class="char">惡</span> | ||
<span class="char">,</span> | ||
<span class="char">則</span> | ||
<span class="char">性</span> | ||
<span class="char">命</span> | ||
<span class="char">之</span> | ||
<span class="char">情</span> | ||
<span class="char">病</span> | ||
<span class="anno"> | ||
<span class="col"> | ||
<span class="char">力</span> | ||
<span class="char">智</span> | ||
<span class="char">反</span> | ||
<span class="char">下</span> | ||
<span class="char">注</span> | ||
<span class="char">不</span> | ||
<span class="char">離</span> | ||
</span> | ||
<span class="col"> | ||
<span class="char">離</span> | ||
<span class="char">性</span> | ||
<span class="char">下</span> | ||
<span class="char">章</span> | ||
<span class="char">離</span> | ||
<span class="char">於</span> | ||
<span class="char">同</span> | ||
</span> | ||
</span> | ||
<span class="char">矣</span> | ||
<span class="char">;</span> | ||
<span class="char">君</span> | ||
<span class="char">將</span> | ||
<span class="char">黜</span> | ||
<span class="char">嗜</span> | ||
<span class="char">欲</span> | ||
<span class="char">,</span> | ||
<span class="char">掔</span> | ||
<span class="char">好</span> | ||
<span class="char">惡</span> | ||
<span class="char">,</span> | ||
<span class="char">則</span> | ||
<span class="char">耳</span> | ||
<span class="char">目</span> | ||
<span class="char">病</span> | ||
<span class="char">矣</span> | ||
<span class="char">。</span> | ||
</p> | ||
</body> | ||
|
||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
/* Document container: Kaiti typeface (serif fallback), text set vertically
   with lines progressing right to left. */
.doc {
  font-family: "Kaiti SC", serif;
  writing-mode: vertical-rl;
}
|
||
/* Inline annotation: red, half-size text in a 1rem-wide inline flex box.
   NOTE(review): with the vertical-rl writing mode set on .doc, the "column"
   flex direction presumably follows the block axis, placing the two .col
   children side by side rather than stacked — confirm in target browsers. */
.anno {
  color: red;
  font-size: .5rem;
  width: 1rem;
  display: inline-flex;
  flex-direction: column;
}
|
||
/* One annotation column: .5rem wide, i.e. half of the 1rem .anno box. */
.col {
  width: .5rem;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
from typing import Any, Iterable, Dict, Optional | ||
from pathlib import Path | ||
|
||
from spacy.util import minify_html | ||
|
||
from lib.documents import KanripoDoc | ||
|
||
# Module-level cache of the most recent render() output, stored under the
# "parsed" key; app() reads it to answer HTTP requests.
_html: Dict[str, str] = {}
# Text of style.css located next to this module, read once at import time
# and passed as the `style` field of TPL_PAGE when rendering a full page.
_css = (Path(__file__).parent / "style.css").read_text(encoding="utf-8")
|
||
|
||
def render(
    docs: Iterable[KanripoDoc] | KanripoDoc,
    style: str = "anno",
    page: bool = False,
    minify: bool = False,
    options: Optional[Dict[str, Any]] = None,
) -> str:
    """Render one or more documents to HTML markup.

    The result is also stored in the module-level ``_html["parsed"]`` cache
    so that ``app`` can serve it.

    Args:
        docs: A single ``KanripoDoc`` or an iterable of them.
        style: Name of the visualizer to use; only ``"anno"`` is registered.
        page: Passed to the renderer; when true it wraps the output in a
            full HTML page.
        minify: Passed to the renderer; when true the markup is minified.
        options: Renderer options. ``None`` (the default) means no options.

    Returns:
        The rendered markup with leading/trailing whitespace stripped.

    Raises:
        ValueError: If ``style`` is not a registered visualizer name.
    """
    factories = {"anno": AnnotationRenderer}
    if style not in factories:
        raise ValueError(f"Unknown euphoNy style: {style}.")
    if isinstance(docs, KanripoDoc):
        # Accept a bare document by wrapping it in a list.
        docs = [docs]
    # A fresh dict per call avoids sharing one mutable default across calls.
    renderer = factories[style]({} if options is None else options)
    markup = renderer.render(docs, page=page, minify=minify).strip()
    _html["parsed"] = markup
    return markup
|
||
|
||
def serve(
    docs: Iterable[KanripoDoc] | KanripoDoc,
    style: str = "anno",
    page: bool = False,
    minify: bool = False,
    options: Optional[Dict[str, Any]] = None,
    port: int = 8000,
    host: str = "0.0.0.0",
) -> None:
    """Render the documents and serve the result over HTTP until interrupted.

    Args:
        docs: A single ``KanripoDoc`` or an iterable of them.
        style: Name of the visualizer, forwarded to ``render``.
        page: Forwarded to ``render``.
        minify: Forwarded to ``render``.
        options: Renderer options forwarded to ``render``. ``None`` (the
            default) means no options.
        port: TCP port to listen on.
        host: Address to bind. NOTE: the default ``"0.0.0.0"`` listens on
            all network interfaces, not just localhost.
    """
    from wsgiref import simple_server

    # Normalise here so a fresh dict is handed on for every call instead of
    # a single shared mutable default.
    render(
        docs,
        style=style,
        page=page,
        minify=minify,
        options={} if options is None else options,
    )
    # The context manager closes the listening socket when serving stops.
    with simple_server.make_server(host, port, app) as httpd:
        print(f"Using the '{style}' visualizer.")
        print(f"Starting server on http://{host}:{port}...")
        httpd.serve_forever()
|
||
|
||
def app(environ, start_response):
    """WSGI callable that answers every request with the cached markup.

    Sends a ``200 OK`` status with an HTML/UTF-8 content type, then returns
    the string stored in ``_html["parsed"]`` encoded as UTF-8 in a
    single-element list.
    """
    start_response("200 OK", [("Content-type", "text/html; charset=utf-8")])
    markup = _html["parsed"]
    return [markup.encode(encoding="utf-8")]
|
||
|
||
class AnnotationRenderer:
    """Render a Doc with its associated annotations as HTML."""

    style = "anno"

    def __init__(self, options: Optional[Dict[str, Any]] = None) -> None:
        """Create a renderer.

        Args:
            options: Renderer options; accepted but currently unused.
        """
        pass

    def render(
        self, docs: Iterable[KanripoDoc], page: bool = False, minify: bool = False
    ) -> str:
        """Render all documents and concatenate their markup.

        Args:
            docs: Documents to render; each supplies ``text`` and a ``meta``
                mapping read for the ``"annotations"`` and ``"title"`` keys.
            page: When true, wrap the result in ``TPL_PAGE`` with the
                module stylesheet inlined.
            minify: When true, pass the markup through ``minify_html``.

        Returns:
            The rendered HTML markup.
        """
        body = "".join(
            self.render_doc(
                doc.text, doc.meta.get("annotations", {}), doc.meta.get("title", "")
            )
            for doc in docs
        )
        markup = TPL_PAGE.format(style=_css, content=body) if page else body
        return minify_html(markup) if minify else markup

    def render_doc(
        self, text: str, annotations: Dict[int, str], title: Optional[str]
    ) -> str:
        """Render a single document.

        Args:
            text: The document text; each character becomes one span.
            annotations: Maps a character index in ``text`` to the
                annotation string emitted right after that character.
            title: Heading text; ``None`` is rendered as an empty heading.

        Returns:
            The ``TPL_DOC`` markup for this document.
        """
        from html import escape

        # Collect fragments and join once instead of repeated string +=.
        parts = []
        for i, char in enumerate(text):
            parts.append(self._render_chars(char))
            if i in annotations:
                parts.append(self.render_annotation(annotations[i]))
        return TPL_DOC.format(
            content="".join(parts),
            # Escape so markup characters in the title cannot break the page.
            title=escape(title or "", quote=False),
        )

    def render_annotation(self, annotation: str) -> str:
        """Render an annotation as two columns of characters.

        The text is split in half; the first half goes to the ``right``
        column and receives the extra character when the length is odd.
        """
        middle = (len(annotation) + 1) // 2  # ceil(len / 2)
        right, left = annotation[:middle], annotation[middle:]
        return TPL_ANNOTATION.format(
            right=self._render_chars(right), left=self._render_chars(left)
        )

    @staticmethod
    def _render_chars(chars: str) -> str:
        """Wrap each character of ``chars`` in a ``TPL_CHAR`` span, HTML-escaped."""
        from html import escape

        return "".join(
            TPL_CHAR.format(content=escape(char, quote=False)) for char in chars
        )
|
||
|
||
# Full-page wrapper used when rendering with page=True. Filled via
# str.format: `{style}` receives the stylesheet text and `{content}` the
# concatenated document markup. Any literal brace added to this template
# would have to be doubled for str.format.
TPL_PAGE = """
<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="utf-8" />
<title>euphoNy</title>
<style>{style}</style>
</head>
<body>{content}</body>
</html>
""".strip()
|
||
# Markup for one document: `{title}` becomes the heading and `{content}`
# the sequence of character/annotation spans. The "doc" class is the one
# styled by the `.doc` rule in style.css.
TPL_DOC = """
<article class="doc">
<h1>{title}</h1>
<p>{content}</p>
</article>
""".strip()
|
||
# Markup for one inline annotation made of two columns; `{right}` is
# emitted first and `{left}` second, each already formatted as character
# spans.
TPL_ANNOTATION = """
<span class="anno">
<span class="col">{right}</span>
<span class="col">{left}</span>
</span>
""".strip()
|
||
# Markup for a single character; `{content}` is its only placeholder.
TPL_CHAR = """
<span class="char">{content}</span>
""".strip()