# Translate tex to html

This is ad-hoc code.  Don't use it for general purposes.

In [102]:
from itertools import dropwhile
from pathlib import Path
import re
from typing import Iterable, Iterator, TextIO

In [17]:
# Working directory plus the TeX input and HTML output paths.
FOLDER = Path('.')
tex = FOLDER.joinpath('HHn.tex')
html = FOLDER.joinpath('HHn.html')

In [4]:
# Open the TeX source for interactive exploration.
# NOTE(review): this handle is never closed in the visible cells.
tex_file = open(tex, encoding='utf-8')

In [5]:
def _in_preamble(line: str) -> bool:
    r"""Tell whether *line* precedes the document body.

    Every line counts as preamble except one that begins with
    ``\begin{document}``.
    """
    marker = '\\begin{document}'
    return line[:len(marker)] != marker

In [8]:
# Lazily skip every leading line for which _in_preamble is true; iteration
# starts at the first line that begins with \begin{document}.
document = dropwhile(_in_preamble, tex_file)

In [93]:
# Advance the iterator by one line; the notebook displays the returned line.
next(document)

'\n'

In [202]:
# Opening part of the generated page, up to (but not including) <title>.
#
# Escaping levels: this is a regular (non-raw) Python string that ends up as
# JavaScript source, so a backslash that MathJax must see has to be written
# four times here (Python halves it, then the JavaScript string literal halves
# it again).  In particular the \( ... \) inline delimiters need '\\\\(' --
# with only two backslashes JavaScript reads '\(' as a plain '(' and every
# parenthesis in the text would be treated as a math delimiter.
#
# The MathJax configuration is defined before the loader script so that it is
# guaranteed to exist when MathJax starts.
_HTML_HEAD = '''\
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="description" content="Quaternionic hyperbolic spaces">
<meta name="keywords" content="quaternions, projective spaces, hyperbolic spaces, symplectic groups">
<meta name="author" content="J. Carlos Diaz Ramos">
<meta name="viewport" content="width=device-width, initial-scale=1">
<script>
(function() {
    window.MathJax = {
        tex: {
            inlineMath: [['$', '$'], ['\\\\(', '\\\\)']],
            processEscapes: true,
            macros: {
                "R": "{\\\\mathbb{R}}",
                "C": "{\\\\mathbb{C}}",
                "H": "{\\\\mathbb{H}}",
                "id": "{\\\\operatorname{id}}",
                "Exp": "{\\\\operatorname{Exp}}",
                "tr": "{\\\\operatorname{tr}}",
                "Ad": "{\\\\operatorname{Ad}}",
                "ad": "{\\\\operatorname{ad}}",
                "Re": "{\\\\operatorname{Re}\\\\,}",
                "Im": "{\\\\operatorname{Im}\\\\,}",
                "g": ["\\\\mathfrak{#1}", 1],
                "Sp": "\\\\mathsf{Sp}"
            }
        }
    }
})();
</script>
<script async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"></script>
<style>
    body {
        max-width: 800px;
        margin: 0 auto;
        background: lightgrey;
    }
    main {
        background: white;
    }
</style>
'''

In [203]:
# Closing tags written after the translated document body.
_HTML_END = '</body>\n</html>\n'

In [204]:
def _get_title(document: Iterable[str]) -> str:
    r"""Return the argument of the first ``\title{...}`` line.

    Lines are consumed from *document* up to and including the title line,
    so a shared iterator is left positioned just after it.

    Raises:
        ValueError: if no line starts with ``\title{`` or if that line has
            no closing brace (e.g. a title spanning several lines).
    """
    for line in document:
        if line.startswith('\\title{'):
            # Greedy on purpose: the title may itself contain brace groups.
            match = re.match(r'\\title\{(.+)}', line)
            if match is None:
                raise ValueError(f'malformed \\title line: {line!r}')
            return match.group(1)
    raise ValueError('no \\title{...} line found in the document')

In [205]:
def _get_abstract(document: Iterable[str]) -> str:
    r"""Return the text between ``\begin{abstract}`` and ``\end{abstract}``.

    Lines are consumed from *document* up to and including the
    ``\end{abstract}`` line; the two delimiter lines are not part of the
    result, and the lines in between are returned unchanged (line endings
    included).

    Raises:
        ValueError: if the abstract environment is missing or never closed.
    """
    # iter() returns an iterator unchanged, so a shared iterator passed by
    # the caller still advances; it also makes plain lists acceptable.
    lines = iter(document)
    for line in lines:
        if line.startswith(r'\begin{abstract}'):
            break
    else:
        raise ValueError('no \\begin{abstract} line found in the document')

    collected = []
    for text in lines:
        if text.startswith(r'\end{abstract}'):
            return ''.join(collected)
        collected.append(text)
    # Raising here avoids leaking StopIteration into a calling generator.
    raise ValueError('\\begin{abstract} without matching \\end{abstract}')


In [206]:
def _process_abstract(document: Iterable[str]) -> Iterator[str]:
    r"""Yield the HTML for the abstract, then skip ahead past ``\maketitle``.

    The abstract text comes from ``_get_abstract``.  After the three HTML
    chunks have been produced, *document* is consumed up to and including
    the first line starting with ``\maketitle`` (or to its end when there is
    no such line).
    """
    summary = _get_abstract(document)
    yield from (
        '<div class="abstract">\n',
        f'<p><strong>Abstract</strong>. {summary}</p>\n',
        '</div>\n\n',
    )
    # any() stops at the first match, i.e. right after the \maketitle line.
    any(entry.startswith(r'\maketitle') for entry in document)

In [207]:
def _braced_argument(line: str, start: int) -> str:
    r"""Return the contents of the brace group that opens at ``line[start]``.

    Nested groups are balanced, and a backslash escapes the character after
    it, so ``\{`` and ``\}`` do not count as delimiters.

    Raises:
        ValueError: if the group is not closed within *line*.
    """
    depth = 0
    index = start
    while index < len(line):
        char = line[index]
        if char == '\\':
            index += 2  # skip the backslash and the character it escapes
            continue
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return line[start + 1:index]
        index += 1
    raise ValueError(f'unbalanced braces in line: {line!r}')


def _process_document(document: Iterable[str]) -> Iterator[str]:
    r"""Translate the body lines of a TeX document into HTML chunks.

    * Blank lines separate paragraphs; each paragraph is wrapped in
      ``<p>`` ... ``</p>`` with its lines passed through unchanged.
    * A line starting with ``\section{`` becomes an ``<h2>`` heading and
      closes any paragraph that is still open.
    * Translation stops at a line starting with ``\end{document}``; a
      paragraph still open at that point, or at the end of the input, is
      closed.

    Raises:
        ValueError: if a ``\section{`` line has no matching closing brace.
    """
    in_paragraph = False
    for line in document:
        if line.startswith(r'\end{document}'):
            break
        if not line.strip():
            if in_paragraph:
                yield '</p>\n\n'
                in_paragraph = False
        elif line.startswith(r'\section{'):
            if in_paragraph:
                yield '</p>\n\n'
                in_paragraph = False
            # Brace matching keeps titles such as "$H^{n}$" intact and
            # ignores anything after the group (e.g. a trailing \label).
            heading = _braced_argument(line, len(r'\section'))
            yield f'<h2>{heading}</h2>\n\n'
        else:
            if not in_paragraph:
                yield '<p>'
                in_paragraph = True
            yield line
    if in_paragraph:
        yield '</p>\n\n'

In [208]:
def translate(tex_file: TextIO, html_file: TextIO) -> None:
    """Write an HTML rendering of the TeX in *tex_file* to *html_file*.

    The output consists of ``_HTML_HEAD``, a ``<title>`` and ``<h1>`` taken
    from the TeX title, the abstract, the translated body, and
    ``_HTML_END``.  The preamble of *tex_file* is skipped, and the helpers
    share one iterator, so each continues where the previous one stopped.

    The page content is wrapped in a ``<main>`` element because the
    stylesheet in ``_HTML_HEAD`` gives ``main`` its white background.
    """
    html_file.write(_HTML_HEAD)
    document = dropwhile(_in_preamble, tex_file)
    title = _get_title(document)
    html_file.write(f'<title>{title}</title>\n')
    html_file.write('</head>\n\n<body>\n<main>\n')
    html_file.write(f'<h1>{title}</h1>\n\n')
    html_file.writelines(_process_abstract(document))
    html_file.writelines(_process_document(document))
    html_file.write('</main>\n')
    html_file.write(_HTML_END)

In [209]:
def tex_to_html(tex: Path, html: Path) -> None:
    """Translate the TeX file at *tex* into an HTML file at *html*.

    Both files are handled as UTF-8; *html* is opened for writing and both
    handles are closed when the translation finishes or fails.
    """
    with tex.open(encoding='utf-8') as source:
        with html.open('w', encoding='utf-8') as target:
            translate(source, target)

In [210]:
# Run the conversion on the module-level `tex` and `html` paths.
tex_to_html(tex, html)