# Finding math that breaks **sympy.parsing.latex.parse_latex**

Based on some [feedback on this PR](https://github.com/sympy/sympy/pull/13706#issuecomment-359944477), 
let's look at some semi-automated ways to test $\LaTeX$ parsing in `sympy`. 

Roughly, we'll:
- Generate some basic [`hypothesis`](http://hypothesis.works/) strategies for expressions from the `sympy` code base
- Generate some more complex strategies with some custom code
- Check that the generated expressions can even be printed with `sympy.printing.latex.latex`
- Test with sources of truth
- Find some examples that break current parsing behavior!
- Bonus: some other strategies

In [None]:
import operator as ops
from tempfile import mkdtemp
import subprocess
import os
import shutil
import re

import attr

from hypothesis import given, assume, settings, strategies as st
from IPython import display

import sympy as S
from sympy.parsing.latex import parse_latex

In [None]:
parse_latex("A")

In [None]:
!rm -rf .hypothesis/

In [None]:
def examples(*strategies):
    """Print ten sample draws from each of the given strategies.

    Each argument may be a strategy object (anything exposing ``.example()``)
    or a zero-argument callable that returns one, such as ``st.text`` or a
    ``@st.composite`` function.

    For every argument the argument itself is printed first, followed by the
    set of its sampled examples. Errors are reported per strategy: the
    exception is printed and the remaining strategies are still processed.
    """
    for strategy in strategies:
        print(strategy)
        try:
            # Factories are called to obtain the strategy; ready-made
            # strategy objects are used as they are.
            strat = strategy() if callable(strategy) else strategy
            print({strat.example() for _ in range(10)})
        except Exception as err:
            print(err)

In [None]:
examples(st.text)

## Some Basic Strategies
The simple strategies, `sampled_from` and `from_regex`, provide a lot
of value when we know a fair amount about the structure of the values we want to generate.

In [None]:
# Forward placeholder: the composite strategies defined below look up the
# global ``numeric_expressions`` when they draw, and the name is rebound to
# the real strategy in a later cell.
numeric_expressions = None

In [None]:
# Variable names are generated from a regex that accepts exactly one letter
# in the range a-z, matched case-insensitively; ``symbols`` uses this as its
# default source of names.
variable_names = st.from_regex(re.compile(r"\A[a-z]\Z", re.IGNORECASE))
examples(variable_names)

In [None]:
# One-argument sympy callables applied by ``numeric_unary_expressions``.
# ``S.asin`` takes the slot that previously held a second, duplicate
# ``S.acos`` entry, so every listed function is distinct.
_numeric_unary_ops = st.sampled_from([
    S.sin, S.tan, S.cos, S.asin, S.sec, S.acos, S.atan, S.asec, S.Abs,
    S.sqrt, S.factorial,
])
examples(_numeric_unary_ops)

In [None]:
# Two-argument callables: Python's arithmetic operators from ``operator``
# plus sympy's ``root`` and ``Derivative``. ``numeric_binary_expressions``
# draws one of these per generated expression.
_numeric_binary_ops = st.sampled_from([
    ops.add, ops.sub, ops.mul, ops.pow, ops.truediv,
    S.root, S.Derivative
])
examples(_numeric_binary_ops)

In [None]:
# Three-argument callables; currently only ``S.Limit``, which
# ``numeric_trinary_expressions`` calls with three drawn expressions.
_numeric_trinary_ops = st.sampled_from([
    S.Limit,
])
examples(_numeric_trinary_ops)

In [None]:
# Two-argument callables that build a relation between two expressions.
# ``==`` / ``!=`` on sympy objects perform structural comparison and return
# plain Python bools, so equality and inequality use ``S.Eq`` / ``S.Ne``,
# which construct symbolic relations like the ordering operators do.
_comparators = st.sampled_from([
    ops.gt, ops.ge, ops.lt, ops.le, S.Eq, S.Ne
])
examples(_comparators)

In [None]:
@st.composite
def symbols(draw, name=variable_names):
    """Draw a name from the ``name`` strategy and wrap it in a ``S.Symbol``."""
    symbol_name = draw(name)
    return S.Symbol(symbol_name)


examples(symbols)

In [None]:
@st.composite
def floats(draw):
    """Draw a ``S.Float`` built from an arbitrary float and precision.

    The value comes from ``st.floats()`` and the precision from
    ``st.integers()``. Combinations that ``S.Float`` refuses are rejected
    with ``assume(False)`` so Hypothesis moves on to another draw.
    """
    f = draw(st.floats())
    prec = draw(st.integers())
    try:
        return S.Float(f, prec)
    except Exception:
        # Only ordinary errors mean "invalid input"; KeyboardInterrupt and
        # other BaseExceptions must keep propagating.
        assume(False)


examples(floats)

## Simple functions

In [None]:
@st.composite
def numeric_unary_expressions(draw):
    """Draw a one-argument operation applied to a numeric expression.

    The operation comes from ``_numeric_unary_ops`` and the operand from the
    global ``numeric_expressions`` strategy. If applying the operation
    raises, the draw is rejected with ``assume(False)``.
    """
    op = draw(_numeric_unary_ops)
    a = draw(numeric_expressions)

    try:
        return op(a)
    except Exception:
        # Reject operands the operation cannot handle, without swallowing
        # KeyboardInterrupt or other BaseExceptions.
        assume(False)

In [None]:
examples(numeric_unary_expressions)

## Common two-argument functions

In [None]:
@st.composite
def numeric_binary_expressions(draw):
    """Draw a two-argument operation applied to two numeric expressions.

    The operation comes from ``_numeric_binary_ops`` and both operands from
    the global ``numeric_expressions`` strategy. If applying the operation
    raises, the draw is rejected with ``assume(False)``.
    """
    op = draw(_numeric_binary_ops)
    a = draw(numeric_expressions)
    b = draw(numeric_expressions)

    try:
        return op(a, b)
    except Exception:
        # Reject operand pairs the operation cannot handle, without
        # swallowing KeyboardInterrupt or other BaseExceptions.
        assume(False)

In [None]:
examples(numeric_binary_expressions)

## Three-argument functions

In [None]:
@st.composite
def numeric_trinary_expressions(draw):
    """Draw a three-argument operation applied to three numeric expressions.

    The operation comes from ``_numeric_trinary_ops`` and all three operands
    from the global ``numeric_expressions`` strategy. If applying the
    operation raises, the draw is rejected with ``assume(False)``.
    """
    op = draw(_numeric_trinary_ops)
    a = draw(numeric_expressions)
    b = draw(numeric_expressions)
    c = draw(numeric_expressions)

    try:
        return op(a, b, c)
    except Exception:
        # Reject argument triples the operation cannot handle, without
        # swallowing KeyboardInterrupt or other BaseExceptions.
        assume(False)

In [None]:
examples(numeric_trinary_expressions)

## The `numeric_expressions`
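This is a high-level representation of a number-y thing, and is used frequently above. Now that it is defined, you can go back and re-run any of the `examples(...)` cells above that failed earlier (for instance because `numeric_expressions` was still the `None` placeholder).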

In [None]:
# The union of every numeric strategy in this notebook. The unary, binary and
# trinary strategies draw from this same global name, so the definition is
# mutually recursive.
# NOTE(review): nothing here bounds the recursion depth; Hypothesis offers
# ``st.recursive`` / ``st.deferred`` for this shape -- confirm whether that
# is needed in practice.
numeric_expressions = (
    floats() 
    | symbols() 
    | numeric_unary_expressions() 
    | numeric_binary_expressions()
    | numeric_trinary_expressions()
)

## Relational expressions

In [None]:
@st.composite
def relational_expressions(draw):
    """Draw a comparator applied to two numeric expressions.

    The comparator comes from ``_comparators`` and both sides from the
    global ``numeric_expressions`` strategy. Draws whose comparison raises,
    or yields ``None``, are rejected via ``assume``.
    """
    # Draw outside the ``try`` so that exceptions Hypothesis raises for its
    # own control flow while drawing are never swallowed.
    comparator = draw(_comparators)
    lhs = draw(numeric_expressions)
    rhs = draw(numeric_expressions)

    try:
        expr = comparator(lhs, rhs)
    except Exception:
        expr = None
    assume(expr is not None)

    return expr

In [None]:
examples(relational_expressions)

## Expressions that don't fail **sympy.printing.latex.latex**

`sympy.printing.latex.latex` is mature enough to be considered close to an (opinionated)
production-grade typesetting approach.

If it can't work with what we've built, we probably don't care to handle it yet.

We also don't care about empty strings, for the time being.

Note that this returns the `latex_str` so we don't have to recalculate it later.

In [None]:
@st.composite
def latex_printable_expressions(draw):
    """Draw an expression that ``S.latex`` can print to a non-empty string.

    The expression is drawn from ``numeric_expressions`` or
    ``relational_expressions()``.

    Returns:
        A tuple ``(expr, latex_str)`` so that callers do not have to
        recompute the LaTeX source.
    """
    expr = draw(numeric_expressions | relational_expressions())

    try:
        latex_str = S.latex(expr)
    except Exception:
        latex_str = None
    # Rejects both printer failures (None) and empty output, which the
    # notebook explicitly does not care about for now.
    assume(latex_str)
    return (expr, latex_str)

In [None]:
examples(latex_printable_expressions)

## Expressions that don't fail "real" `latex`
The ultimate source of truth in $\LaTeX$ parsing is a canonical `latex` distribution like `pdflatex` or `xelatex`. 
Because we can easily `display` PDF in Jupyter, we can use this, along with the `MathJax` representation 
for multiple verifications.

In [None]:
class PointlessPDFWrapper:
    """Hold a PDF payload and hand it back through ``_repr_pdf_``."""

    def __init__(self, pdf):
        """Store ``pdf`` unchanged on the instance."""
        self._pdf = pdf

    def _repr_pdf_(self):
        """Return the stored PDF payload exactly as it was given."""
        payload = self._pdf
        return payload

We need a very basic $\LaTeX$ document in order to work with the command line tools.

In [None]:
# Minimal article-class document template. The single ``%s`` slot is filled
# with the LaTeX source of one expression (via ``LATEX_DOC % latex_str``) and
# sits between ``$$`` delimiters.
LATEX_DOC = r"""
\documentclass[a4paper]{article}
 
\begin{document}
$$
%s
$$
\end{document}
"""

In [None]:
@st.composite
def typesettable_expressions(draw):
    """Draw an expression whose LaTeX source ``pdflatex`` can typeset.

    The LaTeX source is written into ``LATEX_DOC`` inside a fresh temporary
    directory and compiled with ``pdflatex``. Draws for which any step fails
    are rejected via ``assume``. The temporary directory is always removed.

    Returns:
        A tuple ``(expr, latex_str, pdf)`` where ``pdf`` is a
        ``PointlessPDFWrapper`` around the bytes of the generated PDF.
    """
    expr, latex_str = draw(latex_printable_expressions())

    tmpdir = mkdtemp()
    tmp_tex_path = os.path.join(tmpdir, "expr.tex")
    tmp_pdf_path = os.path.join(tmpdir, "expr.pdf")

    pdf = None
    try:
        with open(tmp_tex_path, "w") as fp:
            fp.write(LATEX_DOC % latex_str)
        # Without these flags pdflatex drops into an interactive prompt on
        # the first TeX error and waits for input; make it fail fast with a
        # non-zero exit status instead, and give it no stdin to wait on.
        subprocess.check_call(
            [
                "pdflatex",
                "-interaction=nonstopmode",
                "-halt-on-error",
                tmp_tex_path,
            ],
            cwd=tmpdir,
            stdin=subprocess.DEVNULL,
        )
        with open(tmp_pdf_path, "rb") as fp:
            pdf = PointlessPDFWrapper(fp.read())
    except Exception:
        # Deliberate best effort: any failure to write, compile or read back
        # simply means this expression is not typesettable.
        pdf = None
    finally:
        # A cleanup problem must not mask the outcome of the compile.
        shutil.rmtree(tmpdir, ignore_errors=True)

    assume(pdf is not None)

    return expr, latex_str, pdf

In [None]:
examples(typesettable_expressions)

## The Test

In [None]:
@given(typesettable_expressions())
def test_latex_roundtrip(expr_latex_pdf):
    """Check that ``parse_latex`` recovers an expression from its LaTeX form.

    Args:
        expr_latex_pdf: the ``(expr, latex_str, pdf)`` tuple produced by
            ``typesettable_expressions``.

    Raises:
        ValueError: when the parsed result differs from ``expr`` (or parsing
            failed). ``args[0]`` is the list
            ``[expr, expr_parsed, latex_str, pdf]``; if parsing raised, that
            exception is attached as ``__cause__``.
    """
    expr, latex_str, pdf = expr_latex_pdf

    # Falsy expressions are skipped. sympy relationals refuse truth-testing
    # with TypeError; those are kept rather than erroring out of the test.
    try:
        keep = bool(expr)
    except TypeError:
        keep = True
    assume(keep)

    expr_parsed = None
    parse_error = None
    try:
        expr_parsed = parse_latex(latex_str)
    except Exception as exc:
        # Keep the failure so it can be chained onto the report below; the
        # ``as`` name itself is unbound when this clause ends.
        parse_error = exc

    if expr == expr_parsed:
        return
    raise ValueError([expr, expr_parsed, latex_str, pdf]) from parse_error

## Run the Test

In [None]:
# Run the property test. On a mismatch, unpack the payload carried by the
# ValueError and show both expressions, the LaTeX source, the rendered LaTeX
# and the PDF.
try:
    test_latex_roundtrip()
except ValueError as failure:
    expr, expr_parsed, latex_str, pdf = failure.args[0]
    print(expr)
    for label, value in (("expr", expr), ("parsed", expr_parsed)):
        display.display(display.Markdown(f"### {label}"))
        print(value.__class__.__mro__)
        print(value)
    display.display(
        display.Markdown("### LaTeX Source\n```latex\n%s\n```" % latex_str)
    )
    display.display(display.Latex("$$ %s $$" % latex_str))
    display.display(pdf)

### Having a look at `expr` & `parsed`

In [None]:
# Collect the concrete class of the generated and the parsed expression and
# require that both are the same class.
types = [type(candidate) for candidate in (expr, expr_parsed)]
print(types)
assert len(set(types)) == 1, "they are not the same type"

In [None]:
assert expr == expr_parsed, "they're not equal"

In [None]:
expr_parsed.atoms()

In [None]:
expr.atoms()

In [None]:
S.parsing.latex.parse_latex("A")

# Sir Not-Appearing-In-This-Tool

Not using this for anything yet, but this will generate expressions that don't fail `.simplify`.

In [None]:
@st.composite
def simplifiable_expressions(draw):
    """Draw a numeric expression whose ``.simplify()`` call does not raise.

    The expression is drawn from the global ``numeric_expressions``
    strategy. The simplified result is discarded; the original expression
    is returned.
    """
    expr = draw(numeric_expressions)

    try:
        expr.simplify()
    except Exception:
        # Reject here rather than from a ``finally`` block, which would also
        # replace an in-flight KeyboardInterrupt with a Hypothesis rejection.
        assume(False)
    return expr