# helper.path_accepted_string

In [None]:
#| default_exp helper.path_accepted_string

In [None]:
#| export
import re

from pathvalidate import sanitize_filename

In [None]:
from pathvalidate import validate_filename

In [None]:
#| export
# Latin letters, LaTeX Greek-letter commands, and a few symbols listed in a
# fixed order. The name suggests the position in this list serves as an
# ordering key; the consumer is not visible in this notebook -- TODO confirm.
# Every entry must be followed by a comma: adjacent string literals are
# silently concatenated by Python (a missing comma after r'\omicron' used to
# merge it with 'P' into the single bogus entry '\omicronP').
CHARACTER_ORDERING_LIST = [
    'A', 'a', r'\Alpha', r'\alpha', 'B', 'b', r'\Beta', r'\beta', 'C', 'c', r'\Gamma',
    r'\gamma', 'D', 'd', r'\Delta', r'\delta', 'E', 'e', r'\Epsilon', r'\epsilon',
    'F', 'f', 'G', 'g', 'H', 'h', r'\Eta', r'\eta', 'I', 'i', r'\Iota', r'\iota',
    'J', 'j', 'K', 'k', r'\Kappa', r'\kappa', 'L', 'l', r'\Lambda', r'\lambda', 'M',
    'm', r'\Mu', r'\mu', 'N', 'n', r'\Nu', r'\nu', 'O', 'o', r'\Omicron', r'\omicron',
    'P', 'p', r'\Pi', r'\pi', r'\Phi', r'\phi', r'\Psi', r'\psi', 'Q', 'q', 'R', 'r',
    r'\Rho', r'\rho', 'S', 's', r'\Sigma', r'\sigma', 'T', 't', r'\Theta', r'\theta',
    r'\Tau', r'\tau', 'U', 'u', r'\Upsilon', r'\upsilon', 'V', 'v', 'W', 'w', r'\Omega', r'\omega',
    'X', 'x', r'\Chi', r'\chi', 'Y', 'y', 'Z', 'z', r'\Zeta', r'\zeta', '*', r'\bullet',
]

# LaTeX commands that decorate or restyle a symbol.
# NOTE(review): the last three entries are raw strings with a doubled
# backslash (two literal backslashes), unlike the single-backslash entries
# before them. Left unchanged because the consumer is not visible here --
# confirm whether this is intentional.
DECORATING_CHARACTERS = [
    r'\tilde', r'\hat', r'\overline', r'\bar', r'\mathscr', r'\mathcal',
    r'\mathfrak', r'\\operatorname', r'\\text', r'\\bf',
]

# Script markers and grouping delimiters; presumably ignored when ordering,
# judging by the name -- TODO confirm against the consumer.
NONEFFECTIVE_CHARACTERS = ['^', '_', '{', '}', '(', ')', '[', ']']

In [None]:
#| export
# Substrings that `latex_to_path_accepted_string` deletes outright.
# Order matters because the removals run one after another: longer names
# precede the shorter names they contain ('mathbf' before 'bf',
# 'longrightarrow' before 'rightarrow' before 'right').
# '?' appears twice, and '*' is already rewritten via TO_SUBSTITUTE before
# the removal pass runs; both are harmless and kept as they were.
TO_REMOVE = [
    # punctuation and other single-character symbols
    '.', '$', ':', '?', '!', '#', '%', '&',
    '<', '>', '*', '?', '"', '@', '`', '|',
    # font / styling command names
    'mathscr', 'mathbf', 'mathrm', 'mathfrak', 'mathcal', 'mathbb',
    'operatorname', 'boldsymbol', 'bf',
    # text and environment names
    'text', 'begin', 'end', 'equation', 'aligned', 'array', 'pmatrix', 'bmatrix',
    # spacing, arrow, and delimiter-sizing command names
    'quad', 'longrightarrow', 'rightarrow', 'left', 'right', 'longmapsto', 'mapsto',
    'stackrel',
]

# Substrings that are each replaced by a single underscore ('(' is listed
# twice, as before).
TO_UNDERSCORE = [
    ' ', '-', '^', '(', ',', '/',
    '{', '}', '[', ']', '(', ')',
    '\\', '=',
]

# Substrings that are replaced by a readable word: key -> replacement.
TO_SUBSTITUTE = dict([
    ('*', 'star'),
    ('+', 'plus'),
    ('leqslant', 'leq'),
    ('geqslant', 'geq'),
    ('\'', '_prime'),
])

# TODO: make a universal latex to path string; it seems that latex.convert
# might do something different when naming files.

def latex_to_path_accepted_string(latex: str) -> str:
    """Turn a LaTeX string into a string that can be used as a file name.

    The passes run in this order:

    1. every substring in `TO_UNDERSCORE` becomes ``'_'``;
    2. every key of `TO_SUBSTITUTE` becomes its mapped value;
    3. every substring in `TO_REMOVE` is deleted;
    4. runs of underscores collapse to one, and a single leading and a
       single trailing underscore are dropped;
    5. the result goes through `pathvalidate.sanitize_filename`.

    All matching is on plain substrings, not whole words, so a listed name
    is also affected when it occurs inside a longer word.

    **Parameters**

    - latex - `str`
        - The LaTeX source to convert.

    **Returns**

    - `str`
        - The cleaned-up string.
    """
    cleaned = latex
    # Literal replacements with a fixed replacement text.
    for token in TO_UNDERSCORE:
        cleaned = cleaned.replace(token, '_')
    # Kept on `re.sub` so that the replacement text is still treated as a
    # regex replacement template, exactly as before.
    for token, replacement in TO_SUBSTITUTE.items():
        cleaned = re.sub(re.escape(token), replacement, cleaned)
    for token in TO_REMOVE:
        cleaned = cleaned.replace(token, '')
    # Tidy the underscores left behind by the passes above.
    cleaned = re.sub('_+', '_', cleaned)
    cleaned = re.sub('^_', '', cleaned)
    cleaned = re.sub('_$', '', cleaned)
    return sanitize_filename(cleaned)

In [None]:
#| hide
# Regression cases: (LaTeX input, expected path-accepted string).
# Every output is printed, compared with its expected value, and validated
# as a file name.
# NOTE: the last input contains a literal tab character before `G`.
regression_cases = [
    (r'{ }^* \mathscr{R}_{\Lambda}',
     'star_R_Lambda'),
    (r'\\left(d_1, d_2\\right)',
     'd_1_d_2'),
    (r'\left(\Delta_q(n)\right)_0 \leqslant n \leqslant q-1',
     'Delta_q_n_0_leq_n_leq_q_1'),
    (r"A'",
     'A_prime'),
    (r"A = \prod",
     'A_prod'),
    (r"\begin{aligned}  & F_p: \quad \mathbf{G} \longrightarrow \mathbf{G} \\  & \left(\begin{array}{ll}  a & b \\  c & d  \end{array}\right) \longmapsto\left(\begin{array}{ll}  a^p & b^p \\  c^p & d^p  \end{array}\right) \\  &  \end{aligned}",
     'F_p_G_G_ll_a_b_c_d_ll_a_p_b_p_c_p_d_p'),
    (r"\boldsymbol{\Delta}_{m, n}=[\Delta(n): L(m)]",
     'Delta_m_n_Delta_n_L_m'),
    (r"$\mathbb{F}_q^{+} \stackrel{\chi+}{\longrightarrow} \mathscr{O}^{\times} \longrightarrow k^{\times}$",
     'F_q_plus_chiplus_O_times_k_times'),
    (r"$\sqrt{\alpha_0(-1) q}=\sum_{z \in \mathbb{F}_q^{\times}} \alpha_0(z) \chi_{+}(z)$",
     'sqrt_alpha_0_1_q_sum_z_in_F_q_times_alpha_0_z_chi_plus_z'),
    (r"$$\begin{equation} \label{escape rate} 	G_{F_t}(z,w) = \lim_{n\to\infty} \frac{1}{d^n} \log \| F_t^n(z,w) \|, \end{equation}$$",
     'label_escape_rate_G_F_t_z_w_lim_n_to_infty_frac_1_d_n_log_F_t_n_z_w'),
]

for latex_input, expected_output in regression_cases:
    actual_output = latex_to_path_accepted_string(latex_input)
    print(actual_output)
    assert actual_output == expected_output, (latex_input, actual_output)
    validate_filename(actual_output)

star_R_Lambda
d_1_d_2
Delta_q_n_0_leq_n_leq_q_1
A_prime
A_prod
F_p_G_G_ll_a_b_c_d_ll_a_p_b_p_c_p_d_p
Delta_m_n_Delta_n_L_m
F_q_plus_chiplus_O_times_k_times
sqrt_alpha_0_1_q_sum_z_in_F_q_times_alpha_0_z_chi_plus_z
label_escape_rate_G_F_t_z_w_lim_n_to_infty_frac_1_d_n_log_F_t_n_z_w


The `latex_to_path_accepted_string` function "cleans" a LaTeX string into a path-valid string, so that it can be used as a file name.

In [None]:
# Each example prints the cleaned string, checks it against the expected
# value, and confirms that it is a valid file name.
sample_1 = r'\mathcal{O}_X'
output_1 = latex_to_path_accepted_string(sample_1)
print(output_1)
assert output_1 == 'O_X'
validate_filename(output_1)

sample_2 = r'\operatorname{Gal}(L/K)'
output_2 = latex_to_path_accepted_string(sample_2)
print(output_2)
assert output_2 == 'Gal_L_K'
validate_filename(output_2)

# Example found in https://arxiv.org/abs/1607.04471
# (the input contains a literal tab character before `G`).
sample_3 = r'\begin{equation} \label{escape rate} 	G_{F_t}(z,w) = \lim_{n\to\infty} \frac{1}{d^n} \log \| F_t^n(z,w) \|, \end{equation}'
output_3 = latex_to_path_accepted_string(sample_3)
print(output_3)
assert output_3 == 'label_escape_rate_G_F_t_z_w_lim_n_to_infty_frac_1_d_n_log_F_t_n_z_w'
validate_filename(output_3)


O_X
Gal_L_K
label_escape_rate_G_F_t_z_w_lim_n_to_infty_frac_1_d_n_log_F_t_n_z_w
