# 00_file

> File input

In [None]:
#| default_exp file

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from collections.abc import Iterable, Sequence
import re

TeX hyphenation pattern files look like this:

```
% this is a comment
\patterns{
pattern  % could have comments on lines
another
maybe many words per line}  % that brace ends the patterns
\hyphenation{ % and now the exceptions
word
anoth-er
}
```

In [None]:
#|export
def read_patterns(
    lines: Iterable[str] # e.g. open('hyph-fi.tex').readlines()
) -> tuple[Sequence[str], Sequence[str]]: # patterns and exceptions
    """Read a TeX hyphenation pattern file; not a complete parser but hopefully enough
    for hyph-*.tex

    Whitespace-separated words inside the `patterns{...}` group are returned as
    patterns and those inside the `hyphenation{...}` group as exceptions, both in
    file order. A `%` starts a comment that runs to the end of the line. Text
    outside both groups is ignored. Only a single level of braces is understood.
    """
    comment = re.compile(r' *%.*')
    group_end = re.compile(r'}.*')
    openers = ((r'\patterns{', 1), (r'\hyphenation{', 2))
    state = 0  # 0: outside any group, 1: inside patterns, 2: inside hyphenation
    patterns = []
    exceptions = []
    for line in lines:
        line = comment.sub('', line.strip())
        # An opening keyword switches the state; anything after the brace on the
        # same line already belongs to the newly opened group.
        for opener, opened_state in openers:
            if line.startswith(opener):
                state = opened_state
                line = line[len(opener):]
                break
        nextstate = state
        if '}' in line:
            # Words before the brace still belong to the current group.
            line = group_end.sub('', line)
            nextstate = 0
        words = line.split()  # empty list for blank lines, so no special case
        if state == 1:
            patterns.extend(words)
        elif state == 2:
            exceptions.extend(words)
        # Always apply the transition, even when the line held nothing but `}`;
        # otherwise the group would never be left.
        state = nextstate
    return patterns, exceptions

In [None]:
#|hide
# Render the generated API documentation for read_patterns below this cell.
show_doc(read_patterns)

---

[source](https://github.com/jkseppan/shyster/blob/main/shyster/file.py#L11){target="_blank" style="float:right; font-size:smaller"}

### read_patterns

>      read_patterns (lines:collections.abc.Iterable[str])

Read a TeX hyphenation pattern file; not a complete parser but hopefully enough
for hyph-*.tex

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| lines | Iterable | e.g. open('hyph-fi.tex').readlines() |
| **Returns** | **tuple** | **patterns and exceptions** |

In [None]:
# Worked example: comments are dropped, words are split on whitespace, and a
# closing brace may share a line with the last pattern.
sample = r'''
% foo
\patterns{
1ba alous1
1be % foobar
ch2r}
\hyphenation{
saippua-kauppias
}
'''
expected_patterns = ['1ba', 'alous1', '1be', 'ch2r']
expected_exceptions = ['saippua-kauppias']
assert read_patterns(sample.splitlines()) == (expected_patterns, expected_exceptions)

In [None]:
#| hide
# Run nbdev's export step; presumably this writes the `#| export` cells to the
# module named by `default_exp` (TODO confirm against the nbdev docs).
import nbdev; nbdev.nbdev_export()