# Cylleneus
#### Next-gen corpus search for electronic corpora of ancient languages

In [None]:
# Get everything set up
import re
from textwrap import wrap

import multiwordnet

from cylleneus.corpus import Corpus, manifest
from cylleneus.search import Collection, Searcher

# Compile any MultiWordNet database that does not exist yet
for language in (
    "common",
    "english",
    "french",
    "hebrew",
    "italian",
    "latin",
    "spanish",
):
    if multiwordnet.db.exists(language):
        continue  # already present, nothing to do
    multiwordnet.db.compile(language, verbose=False)

#### Corpus

In [None]:
# List the corpora whose repo location is "remote", one per line:
# description (left-aligned, 64 columns), language, then corpus name.
remote_corpora = [
    f'{meta.description:<64}\t[{meta.language}]\t{name:<20}'
    for name, meta in manifest.items()
    if meta.repo["location"] == "remote"
]
print("\n".join(remote_corpora))

In [None]:
corpus_name = ""  # Enter corpus name here
corpus = Corpus(corpus_name)

# Download the corpus only when it is not already searchable
if not corpus.searchable:
    corpus.download()

#### Works

In [None]:
# Print every work in the selected corpus, ordered by document index number
entries = sorted(corpus.manifest.items(), key=lambda entry: entry[0])
for docix, work in entries:
    author, title = work["author"], work["title"]
    print(f"[{docix}] {author}, {title}")

In [None]:
# By default the entire corpus is searched.
works = corpus.works

# To search only selected works instead, uncomment the two lines below and
# fill in their document index numbers.
# docixs = []
# works = [corpus.work_by_docix(docix) for docix in docixs]

collection = Collection(works=works)

#### Query

##### E.g.,
| FORM     	    | LEMMA    	    | CONCEPT      	    | SEMFIELD 	            | MORPHOLOGY 	          | ...AS A FILTER 	      |
|:--------------|:--------------|:------------------|:----------------------|:------------------------|:----------------------|
| ``'animos'`` 	| ``<animus>`` 	| ``[en?courage]`` 	| ``{611}`` (= Anatomy) | ``:ACC.PL.``  (Leipzig) | ``<animus>\|ACC.PL.`` |

In [None]:
# Enter query in the space below, between the triple quotes
# (e.g. <animus> for a lemma search, as in the examples table).
# Leading and trailing whitespace is stripped from the resulting string.
query = """

""".strip()

#### Results

In [None]:
# Run the whitespace-trimmed query against the selected collection
searcher = Searcher(collection)
cleaned_query = query.strip()
results = searcher.search(cleaned_query)

# Context tags that are removed outright.
_CONTEXT_TAGS = re.compile(r"</?(?:pre|match|post)>")
# A highlighted span; DOTALL lets it run across line breaks.
_EMPHASIS = re.compile(r"<em>(.+?)</em>", re.DOTALL)
# Bold + cyan around the captured text, then a full SGR reset (ESC[0m) so the
# following text returns to the display's own default style.
_HIGHLIGHT = r"\033[1m\033[36m\1\033[0m"


def display_text(text: str) -> str:
    """Return *text* as wrapped plain text with highlighted spans marked.

    ``<pre>``, ``<match>`` and ``<post>`` tags (opening and closing) are
    removed. Each ``<em>...</em>`` span is replaced by its content wrapped in
    ANSI escape codes for bold cyan. The result is re-flowed with
    ``textwrap.wrap`` and joined with newlines.

    Args:
        text: Marked-up result text.

    Returns:
        The cleaned, wrapped text; an empty string for empty input.
    """
    text = _CONTEXT_TAGS.sub("", text)
    # Patterns are compiled with their flags: passing re.DOTALL as the fourth
    # positional argument of re.sub would set `count`, not `flags`.
    text = _EMPHASIS.sub(_HIGHLIGHT, text)
    return "\n".join(wrap(text))

# Print each hit as "<n>. <author>, <title>: <reference>" followed by its
# formatted text. The corpus and URN fields are not displayed; they are bound
# to underscore names so the loop does not overwrite the notebook-level
# `corpus` object that the earlier cells rely on.
for n, (_corpus, author, title, _urn, reference, text) in enumerate(results.to_text()):
    print(f"{n}. {author}, {title}: {reference}\n{display_text(text)}\n")