In [1]:
import pickle

from class_token import Morph, Token # important: The loader needs the Token and Morph classes defined in the same scope, or pickle can't reconstitute them.

def load_sentences(pkl_file="oga_sentences.pkl"):
    """
    Read a pickled object from disk and hand it back unchanged.

    The pickle is expected to hold a list of sentences, each sentence
    being a list of Token objects. The classes of the pickled objects
    must be importable when this runs, otherwise unpickling fails.

    Only use this on files you trust: unpickling can execute arbitrary
    code embedded in the file.

    Parameters
    ----------
    pkl_file : str
        Path to the .pkl file

    Returns
    -------
    list
        Whatever object the pickle contains; by convention a list of
        sentences, where each sentence is a list of Token objects
    """
    # Binary mode is required: pickle streams are bytes, not text.
    with open(pkl_file, mode="rb") as handle:
        return pickle.load(handle)

# Unpickle example_ud.pkl; the result is bound to `sentences`, which the
# next cell passes to the macronizer.
sentences = load_sentences(pkl_file="example_ud.pkl")

In [5]:
import re
from grc_utils import colour_dichrona_in_open_syllables
from grc_macronizer.class_macronizer import Macronizer

# Run the macronizer over the loaded sentences; `output` is written to a text
# file and then split with a regex below, so it is treated as a single string.
macronizer = Macronizer(lowercase=True)
output = macronizer.macronize(sentences)

# Save the full macronized text as UTF-8.
with open("example_macronized.txt", mode="w", encoding="utf-8") as out_file:
    out_file.write(output)

# A chunk is a run of characters containing no full stop, newline, semicolon
# or U+037E (Greek question mark), optionally followed by one such delimiter.
sentence_pattern = re.compile(r'[^.\n;\u037e]+[.\n;\u037e]?')
MAX_PREVIEW_SENTENCES = 500  # cap on how many chunks get printed below

# filter(None, ...) drops empty strings, as the original truthiness check did.
output_split = list(filter(None, sentence_pattern.findall(output)))

# Print a preview, one chunk per line, passed through the colouring helper
# (presumably ANSI highlighting of dichrona, judging by the captured output).
for line in output_split[:MAX_PREVIEW_SENTENCES]:
    coloured = colour_dichrona_in_open_syllables(line)
    print(coloured)

Macronizing tokens ☕️: 100%|██████████| 161/161 [00:00<00:00, 358.32it/s]
                                                              

###### STATS ######
Dichrona in open syllables before:            405
Unmacronized dichrona in open syllables left: 138

[32m267[0m dichrona macronized.

Macronization ratio: [32m65.93%[0m
χρὴ γ[32mι[0m^νώσκειν ὅτ[32mι[0m^ π[32mά[0m_σης τῆς γῆς ὁ περ[32mί[0m^μετρος στ[32mά[0m^δ[32mι[0m^[32mά[0m^ ἐστ[32mι[0m^ δ[32mι[0m^σχ[32mί[0m_λ[32mι[0m^[32mα[0m^ καὶ μ[32mυ[0m_ρ[32mι[0m^[32mά[0m^δες εἴκοσ[32mι[0m^ πέντε μῆκος δὲ τῆς ἡμετέρ[32mα[0m_ς οἰκουμένης [32mἀ[0m^πὸ στόμ[32mα[0m^τος γάγγου ἕως γ[31mα[0mδείρων στ[32mά[0m^δ[32mι[0m^[32mα[0m^ ὀκτ[31mα[0mκισμ[31mύ[0mρ[31mι[0m[32mα[0m^ τρ[32mι[0m^σχ[32mί[0m_λ[32mι[0m^[32mα[0m^ ὀκ τ[31mα[0mκόσ[31mι[0m[32mα[0m^ τὸ δὲ πλ[32mά[0m^τος [32mἀ[0m^πὸ τῆς αἰθ[31mι[0mοπ[31mι[0mκῆς θ[32mα[0m^λάσσης ἕως τοῦ τ[31mα[0mν[31mά[0m[31mϊ[0mδος ποτ[32mα[0m^μοῦ στ[32mά[0m^δ[32mι[0m^[32mα[0m^ τρισμ[31mύ[0mρ[31mι[0m[32mα[0m^ πεντ[31mα[0mκισχ[31mί[0mλ[31mι[0m[32mα[0