This notebook illustrates the `NLP()` pipeline on all available languages.

If dependency parse information is available, an example tree is printed, too.

In [1]:
%load_ext autoreload
%autoreload 2

In [5]:
from cltk import NLP
from cltk.dependency.tree import DependencyTree
from cltk.languages.example_texts import get_example_text
from cltk.languages.pipelines import *

In [6]:
iso_to_pipeline = {
    "akk": AkkadianPipeline,
    "ang": OldEnglishPipeline,
    "arb": ArabicPipeline,
    "arc": AramaicPipeline,
    "chu": OCSPipeline,
    "cop": CopticPipeline,
    "enm": MiddleEnglishPipeline,
    "frm": MiddleFrenchPipeline,
    "fro": OldFrenchPipeline,
    "gmh": MiddleHighGermanPipeline,
    "got": GothicPipeline,
    "grc": GreekPipeline,
    "hin": HindiPipeline,
    "lat": LatinPipeline,
    "lzh": ChinesePipeline,
    "non": OldNorsePipeline,
    "pan": PanjabiPipeline,
    "pli": PaliPipeline,
    "san": SanskritPipeline,
}

In [None]:
for lang, pipeline in iso_to_pipeline.items():
    print(f"{pipeline.language.name} ('{pipeline.language.iso_639_3_code}') ...")
    text = get_example_text(lang)
    cltk_nlp = NLP(language=lang)
    cltk_doc = cltk_nlp.analyze(text=text)
    cltk_doc.sentences_strings
    word = cltk_doc.sentences[0][0]
    print("Example `Word`:", word)
    if all([w.features for w in cltk_doc.sentences[0]]):
        print("Printing dependency tree of first sentence ...")
        try:
            a_tree = DependencyTree.to_tree(cltk_doc.sentences[0])
        except:
            print("DT failed")
            print("")
            continue
        a_tree.print_tree()
    print("")

Akkadian ('akk') ...
Example `Word`: Word(index_char_start=0, index_char_stop=2, index_token=0, index_sentence=None, string=('u2-wa-a-ru', 'akkadian'), pos=None, lemma=None, stem=None, scansion=None, xpos=None, upos=None, dependency_relation=None, governor=None, features={}, category={}, embedding=None, stop=False, named_entity=None, syllables=None, phonetic_transcription=None)
Printing dependency tree of first sentence ...
DT failed

Old English (ca. 450-1100) ('ang') ...
CLTK message: This part of the CLTK depends upon word embedding models from the Fasttext project.
Do you want to download file 'https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.ang.vec' to '/Users/kylejohnson/cltk_data/ang/embeddings/fasttext/wiki.ang.vec'? [Y/n] 



100%|██████████| 34.0M/34.0M [00:01<00:00, 28.0MiB/s]


Example `Word`: Word(index_char_start=0, index_char_stop=5, index_token=0, index_sentence=None, string='Hwæt.', pos=None, lemma='Hwæt.', stem=None, scansion=None, xpos=None, upos=None, dependency_relation=None, governor=None, features={}, category={}, embedding=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

100%|██████████| 1.61G/1.61G [00:51<00:00, 31.6MiB/s]
