This notebook illustrates the `NLP()` pipeline on all available languages.

If dependency parse information is available, an example tree is printed, too.

In [1]:
from cltk import NLP
from cltk.dependency.tree import DependencyTree
from cltk.languages.example_texts import get_example_text
from cltk.languages.pipelines import *

In [2]:
# Lookup table: ISO 639-3 language code -> pipeline class used for that language.
# Insertion order (alphabetical by code) is the order the demo loop visits them.
iso_to_pipeline = dict(
    akk=AkkadianPipeline,
    ang=OldEnglishPipeline,
    arb=ArabicPipeline,
    arc=AramaicPipeline,
    chu=OCSPipeline,
    cop=CopticPipeline,
    enm=MiddleEnglishPipeline,
    frm=MiddleFrenchPipeline,
    fro=OldFrenchPipeline,
    gmh=MiddleHighGermanPipeline,
    got=GothicPipeline,
    grc=GreekPipeline,
    hin=HindiPipeline,
    lat=LatinPipeline,
    lzh=ChinesePipeline,
    non=OldNorsePipeline,
    pan=PanjabiPipeline,
    pli=PaliPipeline,
    san=SanskritPipeline,
)

In [3]:
# For every language in `iso_to_pipeline`: run `NLP()` on that language's
# example text, show the first `Word` of the first sentence and, when a
# dependency tree can be built from that sentence, print the tree too.
for lang, pipeline in iso_to_pipeline.items():
    print(f"{pipeline.language.name} ('{pipeline.language.iso_639_3_code}') ...")
    text = get_example_text(lang)
    cltk_nlp = NLP(language=lang)
    cltk_doc = cltk_nlp.analyze(text=text)
    # Fetch the first sentence once and reuse it below.
    first_sentence = cltk_doc.sentences[0]
    word = first_sentence[0]
    print("Example `Word`:", word)
    if all(w.features for w in first_sentence):
        try:
            a_tree = DependencyTree.to_tree(first_sentence)
        except Exception:
            # Catch `Exception` rather than using a bare `except:` so that
            # Ctrl-C / kernel interrupt still stops the run.
            # NOTE(review): presumably this fails because the pipeline has no
            # dependency process; the concrete exception type is not visible
            # here -- narrow it once confirmed.
            print(f"Dependency parsing Process not available for '{lang}'.")
        else:
            # Announce the tree only after it has actually been built, so the
            # output never says "Printing ..." followed by "not available".
            print("Printing dependency tree of first sentence ...")
            a_tree.print_tree()
    print("")

Akkadian ('akk') ...
‎𐤀 CLTK version 'cltk 1.0.0b10'.
Pipeline for language 'Akkadian' (ISO: 'akk'): `AkkadianTokenizationProcess`, `StopsProcess`.
Example `Word`: Word(index_char_start=0, index_char_stop=2, index_token=0, index_sentence=None, string=('u2-wa-a-ru', 'akkadian'), pos=None, lemma=None, stem=None, scansion=None, xpos=None, upos=None, dependency_relation=None, governor=None, features={}, category={}, embedding=None, stop=False, named_entity=None, syllables=None, phonetic_transcription=None, definition=None)
Printing dependency tree of first sentence ...
Dependency parsing Process not available for 'akk'.

Old English (ca. 450-1100) ('ang') ...
‎𐤀 CLTK version 'cltk 1.0.0b10'.
Pipeline for language 'Old English (ca. 450-1100)' (ISO: 'ang'): `MultilingualTokenizationProcess`, `OldEnglishLemmatizationProcess`, `OldEnglishEmbeddingsProcess`, `StopsProcess`, `OldEnglishNERProcess`.
Example `Word`: Word(index_char_start=0, index_char_stop=5, index_token=0, index_sentence=None, st

Example `Word`: Word(index_char_start=None, index_char_stop=None, index_token=0, index_sentence=0, string='отьчє', pos=noun, lemma='отьць', stem=None, scansion=None, xpos='Nb', upos='NOUN', dependency_relation='vocative', governor=7, features={Case: [vocative], Gender: [masculine], Number: [singular]}, category={F: [neg], N: [pos], V: [neg]}, embedding=None, stop=None, named_entity=None, syllables=None, phonetic_transcription=None, definition=None)
Printing dependency tree of first sentence ...
root | свѧтитъ_7/verb
    └─ vocative | отьчє_0/noun
        └─ nmod | нашь·_1/adjective
    └─ obl | ѥси_3/noun
        └─ case | ижє_2/adposition
    └─ obl | нєбєсѣхъ:_5/noun
        └─ case | на_4/adposition
    └─ mark | да_6/subordinating_conjunction
    └─ expl | сѧ_8/pronoun
    └─ nsubj | имѧ_9/noun
        └─ nmod | твоѥ·_10/adjective
    └─ ccomp | придєтъ_12/verb
        └─ mark | да_11/subordinating_conjunction
        └─ nsubj | цѣсар҄ьствиѥ_13/noun
            └─ nmod | твоѥ·_14/a

Example `Word`: Word(index_char_start=None, index_char_stop=None, index_token=0, index_sentence=0, string='ὅτι', pos=adverb, lemma='ὅτι', stem=None, scansion=None, xpos='Df', upos='ADV', dependency_relation='advmod', governor=6, features={}, category={F: [neg], N: [pos], V: [pos]}, embedding=array([ 1.42880e-01, -4.37029e-01, -2.17358e-01, -2.47890e-02,
        3.58539e-01,  2.29410e-02, -3.54123e-01, -4.01863e-01,
       -3.03181e-01,  5.93480e-01,  2.03442e-01,  2.06105e-01,
        2.50816e-01, -1.32482e-01, -5.46897e-01,  4.21188e-01,
        4.58564e-01, -1.44444e-01,  2.17758e-01,  5.60500e-03,
       -3.00801e-01,  3.48658e-01,  4.94777e-01, -7.17467e-01,
       -3.03590e-02, -3.86150e-02, -2.44708e-01, -5.74935e-01,
        3.96000e-04,  1.92331e-01, -7.32740e-02, -2.63904e-01,
       -7.05107e-01,  1.38101e-01, -5.37585e-01, -3.04847e-01,
        1.82334e-01, -1.48218e-01, -1.87638e-01,  2.06440e-02,
       -5.69110e-02, -3.38892e-01,  1.97945e-01, -2.10652e-01,
       -1.9204

Example `Word`: Word(index_char_start=0, index_char_stop=6, index_token=0, index_sentence=None, string='Raajaa', pos=None, lemma=None, stem=None, scansion=None, xpos=None, upos=None, dependency_relation=None, governor=None, features={}, category={}, embedding=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.