# Calculate morphological complexity

The code in this notebook calculates the type-token ratio (**ttr**) and the moving-average type-token ratio (**mattr**) directly from the texts.

In [11]:
import os
from lexicalrichness import LexicalRichness
from statistics import mean, variance

In [27]:
def count_content_lines(lines):
    """
    Collects the positions of all lines that carry content.

    A line carries content when it is neither the empty string
    nor a bare newline character.

    :param lines: a list of lines
    :return: a list with the indices of the non-empty lines
    """
    return [position for position, text in enumerate(lines)
            if text and text != '\n']


def calculate_measure(filepath,
                      start_line=None,
                      measure='ttr',
                      save_folder=None,
                      rounding=5,
                      window_size=500):
    """
    Provided a filepath to a text, it calculates a measure
    specified in the measure argument.

    Empty lines are dropped first. If a start_line number is provided,
    the text starts at that position, counted in non-empty lines.

    :param filepath: filepath to input file (read as UTF-8)
    :param start_line: None or an integer for a starting point;
        None and 0 both keep the whole text
    :param measure: 'ttr' or 'mattr'
    :param save_folder: if provided, the lines that are scored are saved,
        numbered, in a file of the same name in the given folder
        (the folder has to exist already)
    :param rounding: the number of digits the value of the measure will be rounded to
    :param window_size: window size handed to LexicalRichness.mattr,
        only used when measure is 'mattr'
    :return: a dictionary containing the score (under the measure name),
        line_count, first_line and last_line
    :raises ValueError: if measure is not supported, or if no content
        lines are left to score
    """
    # reject an unsupported measure before any file is read or written
    if measure not in ('ttr', 'mattr'):
        raise ValueError("Value for measure should be 'ttr' or 'mattr'.")

    # Decode explicitly instead of relying on the platform default encoding.
    # NOTE(review): assumes the corpus files are UTF-8 - confirm.
    with open(filepath, 'r', encoding='utf-8') as infile:
        # remove empty lines
        lines = [line.strip('\n') for line in infile
                 if line != '\n' and line]

    if start_line:
        lines = lines[start_line:]

    # without this check the failure would surface later as an
    # unrelated-looking error when scoring or indexing an empty text
    if not lines:
        raise ValueError(
            'No content lines left in {} (start_line={}).'.format(
                filepath, start_line))

    if save_folder:
        output_path = os.path.join(save_folder,
                                   os.path.basename(filepath))

        with open(output_path, 'w', encoding='utf-8') as outfile:
            outfile.writelines('{}\t{}\n'.format(idx, line)
                               for idx, line in enumerate(lines))

    lex = LexicalRichness(' '.join(lines))

    if measure == 'ttr':
        score = lex.ttr
    else:
        score = lex.mattr(window_size=window_size)

    return {
        measure: round(score, rounding),
        "line_count": len(count_content_lines(lines)),
        # to verify the content covers the same material for all languages
        "first_line": lines[0],
        "last_line": lines[-1]
    }


def calculate_morph_scores(filepath, start_line=None, save_folder=None):
    """
    Scores one text with both TTR and MATTR and bundles the results
    with the text name, its length and its first and last lines.

    The two measures are computed in separate passes; the passes have to
    agree on line count, first line and last line.

    :param filepath: input filepath
    :param start_line: None or an integer for a starting point
    :param save_folder: if provided, content is saved in a separate file in the given folder
    :return: a dictionary of ttr, mattr, length, text_name, first_line and last_line
    :raises ValueError: if the two passes did not cover the same content
    """
    text_name = os.path.basename(filepath)

    ttr_result = calculate_measure(filepath,
                                   measure='ttr',
                                   start_line=start_line)
    # only the second pass writes the content file
    mattr_result = calculate_measure(filepath,
                                     measure='mattr',
                                     start_line=start_line,
                                     save_folder=save_folder)

    # both passes must describe exactly the same stretch of text
    shared_keys = ("line_count", "first_line", "last_line")
    if any(ttr_result[key] != mattr_result[key] for key in shared_keys):
        raise ValueError('Something does not match up.')

    return {
        "ttr": ttr_result["ttr"],
        "mattr": mattr_result["mattr"],
        "length": ttr_result["line_count"],
        "text_name": text_name,
        "first_line": ttr_result["first_line"],
        "last_line": ttr_result["last_line"],
    }


def aggregate_scores_for_language(filepaths, start_lines=None, save_folder=None):
    """
    Scores every text of one language, prints a short report per text
    and summarises the scores.

    :param filepaths: a list of filepaths
    :param start_lines: a list of starting points, one per filepath and in the
        same order, or None if all texts start at 0
    :param save_folder: if provided, content is saved in a separate file in the given folder
    :return: a dictionary of average ttr, average mattr, average line count,
        and the ttr and mattr variances (0 when only one text is given)
    """
    ttr_values, mattr_values, line_counts = [], [], []

    for position, path in enumerate(filepaths):
        # without start_lines every text is scored from its beginning
        offset = start_lines[position] if start_lines else None
        scores = calculate_morph_scores(path,
                                        start_line=offset,
                                        save_folder=save_folder)

        ttr_values.append(scores['ttr'])
        mattr_values.append(scores['mattr'])
        line_counts.append(scores['length'])

        # report: running number, file name, boundary lines, line count
        print(position + 1)
        print('Bible: {}'.format(scores['text_name']))
        print('First line: | {}'.format(scores['first_line']))
        print('Last line: | {}'.format(scores['last_line']))
        print(scores['length'])
        print('-' * 15)
        print()

    summary = {
        "mean_ttr": round(mean(ttr_values), 3),
        "mean_mattr": round(mean(mattr_values), 3),
        "avg_line_count": int(mean(line_counts)),
    }

    # a sample variance needs at least two values
    if len(filepaths) > 1:
        summary["ttr_variance"] = round(variance(ttr_values), 5)
        summary["mattr_variance"] = round(variance(mattr_values), 5)
    else:
        summary["ttr_variance"] = 0
        summary["mattr_variance"] = 0

    return summary


def find_start(filepath, line_count):
    """
    Helper function to look up one non-empty line of a text, e.g. to
    find the offset at which the wanted content of a Bible starts.

    :param filepath: filepath to input file (read as UTF-8)
    :param line_count: index into the non-empty lines of the file
    :return: the line at that index, without its newline
    :raises IndexError: if the file has too few non-empty lines
    """
    # Decode explicitly instead of relying on the platform default encoding.
    # NOTE(review): assumes the corpus files are UTF-8 - confirm.
    with open(filepath, 'r', encoding='utf-8') as infile:
        content_lines = [line.strip('\n') for line in infile
                         if line != '\n' and line]
    return content_lines[line_count]

## Define the generic path to the Parallel Bible Corpus

In [13]:
# Directory holding the corpus text files, relative to the working directory.
generic_path = '../resources/parallel_bible_corpus/'

In [14]:
# Corpus filenames per language; numbered names are alternative texts of
# the same language. The names are joined with generic_path when used.
# Several of these names (english, finnish, french, german, greek, haitian,
# hungarian, indonesian, javanese, korean, latin, latvian, persian,
# portuguese) are rebound to lists of filenames in later cells.
amharic = 'amh-x-bible-amh-v1.txt'
arabic = 'arb-x-bible-arb-v1.txt'
bangla1 = 'ben-x-bible-holybible-v1.txt'
bangla2 = 'ben-x-bible-common-v1.txt'
bangla3 = 'ben-x-bible-easy-v1.txt'
bangla4 = 'ben-x-bible-mussolmani-v1.txt'
bangla5 = 'ben-x-bible-old-v1.txt'
burmese1 = 'mya-x-bible-common-v1.txt'
burmese2 = 'mya-x-bible-mya-v1.txt'
eastern_armenian = 'hye-x-bible-eastern-v1.txt'
western_armenian = 'hye-x-bible-western-v1.txt'
basque1 = 'eus-x-bible-navarrolabourdin-v1.txt'
basque2 = 'eus-x-bible-batua-v1.txt'
buryat = 'bxr-x-bible-bxr-v1.txt'
czech1 = 'ces-x-bible-living-v1.txt'
czech2 = 'ces-x-bible-bible21-v1.txt'
czech3 = 'ces-x-bible-ekumenicky-v1.txt'
czech4 = 'ces-x-bible-kralicka-v1.txt'
czech5 = 'ces-x-bible-novakarlica-v1.txt'
czech6 = 'ces-x-bible-preklad-v1.txt'
english = 'eng-x-bible-common-v1.txt'
estonian = 'est-x-bible-portions-v1.txt'
finnish = 'fin-x-bible-1992-v1.txt'
french = 'fra-x-bible-courant1997-v1.txt'
georgian = 'kat-x-bible-kat-v1.txt'
german = 'deu-x-bible-schlachter2000-v1.txt'
greek = 'ell-x-bible-hellenic1-v1.txt'
haitian = 'hat-x-bible-1999-v1.txt'
hindi = 'hin-x-bible-hin-v1.txt'  # possibly not actually hindi
hungarian = 'hun-x-bible-2012-v1.txt'
icelandic = 'isl-x-bible-isl-v1.txt'
ilocano = 'ilo-x-bible-ilo-v1.txt'
indonesian = 'ind-x-bible-kabarbaik-v1.txt'
italian = 'ita-x-bible-vita1997-v1.txt'
javanese = 'jav-x-bible-jav-v1.txt'
korean = 'kor-x-bible-kor-v1.txt'
latin = 'lat-x-bible-novavulgata-v1.txt'
latvian = 'lav-x-bible-1997-v1.txt'
mari = 'mhr-x-bible-mhr-v1.txt'
erzya = 'myv-x-bible-myv-v1.txt'
maori = 'mri-x-bible-mri-v1.txt'
sami = 'sme-x-bible-sme-v1.txt'
persian = 'fas-x-bible-newmillennium2011-v1.txt'
portuguese = 'por-x-bible-linguagemdehoje-v1.txt'
quechua = 'qub-x-bible-2009-v1.txt'
russian = 'rus-x-bible-modern2011-v1.txt'
spanish = 'spa-x-bible-blph-youversion-v1.txt'
swahili = 'swh-x-bible-swh-v1.txt'
tamil = 'tam-x-bible-tam-v1.txt'
turkish = 'tur-x-bible-tur-v1.txt'
vietnamese = 'vie-x-bible-revised2010-v1.txt'
wolof = 'wol-x-bible-wol-v1.txt'

In [30]:
# Basque: no start_lines are given, so every text is scored from its
# first non-empty line.
# Disabled arguments kept for reference:
#     language='eu', save_folder=os.path.join(generic_path, "contents/basque")
aggregate_scores_for_language(
    [os.path.join(generic_path, name)
     for name in (basque1, basque2, 'eus-x-bible-Hautin1571-v1.txt')]
)

1
Bible: eus-x-bible-navarrolabourdin-v1.txt
First line: | IESVS CHRIST Dauid-en semearen , Abrahamen semearen generationeco Liburuä .
Last line: | Iesus Christ gure Iaunaren gratia dela çuequin gucioquin . Amen .
7958
---------------

2
Bible: eus-x-bible-batua-v1.txt
First line: | Abrahamen eta Daviden ondorengo den Jesus Mesiasen arbasoen zerrenda :
Last line: | Jesus Jaunaren grazia guztiokin .
7644
---------------

3
Bible: eus-x-bible-Hautin1571-v1.txt
First line: | SVS CHRIST Dauid-en semearen , Abrahamen semearen generationeco Liburuä .
Last line: | Iesus Christ gure Iaunaren gratia dela çuequin gucioquin . Amen .
7958
---------------



{'mean_ttr': 0.124,
 'mean_mattr': 0.627,
 'avg_line_count': 7853,
 'ttr_variance': 9e-05,
 'mattr_variance': 2e-05}

In [31]:
# Bangla: start_lines are offsets into the non-empty lines of each text,
# in the order of the file list; 0 keeps the whole text.
aggregate_scores_for_language(
    [os.path.join(generic_path, name)
     for name in (bangla1, bangla2, bangla3, bangla4, bangla5)],
    start_lines=[0, 22434, 0, 22443, 0],
)

1
Bible: ben-x-bible-holybible-v1.txt
First line: | যীশু খ্রীষ্ট দায়ূদের বংশের এবং দায়ূদ অব্রাহামের বংশের লোক । যীশু খ্রীষ্টের বংশের তালিকা এই :
Last line: | প্রভু যীশুর আশীর্বাদ ঈশ্বরের সব লোকদের সংগে থাকুক । আমেন ।
7895
---------------

2
Bible: ben-x-bible-common-v1.txt
First line: | যীশু খ্রীষ্ট দায়ূদের বংশের এবং দায়ূদ অব্রাহামের বংশের লোক । যীশু খ্রীষ্টের বংশের তালিকা এই :
Last line: | প্রভু যীশুর আশীর্বাদ ঈশ্বরের সব লোকদের সংগে থাকুক । আমেন ।
7803
---------------

3
Bible: ben-x-bible-easy-v1.txt
First line: | এই হল যীশু খ্রীষ্টের বংশ তালিকা ৷ ইনি ছিলেন রাজা দায়ূদের বংশধর , দায়ূদ ছিলেন অব্রাহামের বংশধর ৷
Last line: | প্রভু যীশুর অনুগ্রহ তাঁর সকল লোকের সহবর্তী হোক্ ৷
7893
---------------

4
Bible: ben-x-bible-mussolmani-v1.txt
First line: | ঈসা মসীহ্‌ দাউদের বংশের এবং দাউদ ইব্রাহিমের বংশের লোক । ঈসা মসীহের বংশের তালিকা এই :
Last line: | হযরত ঈসার রহমত আল্লাহ্‌র সব বান্দাদের উপর থাকুক । আমিন ।
7812
---------------

5
Bible: ben-x-bible-old-v1.txt
First line: | ঈসা মসীহ্‌দাউদে

{'mean_ttr': 0.05,
 'mean_mattr': 0.519,
 'avg_line_count': 7872,
 'ttr_variance': 5e-05,
 'mattr_variance': 5e-05}

In [45]:
# Burmese: the second text starts at offset 23000 of its non-empty lines;
# 0 keeps the whole text.
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in (burmese1, burmese2)],
    start_lines=[0, 23000],
)

Bible: mya-x-bible-common-v1.txt
First line: | အာ​ဗြ​ဟံ​နှင့်​ဒါ​ဝိဒ်​မှ​ဆင်း​သက်​တော်​မူ​သော သ​ခင်​ယေ​ရှု​ခ​ရစ်​၏​ဆွေ​တော်​စဉ်​မျိုး​တော်​ဆက် စာ​ရင်း​ကား​ဤ​သို့​တည်း ။
Last line: | သ​ခင်​ယေ​ရှု​၏​ကျေး​ဇူး​တော်​ကို​လူ​တိုင်း ခံ​စား​ရ​ကြ​ပါ​စေ​သော ။ ဋ္ဌမ္မ​သစ်​ကျမ်း​တို့​တွင်​နောက်​ဆုံး​ကျမ်း​တည်း ဟူ​သော​ရှင်​ယော​ဟန်​စီ​ရင်​ရေး​ထား​သော​ဗျာ​ဒိတ် ကျမ်း​ပြီး​၏ ။
7604
---------------

Bible: mya-x-bible-mya-v1.txt
First line: | အာဗြဟံနှင့် ဒါဝိဒ်တို့မှဆင်းသက်တော်မူသော ယေရှုခရစ် ၏ ဆွေတော်စဉ်မျိုးတော်ဆက်စာရင်း ဟူမူကား ၊
Last line: | ဤအရာများကို သက်သေခံသောသူ မိန့်တော် မူ ၏ အာမင် ၊ သခင်ယေရှုကြွလာတော်မူပါ ။ ငါတို့သခင် ယေရှုခရစ် ၏ ကျေးဇူးတော်သည် သန့်ရှင်းသူ အပေါင်း တို့ ၌ ရှိစေသတည်း ။
7928
---------------



{'mean_ttr': 0.458,
 'mean_mattr': 0.644,
 'avg_line_count': 7766,
 'ttr_variance': 0.04078,
 'mattr_variance': 0.01168}

In [61]:
# Czech: start_lines are offsets into the non-empty lines of each text,
# in the order of the file list; 0 keeps the whole text.
aggregate_scores_for_language(
    [os.path.join(generic_path, name)
     for name in (czech1, czech2, czech3, czech4, czech5, czech6)],
    start_lines=[0, 0, 23313, 23234, 0, 0],
)

Bible: ces-x-bible-living-v1.txt
First line: | Ježíšův rodokmen sahá přes Davida až k Abrahamovi . Od praotce Abrahama k Davidovi je to čtrnáct generací , dále čtrnáct generací od Davida po babylónské zajetí a od zajetí v Babylóně až ke Kristu také čtrnáct . Rodokmen uzavírá Josef , manžel Marie , které se narodil Ježíš - Boží Syn .
Last line: | Vám všem , milí čtenáři , platí dosud nabídka Kristovy milosti !
7957
---------------

Bible: ces-x-bible-bible21-v1.txt
First line: | Kniha rodu Ježíše Krista , syna Davidova , syna Abrahamova :
Last line: | Milost Pána Ježíše se všemi .
7926
---------------

Bible: ces-x-bible-ekumenicky-v1.txt
First line: | Listina rodu Ježíše Krista , syna Davidova , syna Abrahamova .
Last line: | Milost Pána Ježíše se všemi .
7959
---------------

Bible: ces-x-bible-kralicka-v1.txt
First line: | Kniha ( o ) rodu Ježíše Krista syna Davidova , syna Abrahamova .
Last line: | Milost Pána našeho Jezukrista se všemi vámi . Amen .
7958
---------------

Bible: ces

{'mean_ttr': 0.121,
 'mean_mattr': 0.602,
 'avg_line_count': 7953,
 'ttr_variance': 0.00012,
 'mattr_variance': 0.00051}

In [62]:
# All English texts to aggregate. This rebinds `english`, which earlier
# held a single filename.
english = [
    'eng-x-bible-literal-v1.txt',
    'eng-x-bible-livingoracles-v1.txt',
    'eng-x-bible-majority-v1.txt',
    'eng-x-bible-montgomery-v1.txt',
    'eng-x-bible-new2007-v1.txt',
    'eng-x-bible-newcentury-v1.txt',
    'eng-x-bible-newinternational-v1.txt',
    'eng-x-bible-newliving-v1.txt',
    'eng-x-bible-newsimplified-v1.txt',
    'eng-x-bible-newreaders-v1.txt',
    'eng-x-bible-scriptures-v1.txt',
    'eng-x-bible-treeoflife-v1.txt',
    'eng-x-bible-worldwide-v1.txt',
    'eng-x-bible-lexham-v1.txt',
    'eng-x-bible-kingjames-v1.txt',
    'eng-x-bible-internationalstandard-v1.txt',
    'eng-x-bible-goodnews-v1.txt',
    'eng-x-bible-godsword-v1.txt',
    'eng-x-bible-etheridge-v1.txt',
    'eng-x-bible-easytoread-v1.txt',
    'eng-x-bible-diaglot-v1.txt',
    'eng-x-bible-darby-v1.txt',
    'eng-x-bible-contemporary-v1.txt',
    'eng-x-bible-common-v1.txt',
    'eng-x-bible-clontz-v1.txt',
    'eng-x-bible-books-v1.txt',
    'eng-x-bible-amplified-v1.txt',
]

# show how many texts there are, to line them up with the start offsets
len(english)

27

In [101]:
# Show the non-empty line at offset 23144 of the King James text
# (presumably to settle its start_lines entry).
find_start(os.path.join(generic_path, 'eng-x-bible-kingjames-v1.txt'), 23144)

'The book of the generation of Jesus Christ , the son of David , the son of Abraham .'

In [102]:
# English: one start offset per entry of `english`, in the same order;
# 0 keeps the whole text.
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in english],
    start_lines=[
        23145, 0, 0, 0, 0, 0, 0, 0,
        23145, 0, 0, 0, 0, 0,
        23144, 0, 0, 0, 0, 0, 0,
        23145, 0, 0, 0, 0, 0,
    ],
)

Bible: eng-x-bible-literal-v1.txt
First line: | The Book of the genealogy of Jesus Christ the son of David , the son of Abraham :
Last line: | The grace of our Lord Jesus Christ be with all of you . Amen .
7957
---------------

Bible: eng-x-bible-livingoracles-v1.txt
First line: | The History of Jesus Christ , Son of David , Son of Abraham .
Last line: | May the favor of the Lord Jesus Christ be with all the saints !
7957
---------------

Bible: eng-x-bible-majority-v1.txt
First line: | The book of the genealogy of Jesus Christ , the Son of David , the Son of Abraham :
Last line: | The grace of the Lord Jesus Christ [ be ] with all the saints . Amen .
7957
---------------

Bible: eng-x-bible-montgomery-v1.txt
First line: | The book of the generation of Jesus Christ , the son of David , the son of Abraham .
Last line: | The grace of the Lord Jesus Christ be with the saints !
7935
---------------

Bible: eng-x-bible-new2007-v1.txt
First line: | This is the record of the genealogy of Jesu

{'mean_ttr': 0.034,
 'mean_mattr': 0.41,
 'avg_line_count': 7941,
 'ttr_variance': 6e-05,
 'mattr_variance': 0.00036}

In [104]:
# All Finnish texts to aggregate. This rebinds `finnish`, which earlier
# held a single filename.
finnish = [
    'fin-x-bible-1766-v1.txt',
    'fin-x-bible-1933-v1.txt',
    'fin-x-bible-1992-v1.txt',
]

In [107]:
# Finnish: one start offset per entry of `finnish`, in the same order.
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in finnish],
    start_lines=[23145, 23144, 23145],
)

Bible: fin-x-bible-1766-v1.txt
First line: | Jesuksen Kristuksen syntymäkirja , Davidin pojan , Abrahamin pojan .
Last line: | Meidän Herran Jesuksen Kristuksen armo olkoon teidän kaikkein kanssanne . Amen !
7957
---------------

Bible: fin-x-bible-1933-v1.txt
First line: | Jeesuksen Kristuksen , Daavidin pojan , Aabrahamin pojan , syntykirja .
Last line: | Herran Jeesuksen armo olkoon kaikkien kanssa . Amen .
7943
---------------

Bible: fin-x-bible-1992-v1.txt
First line: | Jeesuksen Kristuksen , Daavidin pojan ja Abrahamin pojan , sukuluettelo :
Last line: | Herran Jeesuksen armo olkoon kaikkien kanssa .
7957
---------------



{'mean_ttr': 0.136,
 'mean_mattr': 0.596,
 'avg_line_count': 7952,
 'ttr_variance': 0.00035,
 'mattr_variance': 0.00184}

In [225]:
# All French texts to aggregate. This rebinds `french`, which earlier held
# a single filename. Commented-out entries are excluded from the run.
french = [
    # 'fra-x-bible-zadockahn-v1.txt',
    'fra-x-bible-semeur-v1.txt',
    'fra-x-bible-segond21-v1.txt',
    'fra-x-bible-pirotclamer-v1.txt',
    'fra-x-bible-perret-v1.txt',
    'fra-x-bible-paroledevie-v1.txt',
    'fra-x-bible-ostervald1867-v1.txt',
    'fra-x-bible-nouvellesegond-v1.txt',
    'fra-x-bible-louissegond-v1.txt',
    'fra-x-bible-kingjames-v1.txt',
    'fra-x-bible-jerusalem2004-v1 2.txt',
    'fra-x-bible-geneve1669-v1.txt',
    'fra-x-bible-davidmartin-v1.txt',
    'fra-x-bible-darby-v1.txt',
    'fra-x-bible-crampon-v1.txt',
    'fra-x-bible-courant1997-v1 2.txt',
    'fra-x-bible-bonnet-v1.txt',
    # 'fra-x-bible-jerusalem2004-v1.txt',
]

# show how many texts there are, to line them up with the start offsets
len(french)

16

In [224]:
# Show the non-empty line at offset 23271 of the Crampon text
# (presumably to settle its start_lines entry).
find_start(os.path.join(generic_path, 'fra-x-bible-crampon-v1.txt'), 23271)

'Généalogie de Jésus-Christ , fils de David , fils d`Abraham .'

In [226]:
# French: one start offset per entry of `french`, in the same order;
# 0 keeps the whole text.
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in french],
    start_lines=[
        0,      # semeur
        0,      # segond21
        23145,  # pirotclamer
        23132,  # perret
        22931,  # paroledevie
        23145,  # ostervald1867
        0,      # nouvellesegond
        23145,  # louissegond
        23214,  # kingjames
        23241,  # jerusalem2004-v1 2
        0,      # geneve1669
        23145,  # davidmartin
        23145,  # darby
        23271,  # crampon
        0,      # courant1997-v1 2
        23145,  # bonnet
        # 23145  (disabled together with the last, commented-out file)
    ],
)

Bible: fra-x-bible-semeur-v1.txt
First line: | Voici la généalogie de Jésus-Christ , de la descendance de * David et d’ * Abraham .
Last line: | Que le Seigneur Jésus accorde sa grâce à tous .
7921
---------------

Bible: fra-x-bible-segond21-v1.txt
First line: | Voici la généalogie de Jésus-Christ , fils de David , fils d'Abraham .
Last line: | Que la grâce du Seigneur Jésus [ -Christ ] soit avec tous les saints !
7957
---------------

Bible: fra-x-bible-pirotclamer-v1.txt
First line: | Généalogie de Jésus-Christ , fils de David , fils d'Abraham .
Last line: | La grâce du Seigneur Jésus soit avec les saints .
7957
---------------

Bible: fra-x-bible-perret-v1.txt
First line: | Table généalogique de Jésus-Christ , fils de David , fils d'Abraham :
Last line: | La grâce du Seigneur Jésus soit avec tous !
7956
---------------

Bible: fra-x-bible-paroledevie-v1.txt
First line: | Voici la liste des ancêtres de Jésus-Christ : David est l'ancêtre de Jésus-Christ , et Abraham est l'ancêtre de 

{'mean_ttr': 0.055,
 'mean_mattr': 0.453,
 'avg_line_count': 8172,
 'ttr_variance': 5e-05,
 'mattr_variance': 0.00031}

In [281]:
# Show the non-empty line at offset 22210 of the Tafelbibel text
# (presumably a probe while searching for its start offset).
find_start(os.path.join(generic_path, 'deu-x-bible-tafelbibel-v1.txt'), 22210)

'Welchen Propheten haben eure Väter nicht verfolgt , und getötet die , so die Zukunft des Gerechten verkündeten , dessen Verräter und Mörder ihr nunmehr geworden seid .'

In [298]:
# All German texts to aggregate. This rebinds `german`, which earlier held
# a single filename. The commented-out entry is excluded from the run.
german = [
    'deu-x-bible-zuercher-v1.txt',
    # 'deu-x-bible-v1.txt',
    'deu-x-bible-textbibel-v1.txt',
    'deu-x-bible-tafelbibel-v1.txt',
    'deu-x-bible-schlachter2000-v1.txt',
    'deu-x-bible-schlachter-v1.txt',
    'deu-x-bible-reinhardt1910-v1.txt',
    'deu-x-bible-pattloch-v1.txt',
    'deu-x-bible-neue-v1.txt',
    'deu-x-bible-meister-v1.txt',
    'deu-x-bible-luther1912-v1.txt',
    'deu-x-bible-luther1545letztehand-v1.txt',
    'deu-x-bible-luther1545-v1.txt',
    'deu-x-bible-konkordant-v1.txt',
    'deu-x-bible-interlinear-v1.txt',
    'deu-x-bible-hoffnung-v1.txt',
    'deu-x-bible-gruenewalder-v1.txt',
    'deu-x-bible-greber-v1.txt',
    'deu-x-bible-freebible-v1.txt',
    'deu-x-bible-erben-v1.txt',
    'deu-x-bible-elberfelder1905-v1.txt',
    'deu-x-bible-elberfelder1871-v1.txt',
    'deu-x-bible-albrecht-v1.txt',
]

# show how many texts there are, to line them up with the start offsets
len(german)

22

In [300]:
# German: one start offset per entry of `german`, in the same order;
# 0 keeps the whole text. The scored lines of every text are also written
# to the "contents" folder, which has to exist.
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in german],
    save_folder=os.path.join(generic_path, "contents"),
    start_lines=[
        23212,  # 1  zuercher
        23216,  # 2  textbibel
        18187,  # 3  tafelbibel
        0,      # 4  schlachter2000
        23145,  # 5  schlachter
        0,      # 6  reinhardt1910
        24192,  # 7  pattloch
        23012,  # 8  neue
        0,      # 9  meister
        23145,  # 10 luther1912
        23215,  # 11 luther1545letztehand
        23145,  # 12 luther1545
        0,      # 13 konkordant
        0,      # 14 interlinear
        0,      # 15 hoffnung
        23477,  # 16 gruenewalder
        0,      # 17 greber
        23213,  # 18 freebible
        0,      # 19 erben
        23145,  # 20 elberfelder1905
        23145,  # 21 elberfelder1871
        0,      # 22 albrecht
    ],
)

1
Bible: deu-x-bible-zuercher-v1.txt
First line: | DAS Buch der Abstammung Jesu Christi , des Sohnes Davids , des Sohnes Abrahams .
Last line: | Die Gnade des Herrn Jesus sei mit allen !
7957
---------------

2
Bible: deu-x-bible-textbibel-v1.txt
First line: | Stammbaum Jesus Christus' , des Sohnes Davids , des Sohnes Abrahams :
Last line: | Die Gnade des Herrn Jesus mit allen .
7957
---------------

3
Bible: deu-x-bible-tafelbibel-v1.txt
First line: | Das Buch von der Geburt Jesu Christi , des Sohnes Davids , des Sohnes Abrahams .
Last line: | Die Gnade unseres Herrn Jesus Christus sei mit euch allen ! Amen .
7957
---------------

4
Bible: deu-x-bible-schlachter2000-v1.txt
First line: | Geschlechtsregister Jesu Christi , des Sohnes Davids , des Sohnes Abrahams .
Last line: | Die Gnade unseres Herrn Jesus Christus sei mit euch allen ! Amen .
7958
---------------

5
Bible: deu-x-bible-schlachter-v1.txt
First line: | Geschlechtsregister Jesu Christi , des Sohnes Davids , des Sohnes Abrah

{'mean_ttr': 0.061,
 'mean_mattr': 0.484,
 'avg_line_count': 8310,
 'ttr_variance': 4e-05,
 'mattr_variance': 0.00053}

In [301]:
# All Greek texts to aggregate. This rebinds `greek`, which earlier held
# a single filename.
greek = [
    'ell-x-bible-hellenic1-v1.txt',
    'ell-x-bible-koine1894-v1.txt',
    'ell-x-bible-modern2009-v1.txt',
]

In [303]:
# Greek: one start offset per entry of `greek`; 0 keeps the whole text.
# The scored lines are also written to the "contents/greek" folder.
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in greek],
    save_folder=os.path.join(generic_path, "contents/greek"),
    start_lines=[0, 0, 23145],
)

1
Bible: ell-x-bible-hellenic1-v1.txt
First line: | Βιβλίον τῆς ἱστορίας τοῦ Ἰησοῦ Χριστοῦ , τοῦ υἱοῦ τοῦ Δαυΐδ , τοῦ υἱοῦ τοῦ Ἀβραάμ .
Last line: | Ἡ χάρις τοῦ Κυρίου Ἰησοῦ Χριστοῦ νὰ εἶναι μαζὶ μὲ ὅλους τοὺς ἁγίους . Ἀμήν .
7914
---------------

2
Bible: ell-x-bible-koine1894-v1.txt
First line: | Βίβλος γενέσεως Ἰησοῦ Χριστοῦ , υἱοῦ Δαβὶδ , υἱοῦ Ἀβραάμ .
Last line: | Ἡ χάρις τοῦ Κυρίου ημῶν Ἰησοῦ Χριστοῦ μετὰ πάντων ὑμῶν . ἀμήν .
7957
---------------

3
Bible: ell-x-bible-modern2009-v1.txt
First line: | Βιβλος της γενεαλογιας του Ιησου Χριστου , υιου του Δαβιδ , υιου του Αβρααμ .
Last line: | Η χαρις του Κυριου ημων Ιησου Χριστου ειη μετα παντων υμων ? αμην .
7957
---------------



{'mean_ttr': 0.108,
 'mean_mattr': 0.51,
 'avg_line_count': 7942,
 'ttr_variance': 0.00049,
 'mattr_variance': 0.00094}

In [304]:
# All Haitian texts to aggregate. This rebinds `haitian`, which earlier
# held a single filename.
haitian = [
    'hat-x-bible-1985-v1.txt',
    'hat-x-bible-1999-v1.txt',
]

In [307]:
# Haitian: one start offset per entry of `haitian`, in the same order.
# The scored lines are also written to the "contents/haitian" folder.
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in haitian],
    save_folder=os.path.join(generic_path, "contents/haitian"),
    start_lines=[23139, 23051],
)

1
Bible: hat-x-bible-1985-v1.txt
First line: | Men non zansèt Jezikri ki te pitit pitit David , pitit pitit Abraram .
Last line: | Se pou benediksyon Seyè Jezi a rete toujou la ak tout moun .
7958
---------------

2
Bible: hat-x-bible-1999-v1.txt
First line: | Men non zansèt Jezikri ki te pitit pitit David , pitit pitit Abraram .
Last line: | Se pou benediksyon Senyè Jezi a rete toujou la ak tout moun .
7949
---------------



{'mean_ttr': 0.014,
 'mean_mattr': 0.328,
 'avg_line_count': 7953,
 'ttr_variance': 0.0,
 'mattr_variance': 0.0}

In [311]:
# All Hungarian texts to aggregate. This rebinds `hungarian`, which earlier
# held a single filename.
hungarian = [
    'hun-x-bible-2003-v1.txt',
    'hun-x-bible-2005-v1.txt',
    'hun-x-bible-2012-v1.txt',
    'hun-x-bible-karoli-v1.txt',
    'hun-x-bible-revised-v1.txt',
]

In [313]:
# Hungarian: one start offset per entry of `hungarian`; 0 keeps the whole
# text. The scored lines are also written to "contents/hungarian".
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in hungarian],
    save_folder=os.path.join(generic_path, "contents/hungarian"),
    start_lines=[0, 23127, 0, 23240, 0],
)

1
Bible: hun-x-bible-2003-v1.txt
First line: | Jézus Krisztusnak , Dávid fiának , Ábrahám fiának nemzetségéről szóló könyv .
Last line: | A mi Urunknak , Jézus Krisztusnak kegyelme legyen mindnyájatokkal ! Ámen .
7953
---------------

2
Bible: hun-x-bible-2005-v1.txt
First line: | Jézus Krisztusnak , a Dávid fiának , az Ábrahám fiának nemzetségkönyve .
Last line: | Az Úr Jézus kegyelme legyen mindnyájatokkal ! Ámen .
7957
---------------

3
Bible: hun-x-bible-2012-v1.txt
First line: | Ez Jézus Krisztus családfája . Jézus Dávid családjából származik , Dávid pedig Ábrahám családjából .
Last line: | Az Úr Jézus kegyelme legyen veletek !
7826
---------------

4
Bible: hun-x-bible-karoli-v1.txt
First line: | Jézus Krisztusnak , Dávid fiának , Ábrahám fiának nemzetségérõl való könyv .
Last line: | A mi Urunk Jézus Krisztusnak kegyelme [ legyen ] mindnyájan ti veletek . Ámen .
7959
---------------

5
Bible: hun-x-bible-revised-v1.txt
First line: | Jézus Krisztusnak , a Dávid fiának , az Ábrah

{'mean_ttr': 0.144,
 'mean_mattr': 0.567,
 'avg_line_count': 7930,
 'ttr_variance': 0.00017,
 'mattr_variance': 0.00031}

In [314]:
# All Indonesian texts to aggregate. This rebinds `indonesian`, which
# earlier held a single filename.
indonesian = [
    'ind-x-bible-suciinjil-v1.txt',
    'ind-x-bible-easy2005-v1.txt',
    'ind-x-bible-kabarbaik-v1.txt',
    'ind-x-bible-terjemahanbaru-v1.txt',
    'ind-x-bible-terjemahanlama-v1.txt',
]

In [316]:
# Indonesian: one start offset per entry of `indonesian`; 0 keeps the
# whole text. The scored lines are also written to "contents/indonesian".
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in indonesian],
    save_folder=os.path.join(generic_path, "contents/indonesian"),
    start_lines=[23211, 0, 0, 23214, 0],
)

1
Bible: ind-x-bible-suciinjil-v1.txt
First line: | Inilah silsilah Isa Al-Masih , anak Daud , anak Ibrahim :
Last line: | Rahmat Isa , Junjungan Yang Ilahi , menyertai semua orang saleh . Amin .
7957
---------------

2
Bible: ind-x-bible-easy2005-v1.txt
First line: | Inilah silsilah Yesus Kristus . Ia berasal dari keturunan Daud , keturunan Abraham .
Last line: | Semoga Tuhan Yesus memberikan berkat dan menyertai semua orang . Amin .
7849
---------------

3
Bible: ind-x-bible-kabarbaik-v1.txt
First line: | Inilah daftar nenek moyang Yesus Kristus , keturunan Daud , keturunan Abraham . Dari Abraham sampai Daud , nama-nama nenek moyang Yesus sebagai berikut :
Last line: | Semoga Tuhan Yesus memberi berkat-Nya kepada semua umat Allah !
7958
---------------

4
Bible: ind-x-bible-terjemahanbaru-v1.txt
First line: | Inilah silsilah Yesus Kristus , anak Daud , anak Abraham .
Last line: | Kasih karunia Tuhan Yesus menyertai kamu sekalian ! Amin .
7958
---------------

5
Bible: ind-x-bible-ter

{'mean_ttr': 0.042,
 'mean_mattr': 0.445,
 'avg_line_count': 7936,
 'ttr_variance': 1e-05,
 'mattr_variance': 0.00017}

In [317]:
# All Javanese texts to aggregate. This rebinds `javanese`, which earlier
# held a single filename.
javanese = [
    'jav-x-bible-1981-v1.txt',
    'jav-x-bible-jav-v1.txt',
]

In [318]:
# Javanese: no start_lines are given, so every text is scored from its
# first non-empty line.
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in javanese]
)

1
Bible: jav-x-bible-1981-v1.txt
First line: | Iki sarasilahe Gusti Yesus Kritus , tedhake Sang Prabu Dawud , tedhake Rama Abraham .
Last line: | Sih-rahmate Gusti kita Yesus Kristus nunggila ing kowe kabeh ! Amin .
7618
---------------

2
Bible: jav-x-bible-jav-v1.txt
First line: | Iki sarasilahé Yésus Kristus , tedhaké Daud , tedhaké Abraham .
Last line: | Sih-rahmaté Gusti Yésus nunggila ing kowé kabèh ! Amin .
7959
---------------



{'mean_ttr': 0.066,
 'mean_mattr': 0.467,
 'avg_line_count': 7788,
 'ttr_variance': 0.0,
 'mattr_variance': 0.0}

In [319]:
# All Korean texts to aggregate. This rebinds `korean`, which earlier held
# a single filename.
korean = [
    'kor-x-bible-revised-v1.txt',
    'kor-x-bible-1985-v1.txt',
    'kor-x-bible-kor-v1.txt',
    'kor-x-bible-latinscript-v1.txt',
]

In [321]:
# Korean: one start offset per entry of `korean`; 0 keeps the whole text.
# The scored lines are also written to the "contents/korean" folder.
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in korean],
    save_folder=os.path.join(generic_path, "contents/korean"),
    start_lines=[22773, 0, 0, 0],
)

1
Bible: kor-x-bible-revised-v1.txt
First line: | 아브라함과 다윗의 자손 예수 그리스도의 세계라
Last line: | 주 예수의 은혜가 모든 자들에게 있을지어다 ! 아멘
7960
---------------

2
Bible: kor-x-bible-1985-v1.txt
First line: | 이것은 아브라함과 다윗의 후손 예수 그리스도의 족보이다:
Last line: | 주 예수님의 은혜가 모든 성도들과 함께하기를 기도합니다 . 아멘 .
7911
---------------

3
Bible: kor-x-bible-kor-v1.txt
First line: | 아브라함 의 자손이요 다윗 의 자손인 예수 그리스도 의 계보는 이러하다 .
Last line: | 주 예수 의 은혜가 모든 사람에게 있기를 빕니다 . 아멘 .
7611
---------------

4
Bible: kor-x-bible-latinscript-v1.txt
First line: | apurahamkwa tawisuy cason yeyswu kurisutouy seykyeyra
Last line: | cwu yeyswuuy unhyeyka motun catureykey issurcieta ! ameyn
7920
---------------



{'mean_ttr': 0.18,
 'mean_mattr': 0.696,
 'avg_line_count': 7850,
 'ttr_variance': 0.00028,
 'mattr_variance': 0.00059}

In [322]:
# All Latin texts to aggregate. This rebinds `latin`, which earlier held
# a single filename.
latin = [
    'lat-x-bible-vulgataclementina-v1.txt',
    'lat-x-bible-novavulgata-v1.txt',
]

In [326]:
# Latin: one start offset per entry of `latin`, in the same order.
# NOTE(review): the recorded output reports about 12,500 lines per text and
# last lines unlike the closing verse printed for the other languages, so
# these texts apparently run past the material scored elsewhere - confirm
# whether the end needs trimming as well.
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in latin],
    start_lines=[23376, 23379],
)

1
Bible: lat-x-bible-vulgataclementina-v1.txt
First line: | Liber generationis Jesu Christi filii David , filii Abraham .
Last line: | Sicut enim vinum semper bibere , aut semper aquam , contrarium est ; alternis autem uti , delectabile : ita legentibus si semper exactus sit sermo , non erit gratus . Hic ergo erit consummatus .
12602
---------------

2
Bible: lat-x-bible-novavulgata-v1.txt
First line: | Liber generationis Iesu Christi filii David filii Abraham .
Last line: | Sicut enim vinum solummodo bibere , similiter autem rursus et aquam , contrarium est , quemadmodum autem vinum aquae contemperatum iam et delectabilem gratiam perficit , huiusmodi etiam structura sermonis delectat aures eorum , quibus contingat compositionem legere . Hic autem erit finis .
12417
---------------



{'mean_ttr': 0.119,
 'mean_mattr': 0.581,
 'avg_line_count': 12509,
 'ttr_variance': 0.0,
 'mattr_variance': 4e-05}

In [327]:
# All Latvian texts to aggregate. This rebinds `latvian`, which earlier
# held a single filename.
latvian = [
    'lav-x-bible-ljd-youversion-v1.txt',
    'lav-x-bible-1997-v1.txt',
]

In [328]:
# Latvian: no start_lines are given, so every text is scored from its
# first non-empty line.
aggregate_scores_for_language(
    [os.path.join(generic_path, name) for name in latvian]
)

1
Bible: lav-x-bible-ljd-youversion-v1.txt
First line: | Jēzus Kristus , Dāvida dēla , Ābrahama dēla , cilts grāmata .
Last line: | Nāc , Kungs Jēzu ! Mūsu Kunga Jēzus Kristus žēlastība lai ir ar jums visiem ! Amen .
7949
---------------

2
Bible: lav-x-bible-1997-v1.txt
First line: | Jēzus Kristus , Dāvida dēla , Ābrahāma dēla , cilts grāmata .
Last line: | Mūsu Kunga Jēzus žēlastība lai ir ar visiem ! Āmen !
7954
---------------



{'mean_ttr': 0.114,
 'mean_mattr': 0.552,
 'avg_line_count': 7951,
 'ttr_variance': 2e-05,
 'mattr_variance': 0.0}

In [329]:
# Persian Bible files to score; the names are joined onto generic_path
# by the cell that follows.
persian = [
    'fas-x-bible-1995-v1.txt',
    'fas-x-bible-2007-v1.txt',
    'fas-x-bible-newmillennium2011-v1.txt',
]

In [330]:
# Aggregate the scores over the three Persian Bibles; no start_lines or
# save_folder are passed for this language.
aggregate_scores_for_language(
    [os.path.join(generic_path, filename) for filename in persian]
)

1
Bible: fas-x-bible-1995-v1.txt
First line: | کتاب نسب نامه عیسی مسیح بن داود بن ابراهیم :
Last line: | او که بر این امور شاهد است ، میگوید : « بلی ، به زودی میآیم ! » آمین . بیا ، ای خداوند عیسی !
7905
---------------

2
Bible: fas-x-bible-2007-v1.txt
First line: | شجره‌نامهٔ عیسی مسیح ، پسر داوود ، پسر ابراهیم :
Last line: | فیض عیسی خداوند با همهٔ شما باد ، آمین !
7866
---------------

3
Bible: fas-x-bible-newmillennium2011-v1.txt
First line: | شجره‌نامۀ عیسی مسیح ، پسر داوود ، پسر ابراهیم :
Last line: | فیض خداوندْ عیسی با همۀ شما باد . آمین .
7931
---------------



{'mean_ttr': 0.067,
 'mean_mattr': 0.481,
 'avg_line_count': 7900,
 'ttr_variance': 0.00016,
 'mattr_variance': 9e-05}

In [331]:
# Portuguese Bible files to score; the names are joined onto generic_path
# by the cell that follows.
portuguese = [
    'por-x-bible-almeidarevista-v1.txt',
    'por-x-bible-paratodos-v1.txt',
    'por-x-bible-almeidaatualizada-v1.txt',
    'por-x-bible-linguagemdehoje-v1.txt',
    'por-x-bible-versaofacil-v1.txt',
    'por-x-bible-versaointernacional-v1.txt',
]

In [333]:
# Aggregate the scores over the six Portuguese Bibles.
# NOTE(review): start_lines presumably holds one starting line per file,
# in the same order as `portuguese`, and save_folder is presumably where the
# retained text of each file is written — confirm against
# aggregate_scores_for_language.
aggregate_scores_for_language(
    [os.path.join(generic_path, filename) for filename in portuguese],
    save_folder=os.path.join(generic_path, "contents/portuguese"),
    start_lines=[23148, 23209, 23147, 0, 0, 0],
)

1
Bible: por-x-bible-almeidarevista-v1.txt
First line: | Livro da geração de Jesus Cristo , Filho de Davi , Filho de Abraão .
Last line: | A graça de nosso Senhor Jesus Cristo seja com todos vós . Amém !
7957
---------------

2
Bible: por-x-bible-paratodos-v1.txt
First line: | Esta é a lista dos antepassados de Jesus Cristo , filho de David , filho de Abraão .
Last line: | Que as bênçãos do Senhor Jesus estejam com todos vós .
7958
---------------

3
Bible: por-x-bible-almeidaatualizada-v1.txt
First line: | Livro da genealogia de Jesus Cristo , filho de Davi , filho de Abraão .
Last line: | A graça do Senhor Jesus seja com todos .
7959
---------------

4
Bible: por-x-bible-linguagemdehoje-v1.txt
First line: | Esta é a lista dos antepassados de Jesus Cristo , descendente de Davi , que era descendente de Abraão .
Last line: | E que a graça do Senhor Jesus esteja com todos .
7954
---------------

5
Bible: por-x-bible-versaofacil-v1.txt
First line: | Esta é a lista dos antepassados de Jesu

{'mean_ttr': 0.064,
 'mean_mattr': 0.479,
 'avg_line_count': 7918,
 'ttr_variance': 0.00016,
 'mattr_variance': 0.00015}

In [None]:
# TODO(review): presumably a reminder of languages still to be handled, given
# as what look like ISO 639-3 codes of Quechua varieties — confirm:
#   qub, quf, qug, quh, qul, qup, quy
# Kept as a comment on purpose: written as bare names these are undefined
# identifiers, so executing the cell (e.g. via "Run All") raises NameError.

In [334]:
# Bible files scored under the Russian heading; the names are joined onto
# generic_path by the cell that follows.
russian = [
    'rus-x-bible-churchslavonic-v1.txt',
    'rus-x-bible-kulakov-v1.txt',
    'rus-x-bible-modern2011-v1.txt',
    'rus-x-bible-centralasian-v1.txt',
    'rus-x-bible-slovozhizny2006-v1.txt',
    'rus-x-bible-synodal-v1.txt',
]

In [336]:
# Aggregate the scores over the six Russian Bibles.
# NOTE(review): start_lines presumably holds one starting line per file,
# in the same order as `russian`, and save_folder is presumably where the
# retained text of each file is written — confirm against
# aggregate_scores_for_language.
aggregate_scores_for_language(
    [os.path.join(generic_path, filename) for filename in russian],
    save_folder=os.path.join(generic_path, "contents/russian"),
    start_lines=[0, 0, 0, 0, 0, 23146],
)

1
Bible: rus-x-bible-churchslavonic-v1.txt
First line: | Книга родства Иисуса Христа , сына давидова , сына авраамля .
Last line: | Благодать Господа нашего Иисуса Христа со всеми вами . Аминь .
7956
---------------

2
Bible: rus-x-bible-kulakov-v1.txt
First line: | Вот родословие Иисуса Христа , потомка Давида и Авраама :
Last line: | Благодать Господа Иисуса да будет со всеми !
7940
---------------

3
Bible: rus-x-bible-modern2011-v1.txt
First line: | Вот родословная Иисуса Христа , происходящего из рода Давида , рожденного в роде Авраама :
Last line: | Да будет благодать Господа Иисуса со всеми вами !
7867
---------------

4
Bible: rus-x-bible-centralasian-v1.txt
First line: | Вот родословие Исы Масиха , потомка Давуда , потомка Ибрахима .
Last line: | Пусть благодать Повелителя Исы будет со всеми . Аминь .
7913
---------------

5
Bible: rus-x-bible-slovozhizny2006-v1.txt
First line: | Всего от Авраама до Давида было четырнадцать поколений , от Давида до переселения в Вавилон – тоже

{'mean_ttr': 0.123,
 'mean_mattr': 0.58,
 'avg_line_count': 7906,
 'ttr_variance': 0.0001,
 'mattr_variance': 4e-05}

In [337]:
# Spanish Bible files to score; the names are joined onto generic_path
# by the cell that follows.
spanish = [
    'spa-x-bible-dhhe-v1.txt',
    'spa-x-bible-dioshablahoy-v1.txt',
    'spa-x-bible-reinavalera1960-v1.txt',
    'spa-x-bible-blph-youversion-v1.txt',
    'spa-x-bible-rvr95-youversion-v1.txt',
    'spa-x-bible-tla-v1.txt',
]

In [339]:
# Aggregate the scores over the six Spanish Bibles; no start_lines or
# save_folder are passed for this language.
aggregate_scores_for_language(
    [os.path.join(generic_path, filename) for filename in spanish]
)

1
Bible: spa-x-bible-dhhe-v1.txt
First line: | La lista de los antepasados de Jesucristo , descendiente de David y de Abraham :
Last line: | Que el Señor Jesús derrame su gracia sobre todos .
7734
---------------

2
Bible: spa-x-bible-dioshablahoy-v1.txt
First line: | Ésta es una lista de los antepasados de Jesucristo , que fue descendiente de David y de Abraham :
Last line: | Que el Señor Jesús derrame su gracia sobre todos .
7785
---------------

3
Bible: spa-x-bible-reinavalera1960-v1.txt
First line: | Libro de la genealogía de Jesucristo , hijo de David , hijo de Abraham .
Last line: | La gracia del Señor Jesús sea con todos . Amén .
7945
---------------

4
Bible: spa-x-bible-blph-youversion-v1.txt
First line: | Esta es la lista de los antepasados * de Jesucristo , descendiente de David y de Abrahán :
Last line: | Que la gracia de Jesús , el Señor , esté con todos . Amén .
7766
---------------

5
Bible: spa-x-bible-rvr95-youversion-v1.txt
First line: | Libro de la genealogía de Jes

{'mean_ttr': 0.061,
 'mean_mattr': 0.458,
 'avg_line_count': 7754,
 'ttr_variance': 0.0001,
 'mattr_variance': 0.00013}

In [340]:
# Swahili Bible files to score; the names are joined onto generic_path
# by the cell that follows.
swahili = [
    'swh-x-bible-habarinjema-v1.txt',
    'swh-x-bible-swh-v1.txt',
    'swh-x-bible-union-v1.txt',
]

In [341]:
# Aggregate the scores over the three Swahili Bibles; no start_lines or
# save_folder are passed for this language.
aggregate_scores_for_language(
    [os.path.join(generic_path, filename) for filename in swahili]
)

1
Bible: swh-x-bible-habarinjema-v1.txt
First line: | Yesu Kristo alikuwa wa ukoo wa Daudi , wa ukoo wa Abrahamu . Hii ndiyo orodha ya ukoo wake : Isaka alimzaa Yakobo , Yakobo alimzaa Yuda na ndugu zake , Peresi alimzaa Hesroni , Hesroni alimzaa Rami , Aminadabu alimzaa Nashoni , Nashoni alimzaa Salmoni , Boazi na Ruthu walikuwa wazazi wa Obedi , Obedi alimzaa Yese , Daudi alimzaa Solomoni ( mama yake Solomoni alikuwa Bath-Sheba mke wa Uria ) . Rehoboamu alimzaa Abiya , Abiya alimzaa Asa , Yehoshafati alimzaa Yoramu , Yoramu alimzaa Uzia , Yothamu alimzaa Ahazi , Ahazi alimzaa Hezekia , Manase alimzaa Amoni , Amoni alimzaa Yosia , Huo ulikuwa wakati Wayahudi walipopelekwa uhamishoni Babuloni . Yekonia alimzaa Shealtieli , Shealtieli alimzaa Zerubabeli , Abiudi alimzaa Eliakimu , Eliakimu alimzaa Azori , Zadoki alimzaa Akimu , Akimu alimzaa Eliudi , Eleazari alimzaa Mathani , Mathani alimzaa Yakobo ,
Last line: | Nawatakieni nyote neema ya Bwana Yesu . Amina .
7689
---------------

2
B

{'mean_ttr': 0.12,
 'mean_mattr': 0.55,
 'avg_line_count': 7740,
 'ttr_variance': 4e-05,
 'mattr_variance': 2e-05}

In [342]:
# Bible files scored under the Turkish heading; the names are joined onto
# generic_path by the cell that follows.
turkish = [
    'tur-x-bible-2009-v1.txt',
    'tur-x-bible-southernazeri-v1.txt',
    'tur-x-bible-tur-v1.txt',
]

In [347]:
# Aggregate the scores over the three Turkish Bibles.
# NOTE(review): start_lines presumably holds one starting line per file,
# in the same order as `turkish`, and save_folder is presumably where the
# retained text of each file is written — confirm against
# aggregate_scores_for_language.
aggregate_scores_for_language(
    [os.path.join(generic_path, filename) for filename in turkish],
    save_folder=os.path.join(generic_path, "contents/turkish"),
    start_lines=[0, 22724, 0],
)

1
Bible: tur-x-bible-2009-v1.txt
First line: | Ýbrahim oðlu , Davut oðlu Ýsa Mesih’in soyuyla ilgili kayýt þöyledir : Ýbrahim , Ýshak’ýn babasýydý . Ýshak , Yakub’un babasý ; Yakup da Yahuda ve onun kardeþlerinin babasýydý .
Last line: | Rab Ýsa’nýn lütfu , kutsallarla birlikte olsun . Amin .
7428
---------------

2
Bible: tur-x-bible-southernazeri-v1.txt
First line: | - 2 İbrahim oğlu , Davut oğlu İsa Mesihin soy kaydı şöyledir : İbrahim İshakın babasıydı , İshak Yakupun babasıydı , Yakup Yahuda ve kardeşlerinin babasıydı ,
Last line: | Rab İsa'nın lütfu kutsallarla birlikte olsun ! Amin .
7458
---------------

3
Bible: tur-x-bible-tur-v1.txt
First line: | İbrahim oğlu , Davut oğlu İsa Mesih'in soyuyla ilgili kayıt şöyledir : İbrahim , İshak'ın babasıydı . İshak , Yakup'un babası ; Yakup da Yahuda ve onun kardeşlerinin babasıydı .
Last line: | Rab İsa'nın lütfu , kutsallarla birlikte olsun . Amin .
7440
---------------



{'mean_ttr': 0.169,
 'mean_mattr': 0.662,
 'avg_line_count': 7442,
 'ttr_variance': 0.0001,
 'mattr_variance': 0.00058}

In [348]:
# Vietnamese Bible files to score; the names are joined onto generic_path
# by the cell that follows.
vietnamese = [
    'vie-x-bible-1926compounds-v1.txt',
    'vie-x-bible-1926nocompounds-v1.txt',
    'vie-x-bible-2002-v1.txt',
    'vie-x-bible-banphothong-v1.txt',
    'vie-x-bible-bd2011-youversion-v1.txt',
    'vie-x-bible-revised2010-v1.txt',
]

In [350]:
# Aggregate the scores over the six Vietnamese Bibles.
# NOTE(review): start_lines presumably holds one starting line per file,
# in the same order as `vietnamese`, and save_folder is presumably where the
# retained text of each file is written — confirm against
# aggregate_scores_for_language.
aggregate_scores_for_language(
    [os.path.join(generic_path, filename) for filename in vietnamese],
    save_folder=os.path.join(generic_path, "contents/vietnamese"),
    start_lines=[23114, 23146, 23144, 0, 0, 0],
)

1
Bible: vie-x-bible-1926compounds-v1.txt
First line: | Gia-phổ Đức Chúa Jêsus-Christ , con cháu Đa-vít và con cháu Áp-ra-ham .
Last line: | Nguyền xin ân-điển của Đức Chúa Jêsus ở với mọi người !
7945
---------------

2
Bible: vie-x-bible-1926nocompounds-v1.txt
First line: | Gia phổ Đức Chúa Jêsus Christ , con cháu Đa-vít và con cháu Áp-ra-ham .
Last line: | Nguyền xin ân điển của Đức Chúa Jêsus ở với mọi người !
7960
---------------

3
Bible: vie-x-bible-2002-v1.txt
First line: | Gia phổ của Chúa Cứu Thế Giê-su , dòng dõi Đa-vít , con cháu Áp-ra-ham .
Last line: | Đấng làm chứng cho những điều này phán : “ Phải , Ta đến mau chóng! ” A-men . Lạy Chúa Giê-su , xin Ngài đến!
7952
---------------

4
Bible: vie-x-bible-banphothong-v1.txt
First line: | Đây là gia phổ của Chúa Cứu Thế Giê-xu . Ngài xuất thân từ dòng họ Đa-vít , Đa-vít xuất thân từ dòng họ Áp-ra-ham .
Last line: | Nguyền xin ân phúc của Chúa Giê-xu ở với mọi người . A-men .
7853
---------------

5
Bible: vie-x-bible-bd2011-y

{'mean_ttr': 0.017,
 'mean_mattr': 0.422,
 'avg_line_count': 7903,
 'ttr_variance': 1e-05,
 'mattr_variance': 0.00016}