Skip to content

Commit

Permalink
Add Swadesh list for Coptic (#959)
Browse files Browse the repository at this point in the history
* Added Swadesh list from Wikipedia, changed all words to Coptic script, added some missing words

* added tests

* updated tests and list to conform to discussed format

* one change from test

* Mk small adjustment to coptic Swadesh test

* Added documentation
  • Loading branch information
nolanee authored and kylepjohnson committed Nov 16, 2019
1 parent 7bc3ffd commit f6bc71b
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 1 deletion.
25 changes: 24 additions & 1 deletion cltk/corpus/swadesh.py
Expand Up @@ -238,7 +238,28 @@

# Old Norse Swadesh list. Every entry is separated by an explicit comma:
# adjacent string literals are silently concatenated by Python, which
# previously fused "kljúfa" and "stinga" into one bogus entry.
# NOTE(review): None presumably marks a concept with no recorded word — confirm.
swadesh_old_norse = ["ek", "þú", "hann", "vér", "þér", "þeir", "sjá, þessi", "sá", "hér", "þar", "hvar", "hvat", "hvar", "hvenær", "hvé", "eigi", "allr", "margr", "nǫkkurr", "fár", "annarr", "einn", "tveir", "þrír", "fjórir", "fimm", "stórr", "langr", "breiðr", "þykkr", "þungr", "lítill", "stuttr", "mjór", "þunnr", "kona", "karl", "maðr", "barn", "kona", "bóndi", "móðir", "faðir", "dýrr", "fiskr", "fugl", "hundr", "lús", "snókr", "ormr", "tré", "skógr", "stafr", "ávǫxtr", "fræ", "lauf", "rót", "bǫrkr", "blóm", "gras", "reip", "húð", "kjǫt", "blóð", "bein", "fita", "egg", "horn", "hali", "fjǫðr", "hár", "hǫfuð", "eyra", "auga", "nef", "munnr", "tǫnn", "tunga", "nagl", "fótr", "leggr", "kné", "hǫnd", "vængr", "magi", "iinyfli", "hals", "bak", "brjóst", "hjarta", "lifr", "drekka", "eta", "bíta", "súga", "spýta", ", hrækja", None, "blása", "anda", "hlæja", "sjá", "heyra", "vita", "þýkkja", "þefa", "ugga", "sofa", "lifa", "deyja", "drepa", "hals", "bak", "berja", "skera", "kljúfa", "stinga", "klóra", "grafa", "synda", "fljúga", "ganga", "koma", "liggja", "sitja", "standa", "snúa", "falla", "gefa", "halda", "kreista", "gnúa", "þvá", "þurka", "draga", "ýta", "kasta", "kasta", "binda", "sauma", "telja", "segja", "syngja", "leika", "flóta", "streyma", "frjósa", "þrútna", "sól", "tungl", "stjarna", "vatn", "regn", "á", "vatn", "hav", "salt", "steinn", "sandr", "ryk", "jörð", "ský", "þoka", "himinn", "vindr", "snjór", "íss", "reykr", "ild", "eldr", "aska", "brenna", "vegr", "fjall", "rauðr", "grœnn", "gulr", "hvítr", "svartr", "nótt", "dagr", "ár", "heitr", "kaldr", "fullr", "nýr", "gamall", "góðr", "illr", "rottin", "skitinn", "beinn", "kringlóttr", "beittr", None, "sleipr", "blautr", "þurr", "réttr", "nálægr", "langr", "hœgr", "vinstri", "hjá", "í", "með", "ok", "ef", "því at", "nafn"] # pylint: disable=line-too-long


# Coptic Swadesh list (207 entries). Each entry is either a single string
# or a nested list of strings (apparently alternative forms for one concept).
# NOTE(review): '' presumably marks a concept with no recorded Coptic word — confirm.
swadesh_cop = [
    'ⲁⲛⲟⲕ', 'ⲛⲧⲟⲕ, ⲛⲧⲟ', 'ⲛⲧⲟϥ, ⲛⲧⲟⲥ', 'ⲁⲛⲟⲛ', 'ⲛⲧⲟⲧⲛ', 'ⲛⲧⲟⲩ', '-ⲉⲓ', 'ⲡⲓ-, ϯ-, ⲛⲓ-', 'ⲡⲉⲓⲙⲁ',
    'ⲙⲙⲁⲩ', 'ⲛⲓⲙ', 'ⲁϣ', 'ⲧⲱⲛ', 'ⲧⲛⲛⲁⲩ', 'ⲡⲱⲥ', 'ⲛ, ⲁⲛ', 'ⲧⲏⲣ', 'ⲟϣ', 'ϩⲟⲉⲓⲛⲉ', ['ⲕⲟⲩⲓ', 'ϣⲏⲙ'],
    'ⲕⲉ', 'ⲟⲩⲁ', 'ⲥⲛⲁⲩ', 'ϣⲟⲙⲧ', 'ϥⲧⲟⲩ', 'ϯⲟⲩ', 'ⲛⲟϭ', 'ϣⲓⲁⲓ', ['ⲟⲩⲟⲥⲧⲛ', 'ⲟⲩⲱϣⲥ'],
    'ⲟⲩⲙⲟⲧ', 'ϩⲣⲟϣ', ['ⲕⲟⲩⲓ', 'ϣⲏⲙ', 'ϣⲓⲣⲉ'], 'ϣⲏⲙ', 'ϫⲏⲧ', 'ⲡⲁⲕⲉ', 'ⲥϩⲓⲙⲉ', 'ϩⲟⲟⲩⲧ', 'ⲣⲱⲙⲉ',
    'ϣⲏⲣⲉ', 'ⲥϩⲓⲙⲉ', 'ϩⲁⲓ', 'ⲙⲁⲁⲩ', 'ⲉⲓⲱⲧ', 'ⲧⲃⲛⲏ', 'ⲧⲃⲧ', 'ϩⲁⲗⲏⲧ', 'ⲟⲩϩⲟⲣ', 'ϩⲗⲱⲙ', 'ϩⲟϥ', 'ϥⲛⲧ',
    'ϣⲏⲛ', 'ⲉⲓⲁϩ ϣⲏⲛ', 'ⲟⲩⲁϩ', 'ⲟⲩⲧⲁϩ', 'ϫⲣⲟϫ', 'ϭⲱⲱⲃⲉ', 'ⲛⲟⲩⲛⲉ', 'ⲕⲟⲩⲕⲉ', 'ϩⲣⲏⲣⲉ', 'ⲥⲓⲙ', 'ⲛⲟⲩϩ',
    'ϣⲁⲁⲣ', 'ⲁϥ', 'ⲥⲛⲟϥ', 'ⲕⲁⲥ', 'ⲱⲧ', 'ⲥⲟⲟⲩϩⲉ', 'ⲥⲃⲟⲕ', 'ⲥⲁⲧ', 'ⲙⲏϩⲉ', 'ϥⲱ', 'ⲁⲡⲉ', 'ⲙⲁⲁϫⲉ',
    'ⲉⲓⲁ', 'ϣⲁ', 'ⲣⲟ', ['ϣⲟⲗ', 'ⲛⲁϫϩⲉ'], 'ⲗⲁⲥ', 'ⲉⲓⲃ', ['ⲟⲩⲉⲣⲏⲧⲉ', 'ⲣⲁⲧ'], 'ⲣⲁⲧ', 'ⲕⲗⲗⲉ', 'ϭⲓϫ',
    'ⲧⲛϩ', 'ϩⲏ(ⲧ)', ['ⲙⲁϩⲧ', 'ⲙⲉϩⲧⲟ'], 'ⲙⲁⲕϩ', 'ϫⲓⲥⲉ', 'ⲉⲕⲓⲃⲉ', 'ϩⲏⲧ', 'ⲟⲩⲫⲁϫⲓ', 'ⲥⲱ', 'ⲟⲩⲱⲙ',
    'ⲗⲱⲕⲥ', 'ⲥⲱⲛⲕ', 'ⲛⲉϫⲧⲁϥ', 'ⲕⲁⲃⲟⲗ', 'ⲛⲓϥⲉ', 'ⲥⲉⲕⲧⲏⲩ', 'ⲥⲱⲃⲉ', 'ⲛⲁⲩ', 'ⲥⲱⲧⲙ', 'ⲉⲓⲙⲉ', 'ⲙⲉⲉⲩⲉ', 'ϣⲱⲗⲙ',
    'ⲣϩⲟⲧⲉ', 'ϩⲓⲛⲏⲃ', 'ⲟⲟⲩ-', 'ⲙⲟⲩ', 'ⲙⲟⲩⲟⲩⲧ', 'ⲙⲓϣⲉ', 'ⲙⲉⲧϫⲉⲣⲏϫ', 'ϯ', 'ϣⲟⲧϣⲧ', 'ⲡⲱϩ',
    'ⲗⲟⲅⲭⲓⲍⲉ', 'ϩⲱϩ', 'ϣⲓⲕⲉ', 'ⲛⲏⲏⲃⲉ', 'ϩⲁⲗⲁⲓ', 'ⲙⲟⲟϣⲉ', 'ⲉⲓ', 'ⲛⲕⲟⲧⲕ', 'ϩⲙⲟⲟⲥ',
    'ⲱϩⲉ', ['ⲡⲱⲱⲛⲉ', 'ⲕⲧⲟ'], 'ϩⲉ', 'ϯ', 'ⲁⲙⲁϩⲧⲉ', '', 'ⲗⲟϫⲗϫ', ['ⲣⲱϩⲉ', 'ⲉⲓⲱ'], 'ϥⲱⲧⲉ', 'ⲥⲟⲕⲥⲉⲕ',
    'ϭⲱⲟⲩ', 'ⲛⲟⲩϫⲉ', 'ⲙⲟⲩⲣ', ['ⲧⲱⲣⲡ', 'ⲱⲧϩ', 'ϫⲱⲗⲕ'], 'ⲱⲡ', ['ϣⲁϫⲉ', 'ϫⲱ'], ['ϭⲛϭⲛ', 'ϩⲱⲥ'], 'ⲥⲱⲃⲉ',
    ['ⲛⲏⲏⲃⲉ', 'ϩⲗⲟⲉⲓⲗⲉ'], 'ϣⲱⲗ', 'ⲱϭⲣ', 'ⲛⲟⲩϥⲧ', 'ⲣⲏ', 'ⲟⲟϩ', 'ⲥⲓⲟⲩ', 'ⲙⲟⲟⲩ', 'ϩⲱⲟⲩ',
    'ⲉⲓⲉⲣⲟ', ['ⲑⲁⲗⲁⲥⲥⲁ', 'ⲗⲓⲙⲛⲏ'], 'ⲉⲓⲟⲙ', ['ⲙⲗϩ', 'ϩⲙⲟⲩ'], 'ⲱⲛⲉ', 'ϣⲱ', 'ϣⲟⲉⲓϣ',
    ['ⲉⲓⲧⲛ', 'ⲕⲁϩ', 'ⲧⲟ'], ['ⲕⲗⲟⲟⲗⲉ', 'ϭⲏⲡⲉ'], 'ⲧⲙⲧⲙ', ['ⲡⲉ', 'ⲡⲏⲩⲉ'], 'ⲧⲏⲩ', 'ⲭⲓⲱⲛ', 'ⲕⲣⲩⲥⲧⲁⲗⲗⲟⲥ',
    ['ⲕⲣⲙⲧⲥ', 'ⲧⲙⲧⲙ'], ['ⲕⲱϩⲧ', 'ⲕⲣⲱⲛ'], 'ⲕⲣⲙⲉⲥ', 'ⲣⲱⲕϩ', ['ϩⲓⲏ', 'ϩⲓⲟⲟⲩⲉ'], 'ⲧⲟⲟⲩ', 'ⲧⲱⲣϣ', 'ⲟⲩⲟⲧ', '',
    'ⲟⲩⲟⲃϣ', 'ⲕⲏⲙ', 'ⲟⲩϣⲏ', 'ϩⲟⲟⲩ', ['ⲣⲟⲙⲡⲉ', 'ⲣⲙⲡⲟⲟⲩⲉ'], 'ⲑⲉⲣⲙⲟⲛ', 'ⲟⲣϣ', 'ⲙⲏϩ', 'ⲃⲣⲣⲉ',
    'ⲁⲥ', 'ⲛⲟⲩϥⲉ', ['ϩⲟⲟⲩ', 'ⲃⲱⲱⲛ'], 'ⲗⲱⲙⲥ', 'ϫⲱϩⲙ', 'ⲥⲟⲩⲧⲱⲙ', '', 'ⲧⲱⲙ', '', 'ⲗⲉⲕⲗⲱⲕ',
    'ϩⲟⲣⲡ', 'ⲃⲟⲥⲧ', 'ⲥⲟⲟϩⲉ', 'ϩⲏⲛ', 'ⲟⲩⲉ', 'ⲟⲩⲛⲁⲙ', 'ϩⲃⲟⲩⲣ',
    '', '', 'ⲙⲛ', 'ⲁⲩⲱ', '', 'ⲉⲣϣⲁⲛ', 'ⲣⲁⲛ',
]

class Swadesh():
def __init__(self, language):
Expand All @@ -265,3 +286,5 @@ def words(self):
return swadesh_hi
elif self.language == 'ar':
return swadesh_ar
elif self.language == 'cop':
return swadesh_cop
28 changes: 28 additions & 0 deletions cltk/tests/test_languages/test_coptic.py
@@ -0,0 +1,28 @@
"""Test for Coptic, based on John Stewart's tests for Old English"""

import os
import unittest

from cltk.corpus.swadesh import Swadesh

__author__ = ["Edward Nolan <nolanee@umich.edu>", ]


class TestCoptic(unittest.TestCase):
    """Unit tests for the Coptic language resources."""

    def test_swadesh_coptic(self):
        """Check the first entry, the 'turn' entry and the size of the Coptic Swadesh list."""
        coptic_words = Swadesh('cop').words()

        # The first entry of the list.
        self.assertEqual('ⲁⲛⲟⲕ', coptic_words[0])

        # Index 125 holds a nested list of two forms for "turn".
        self.assertEqual(['ⲡⲱⲱⲛⲉ', 'ⲕⲧⲟ'], coptic_words[125])

        # The list is expected to contain 207 entries in total.
        self.assertEqual(len(coptic_words), 207)


# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
15 changes: 15 additions & 0 deletions docs/coptic.rst
Expand Up @@ -17,3 +17,18 @@ Use ``CorpusImporter()`` or browse the `CLTK GitHub organization <https://github
In [3]: c.list_corpora
Out[3]: ['coptic_text_scriptorium']
Swadesh
=======
The corpus module has a class for generating a Swadesh list for Coptic.

.. code-block:: python

   In [1]: from cltk.corpus.swadesh import Swadesh

   In [2]: swadesh = Swadesh('cop')

   In [3]: swadesh.words()[:10]
   Out[3]: ['ⲁⲛⲟⲕ', 'ⲛⲧⲟⲕ, ⲛⲧⲟ', 'ⲛⲧⲟϥ, ⲛⲧⲟⲥ', 'ⲁⲛⲟⲛ', 'ⲛⲧⲟⲧⲛ', 'ⲛⲧⲟⲩ', '-ⲉⲓ', 'ⲡⲓ-, ϯ-, ⲛⲓ-', 'ⲡⲉⲓⲙⲁ', 'ⲙⲙⲁⲩ']

0 comments on commit f6bc71b

Please sign in to comment.