Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
free-variation committed Apr 4, 2019
2 parents 0c5d165 + fb61d10 commit d838370
Show file tree
Hide file tree
Showing 34 changed files with 2,724 additions and 430 deletions.
6 changes: 5 additions & 1 deletion .travis.yml
@@ -1,9 +1,12 @@
sudo: required

dist: xenial

language: python

python:
- '3.6'
- '3.7'

before_install:
- sudo rm -f /etc/boto.cfg
Expand All @@ -23,13 +26,14 @@ before_script:
- pip install numpy
- pip install scipy
- pip install scikit-learn
- pip install nose-timer

script:
# Notes on nose:
# Travis CI pre-installs `nose`
# https://github.com/coagulant/coveralls-python#nosetests
# http://nose.readthedocs.org/en/latest/plugins/skip.html
# NOTE(review): the next two entries are identical except for `--with-timer`
# (presumably provided by the `nose-timer` package installed in before_script).
# As written, both entries run, so the whole suite executes twice -- confirm
# that only the `--with-timer` variant is meant to remain.
- nosetests --no-skip --with-coverage --cover-package=cltk --with-doctest
- nosetests --no-skip --with-coverage --cover-package=cltk --with-doctest --with-timer
# Run the documentation doctests from inside the docs directory.
- ( cd docs && make doctest; )

after_success:
Expand Down
7 changes: 7 additions & 0 deletions cltk/contributors.md
@@ -0,0 +1,7 @@
# Contributors
CLTK Core authors, ordered alphabetically by first name

## key
* val1
* val2

7 changes: 6 additions & 1 deletion cltk/corpus/greek/corpora.py
Expand Up @@ -66,5 +66,10 @@
'origin': 'https://github.com/cltk/First1KGreek',
'location': 'remote',
'type': 'text'},
{'name': 'greek_text_tesserae',
'encoding': 'utf-8',
'markup': 'plaintext', #modified plaintext with Tesserae-style citations
'origin': 'https://github.com/cltk/greek_text_tesserae.git',
'location': 'remote',
'type': 'text'},
]

22 changes: 21 additions & 1 deletion cltk/corpus/hindi/alphabet.py
Expand Up @@ -21,7 +21,7 @@


# The semivowels of the Hindi script. Every entry is the bare letter with no
# surrounding whitespace, so comparisons against single characters succeed.
SEMIVOWELS = ['य', 'र', 'ल', 'व']

# There are three sibilants.
SIBILANTS = ['श', 'ष', 'स']
Expand All @@ -31,3 +31,23 @@
# Combining signs of the Devanagari script, each written on the dotted-circle
# placeholder (U+25CC) so the mark has a base to attach to:
#   - virama suppresses the inherent vowel of a consonant,
#   - candrabindu is used to nasalize vowels,
#   - anusvara is used for the final velar nasal sound,
#   - visarga adds a voiceless breath after a vowel.
# Escapes are used because the look-alike signs of other Indic blocks are
# easy to confuse visually; these are the Devanagari code points.

MODIFIERS = [
    '\u25cc\u094d',  # virama
    '\u25cc\u0901',  # candrabindu
    '\u25cc\u0902',  # anusvara
    '\u25cc\u0903',  # visarga
]

# Classification of the letters by how (and where) their sound is produced.
# The five-letter rows are spelled as plain strings and split into
# single-character lists; every letter is exactly one code point.

VELAR_CONSONANTS = list('कखगघङ')

PALATAL_CONSONANTS = list('चछजझञ')

RETROFLEX_CONSONANTS = list('टठडढण')

DENTAL_CONSONANTS = list('तथदधन')

LABIAL_CONSONANTS = list('पफबभम')

SONORANT_CONSONANTS = list('यरलव')

SIBILANT_CONSONANTS = list('शषस')

GUTTURAL_CONSONANT = ['ह']

SIGNS = ['ॐ']
93 changes: 83 additions & 10 deletions cltk/corpus/odia/alphabet.py
@@ -1,16 +1,81 @@
"""Odia alphabet"""
# Module authors, kept as a list of "Name <email>" strings. This is the only
# assignment: a second, string-valued one would simply be overwritten.
__author__ = ['Nishchith Shetty <inishchith@gmail.com>']

# Independent vowel letters.
VOWELS = [
    'ଅ', 'ଆ', 'ଇ', 'ଈ', 'ଉ', 'ଊ', 'ଋ',
    'ୠ', 'ଌ', 'ୡ', 'ଏ', 'ଐ', 'ଓ', 'ଔ']

# The twenty-five consonants, five letters per row.
STRUCTURED_CONSONANTS = [
    'କ', 'ଖ', 'ଗ', 'ଘ', 'ଙ',
    'ଚ', 'ଛ', 'ଜ', 'ଝ', 'ଞ',
    'ଟ', 'ଠ', 'ଡ', 'ଢ', 'ଣ',
    'ତ', 'ଥ', 'ଦ', 'ଧ', 'ନ',
    'ପ', 'ଫ', 'ବ', 'ଭ', 'ମ']
# Oriya Unicode Standard

# Odia independent vowels keyed by their Unicode code point, written as four
# upper-case hexadecimal digits. Every key must be unique: a repeated
# placeholder key would make the later entry silently replace the earlier one,
# so the two long vocalic letters carry their real code points (0B60, 0B61).
VOWELS = {
    '0B05': 'ଅ',
    '0B06': 'ଆ',
    '0B07': 'ଇ',
    '0B08': 'ଈ',
    '0B09': 'ଉ',
    '0B0A': 'ଊ',
    '0B0B': 'ଋ',
    '0B60': 'ୠ',
    '0B0C': 'ଌ',
    '0B61': 'ୡ',
    '0B0F': 'ଏ',
    '0B10': 'ଐ',
    '0B13': 'ଓ',
    '0B14': 'ଔ',
}

# Odia consonants keyed by their Unicode code point (four upper-case hex
# digits), laid out five to a row in code-point order.
STRUCTURED_CONSONANTS = {
    '0B15': 'କ', '0B16': 'ଖ', '0B17': 'ଗ', '0B18': 'ଘ', '0B19': 'ଙ',
    '0B1A': 'ଚ', '0B1B': 'ଛ', '0B1C': 'ଜ', '0B1D': 'ଝ', '0B1E': 'ଞ',
    '0B1F': 'ଟ', '0B20': 'ଠ', '0B21': 'ଡ', '0B22': 'ଢ', '0B23': 'ଣ',
    '0B24': 'ତ', '0B25': 'ଥ', '0B26': 'ଦ', '0B27': 'ଧ', '0B28': 'ନ',
    '0B2A': 'ପ', '0B2B': 'ଫ', '0B2C': 'ବ', '0B2D': 'ଭ', '0B2E': 'ମ',
    # Remaining consonants that follow the five rows above.
    '0B2F': 'ଯ', '0B30': 'ର', '0B32': 'ଲ', '0B33': 'ଳ', '0B35': 'ଵ',
    '0B36': 'ଶ', '0B37': 'ଷ', '0B38': 'ସ', '0B39': 'ହ',
}

# The structured consonants fall into five groups according to their place of
# articulation (where the sound is formed in the mouth). Each group is paired
# with a parallel list giving the IAST transcription of every letter, in the
# same order.

VELAR_CONSONANTS = ['କ', 'ଖ', 'ଗ', 'ଘ', 'ଙ']
VELAR_CONSONANTS_PRONOUNCIATION = ['ka', 'kha', 'ga', 'gha', 'ṅa']
# Backward-compatible alias: this group was originally published under a
# spelling that differs from the other four `*_PRONOUNCIATION` names.
VELAR_CONSONANTS_PRONONCIATION = VELAR_CONSONANTS_PRONOUNCIATION

PALATAL_CONSONANTS = ['ଚ', 'ଛ', 'ଜ', 'ଝ', 'ଞ']
PALATAL_CONSONANTS_PRONOUNCIATION = ['ca', 'cha', 'ja', 'jha', 'ña']

RETROFLEX_CONSONANTS = ['ଟ', 'ଠ', 'ଡ', 'ଢ', 'ଣ']
RETROFLEX_CONSONANTS_PRONOUNCIATION = ['ṭa', 'ṭha', 'ḍa', 'ḍha', 'ṇa']

DENTAL_CONSONANTS = ['ତ', 'ଥ', 'ଦ', 'ଧ', 'ନ']
DENTAL_CONSONANTS_PRONOUNCIATION = ['ta', 'tha', 'da', 'dha', 'na']

LABIALS_CONSONANTS = ['ପ', 'ଫ', 'ବ', 'ଭ', 'ମ']
LABIALS_CONSONANTS_PRONOUNCIATION = ['pa', 'pha', 'ba', 'bha', 'ma']


UNSTRUCTURED_CONSONANTS = [
'ଯ', 'ୟ', 'ର', 'ଲ', 'ଳ', 'ୱ',
Expand All @@ -19,3 +84,11 @@
# The ten digits, zero through nine, split out of one string into
# single-character entries.
NUMERALS = list('୦୧୨୩୪୫୬୭୮୯')

# Fraction signs and, at the same index, the value each one denotes.
EXTRA_NUMERICAL_SYMBOLS = [
    '୵', '୶', '୷',
    '୲', '୳', '୴',
]
EXTRA_NUMERICAL_SYMBOLS_DESC = [
    '1/16', '1/8', '3/16',
    '1/4', '1/2', '3/4',
]

# Combining signs of the Odia script, each written on the dotted-circle
# placeholder (U+25CC) so the mark has a base to attach to:
#   - virama suppresses the inherent vowel of a consonant,
#   - candrabindu is used to nasalize vowels,
#   - anusvara is used for the final velar nasal sound,
#   - visarga adds a voiceless breath after a vowel.
# Escapes are used because the look-alike signs of other Indic blocks are
# easy to confuse visually; these are the Odia (Oriya block) code points.

MODIFIERS = [
    '\u25cc\u0b4d',  # virama
    '\u25cc\u0b01',  # candrabindu
    '\u25cc\u0b02',  # anusvara
    '\u25cc\u0b03',  # visarga
]

0 comments on commit d838370

Please sign in to comment.