Code cleanup

Make the project comply with PEP 440; remove deprecated function.
bluedisk · Feb 1, 2017 · 4d421ee · 4d421ee
1 parent f0e0d9b
commit 4d421ee
Show file tree

Hide file tree

Showing 10 changed files with 148 additions and 96 deletions.
diff --git a/hgtk/checker.py b/hgtk/checker.py
@@ -41,10 +41,11 @@ def is_jamo(letter):
 def is_hanja(phrase):
     for unicode_value in map(lambda letter:ord(letter), phrase):
         if ((unicode_value < FIRST_HANJA_UNICODE or unicode_value > LAST_HANJA_UNICODE) and
-            (unicode_value < FIRST_HANJA_EXT_A_UNICODE or unicode_value > LAST_HANJA_EXT_A_UNICODE)):
+                (unicode_value < FIRST_HANJA_EXT_A_UNICODE or unicode_value > LAST_HANJA_EXT_A_UNICODE)):
             return False
     return True
 
+
 def is_latin1(phrase):
     for unicode_value in map(lambda letter:ord(letter), phrase):
         if unicode_value < FIRST_LATIN1_UNICODE or unicode_value > LAST_LATIN1_UNICODE:
@@ -62,21 +63,23 @@ def has_jongsung(letter):
     code = lt.hangul_index(letter)
     return code % NUM_JONG > 0
 
+
 def has_batchim(letter):
     """This method is the same as has_jongsung()"""
     return has_jongsung(letter)
 
-def has_approximant(letter):
-    """Approximant makes complex vowels, such as ones starting with y or w.
-    In Korean there is a unique approximant euㅡ making uiㅢ, but ㅢ does not make many irregularities."""
-    if len(letter) != 1:
-        raise Exception('The target string must be one letter.')
-    if not is_hangul(letter):
-        raise NotHangulException('The target string must be Hangul')
-
-    jaso = lt.decompose(letter)
-    diphthong = (u'ㅑ',u'ㅒ',u'ㅕ',u'ㅖ',u'ㅘ',u'ㅙ',u'ㅛ',u'ㅝ',u'ㅞ',u'ㅠ')
-    # excluded 'ㅢ' because y- and w-based complex vowels are irregular.
-    # vowels with umlauts (ㅐ, ㅔ, ㅚ, ㅟ) are not considered complex vowels.
-    return jaso[1] in diphthong
-
+# DEPRECATED: has_approximant() is disabled (commented out) below.
+# def has_approximant(letter):
+#     """Approximant makes complex vowels, such as ones starting with y or w.
+#     In Korean there is a unique approximant euㅡ making uiㅢ, but ㅢ does not make many irregularities."""
+#     if len(letter) != 1:
+#         raise Exception('The target string must be one letter.')
+#     if not is_hangul(letter):
+#         raise NotHangulException('The target string must be Hangul')
+#
+#     jaso = lt.decompose(letter)
+#     diphthong = (u'ㅑ', u'ㅒ', u'ㅕ', u'ㅖ', u'ㅘ', u'ㅙ', u'ㅛ', u'ㅝ', u'ㅞ', u'ㅠ')
+#     # excluded 'ㅢ' because y- and w-based complex vowels are irregular.
+#     # vowels with umlauts (ㅐ, ㅔ, ㅚ, ㅟ) are not considered complex vowels.
+#     return jaso[1] in diphthong
+#
diff --git a/hgtk/const.py b/hgtk/const.py
@@ -8,15 +8,28 @@
 ################################################################################
 
 # Code = 0xAC00 + (Chosung_index * NUM_JOONG * NUM_JONG) + (Joongsung_index * NUM_JONG) + (Jongsung_index)
-CHO = (u'ㄱ',u'ㄲ',u'ㄴ',u'ㄷ',u'ㄸ',u'ㄹ',u'ㅁ',u'ㅂ',u'ㅃ',u'ㅅ',u'ㅆ',u'ㅇ',u'ㅈ',u'ㅉ',u'ㅊ',u'ㅋ',u'ㅌ',u'ㅍ',u'ㅎ')
-JOONG = (u'ㅏ',u'ㅐ',u'ㅑ',u'ㅒ',u'ㅓ',u'ㅔ',u'ㅕ',u'ㅖ',u'ㅗ',u'ㅘ',u'ㅙ',u'ㅚ',u'ㅛ',u'ㅜ',u'ㅝ',u'ㅞ',u'ㅟ',u'ㅠ',u'ㅡ',u'ㅢ',u'ㅣ')
-JONG = (u'',u'ㄱ',u'ㄲ',u'ㄳ',u'ㄴ',u'ㄵ',u'ㄶ',u'ㄷ',u'ㄹ',u'ㄺ',u'ㄻ',u'ㄼ',u'ㄽ',u'ㄾ',u'ㄿ',u'ㅀ',u'ㅁ',u'ㅂ',u'ㅄ',u'ㅅ',u'ㅆ',u'ㅇ',u'ㅈ',u'ㅊ',u'ㅋ',u'ㅌ',u'ㅍ',u'ㅎ')
+
+CHO = (
+    u'ㄱ', u'ㄲ', u'ㄴ', u'ㄷ', u'ㄸ', u'ㄹ', u'ㅁ', u'ㅂ', u'ㅃ', u'ㅅ',
+    u'ㅆ', u'ㅇ', u'ㅈ', u'ㅉ', u'ㅊ', u'ㅋ', u'ㅌ', u'ㅍ', u'ㅎ'
+)
+
+JOONG = (
+    u'ㅏ', u'ㅐ', u'ㅑ', u'ㅒ', u'ㅓ', u'ㅔ', u'ㅕ', u'ㅖ', u'ㅗ', u'ㅘ',
+    u'ㅙ', u'ㅚ', u'ㅛ', u'ㅜ', u'ㅝ', u'ㅞ', u'ㅟ', u'ㅠ', u'ㅡ', u'ㅢ', u'ㅣ'
+)
+
+JONG = (
+    u'', u'ㄱ', u'ㄲ', u'ㄳ', u'ㄴ', u'ㄵ', u'ㄶ', u'ㄷ', u'ㄹ', u'ㄺ',
+    u'ㄻ', u'ㄼ', u'ㄽ', u'ㄾ', u'ㄿ', u'ㅀ', u'ㅁ', u'ㅂ', u'ㅄ', u'ㅅ',
+    u'ㅆ', u'ㅇ', u'ㅈ', u'ㅊ', u'ㅋ', u'ㅌ', u'ㅍ', u'ㅎ'
+)
 
 JAMO = CHO + JOONG + JONG[1:]
 
 NUM_CHO = 19
 NUM_JOONG = 21
 NUM_JONG = 28
 
-FIRST_HANGUL_UNICODE = 0xAC00 #'가'
-LAST_HANGUL_UNICODE = 0xD7A3 #'힣'
+FIRST_HANGUL_UNICODE = 0xAC00  # '가'
+LAST_HANGUL_UNICODE = 0xD7A3  # '힣'
diff --git a/hgtk/exception.py b/hgtk/exception.py
@@ -8,8 +8,10 @@
 class NotHangulException(Exception):
     pass
 
+
 class NotLetterException(Exception):
     pass
 
+
 class NotWordException(Exception):
     pass
diff --git a/hgtk/josa.py b/hgtk/josa.py
@@ -8,19 +8,22 @@
 from . import letter
 
 ################################################################################
-# Josa functions
+# Josa Type Parameters
 ################################################################################
 
+EUN_NEUN = {'not': u'은', 'has': '는', 'except':  None}
+I_GA = {'not': u'이', 'has': '가', 'except':  None}
+EUL_REUL = {'not': u'을', 'has': '를', 'except':  None}
+GWA_WA = {'not': u'과', 'has': '와', 'except':  None}
+IDA_DA = {'not': u'이다', 'has': '다', 'except':  None}
+
+EURO_RO = {'not':  u'으로', 'has': u'로', 'except': u'ㄹ'}
+RYUL_YUL = {'not':  u'률', 'has': u'율', 'except': u'ㄴ'}
 
-class Josa:
-    EUN_NEUN = { 'not':u'은', 'has':'는', 'except': None }
-    I_GA = { 'not':u'이', 'has':'가', 'except': None }
-    EUL_REUL = { 'not':u'을', 'has':'를', 'except': None }
-    GWA_WA = { 'not':u'과', 'has':'와', 'except': None }
-    IDA_DA = { 'not':u'이다', 'has':'다', 'except': None }
 
-    EURO_RO = { 'not': u'으로', 'has':u'로', 'except':u'ㄹ' }
-    RYUL_YUL = { 'not': u'률', 'has':u'율', 'except':u'ㄴ' }
+################################################################################
+# Josa functions
+################################################################################
 
 
 def attach(word, josa=Josa.EUN_NEUN):

diff --git a/hgtk/letter.py b/hgtk/letter.py
@@ -4,14 +4,15 @@
 from __future__ import division
 
 from .const import CHO, JOONG, JONG, FIRST_HANGUL_UNICODE, NUM_CHO, NUM_JOONG, NUM_JONG
-from .exception import NotHangulException
+from .exception import NotHangulException, NotLetterException
 
 from six import unichr
 
 ################################################################################
 # Decomposition & Combination
 ################################################################################
 
+
 def compose(chosung, joongsung, jongsung=u''):
     """This function returns a Hangul letter by composing the specified chosung, joongsung, and jongsung.
     @param chosung
@@ -25,21 +26,23 @@ def compose(chosung, joongsung, jongsung=u''):
         joongsung_index = JOONG.index(joongsung)
         jongsung_index = JONG.index(jongsung)
     except Exception:
-        raise NotHangulException('No valid Hangul character can be generated using given combination of chosung, joongsung, and jongsung.')
+        raise NotHangulException('No valid Hangul character index')
 
     return unichr(0xAC00 + chosung_index * NUM_JOONG * NUM_JONG + joongsung_index * NUM_JONG + jongsung_index)
 
+
 def hangul_index(letter):
     return ord(letter) - FIRST_HANGUL_UNICODE
 
+
 def decompose_index(code):
     jong = int(code % NUM_JONG)
     code /= NUM_JONG
     joong = int(code % NUM_JOONG)
     code /= NUM_JOONG
     cho = int(code)
 
-    return (cho, joong, jong)
+    return cho, joong, jong
 
 
 def decompose(hangul_letter):
@@ -53,14 +56,13 @@ def decompose(hangul_letter):
         raise NotHangulException('')
 
     if hangul_letter in CHO:
-        return (hangul_letter, '', '')
+        return hangul_letter, '', ''
 
     if hangul_letter in JOONG:
-        return ('', hangul_letter, '')
+        return '', hangul_letter, ''
 
     if hangul_letter in JONG:
-        return ('', '', hangul_letter)
-
+        return '', '', hangul_letter
 
     code = hangul_index(hangul_letter)
     cho, joong, jong = decompose_index(code)
@@ -69,8 +71,8 @@ def decompose(hangul_letter):
         cho = 0
 
     try:
-        return (CHO[cho], JOONG[joong], JONG[jong])
+        return CHO[cho], JOONG[joong], JONG[jong]
     except:
-        print ("%d / %d  / %d"%(cho, joong, jong))
-        print ("%s / %s " %( (JOONG[joong].encode("utf8"), JONG[jong].encode('utf8'))))
+        print("%d / %d  / %d"%(cho, joong, jong))
+        print("%s / %s " %( JOONG[joong].encode("utf8"), JONG[jong].encode('utf8')))
         raise Exception()
diff --git a/hgtk/text.py b/hgtk/text.py
@@ -9,15 +9,15 @@
 
 # 코딩 효율과 가독성을 위해서 index대신 unicode사용 by bluedisk
 JONG_COMP = {
-    u'ㄱ':{
+    u'ㄱ': {
         u'ㄱ': u'ㄲ',
         u'ㅅ': u'ㄳ',
     },
-    u'ㄴ':{
+    u'ㄴ': {
         u'ㅈ': u'ㄵ',
         u'ㅎ': u'ㄶ',
     },
-    u'ㄹ':{
+    u'ㄹ': {
         u'ㄱ': u'ㄺ',
         u'ㅁ': u'ㄻ',
         u'ㅂ': u'ㄼ',
@@ -30,10 +30,12 @@
 
 DEFAULT_COMPOSE_CODE = u'ᴥ'
 
+
 ################################################################################
 # Hangul Automata functions by bluedisk@gmail.com
 ################################################################################
 
+
 def decompose(text, latin_filter=True, compose_code=DEFAULT_COMPOSE_CODE):
     result=u""
 
@@ -55,76 +57,83 @@ def decompose(text, latin_filter=True, compose_code=DEFAULT_COMPOSE_CODE):
 
     return result
 
+
+STATUS_CHO = 0
+STATUS_JOONG = 1
+STATUS_JONG1 = 2
+STATUS_JONG2 = 3
+
+
 def compose(text, compose_code=DEFAULT_COMPOSE_CODE):
     res_text = u""
-    status="CHO"
+
+    status = STATUS_CHO
 
     for c in text:
 
-        if status == "CHO":
+        if status == STATUS_CHO:
 
             if c in CHO:
                 chosung = c
-                status="JOONG"
+                status = STATUS_JOONG
             else:
                 if c != compose_code:
 
                     res_text = res_text + c
 
-        elif status == "JOONG":
+        elif status == STATUS_JOONG:
 
             if c != compose_code and c in JOONG:
                 joongsung = c
-                status="JONG1"
+                status = STATUS_JONG1
             else:
                 res_text = res_text + chosung
 
                 if c in CHO:
                     chosung = c
-                    status="JOONG"
+                    status = STATUS_JOONG
                 else:
                     if c != compose_code:
 
                         res_text = res_text + c
-                    status="CHO"
+                    status = STATUS_CHO
 
-        elif status == "JONG1":
+        elif status == STATUS_JONG1:
 
             if c != compose_code and c in JONG:
                 jongsung = c
 
                 if c in JONG_COMP:
-                    status="JONG2"
+                    status = STATUS_JONG2
                 else:
                     res_text = res_text + letter.compose(chosung, joongsung, jongsung)
-                    status="CHO"
+                    status = STATUS_CHO
 
             else:
                 res_text = res_text + letter.compose(chosung, joongsung)
 
                 if c in CHO:
                     chosung = c
-                    status="JOONG"
+                    status = STATUS_JOONG
                 else:
                     if c != compose_code:
 
                         res_text = res_text + c
 
-                    status="CHO"
+                    status = STATUS_CHO
 
-        elif status == "JONG2":
+        elif status == STATUS_JONG2:
 
             if c != compose_code and c in JONG_COMP[jongsung]:
                 jongsung = JONG_COMP[jongsung][c]
-                c = compose_code # 종성 재 출력 방지
+                c = compose_code  # 종성 재 출력 방지
 
             res_text = res_text + letter.compose(chosung, joongsung, jongsung)
 
             if c != compose_code:
 
                 res_text = res_text + c
 
-            status="CHO"
-
+            status = STATUS_CHO
 
-    return res_text
+    return res_text
diff --git a/tests/test_checker.py b/tests/test_checker.py
@@ -5,34 +5,44 @@
 
 import hgtk
 
+
+# hangul test - true/false
 def test_is_hangul_1():
     assert hgtk.checker.is_hangul('한글입니다')
 
+
 def test_is_hangul_2():
-    assert hgtk.checker.is_hangul('no한글입니다') == False
+    assert not hgtk.checker.is_hangul('no한글입니다')
 
 
+# hanja test - true/false
 def test_is_hanja_1():
     assert hgtk.checker.is_hanja('大韓民國')
 
+
 def test_is_hanja_2():
-    assert hgtk.checker.is_hanja('大한민국') == False
+    assert not hgtk.checker.is_hanja('大한민국')
 
 
+# latin test - true/false
 def test_is_latin1_1():
     assert hgtk.checker.is_latin1('abcdefghijklmnopqrstuvwxyz')
 
+
 def test_is_latin1_2():
-    assert hgtk.checker.is_latin1('한글latin1한') == False
+    assert not hgtk.checker.is_latin1('한글latin1한')
 
 
+# batchim test - true/false
 def test_has_batchim_1():
     assert hgtk.checker.has_batchim('한')
 
+
 def test_has_batchim_2():
-    assert hgtk.checker.has_batchim('하') == False
+    assert not hgtk.checker.has_batchim('하')
 
 
+# DEPRECATED: has_approximant() is not a general-purpose function, so its tests stay disabled.
 # def test_has_approximant_1():
 #     assert hgtk.checker.has_approximant('롹')