In [6]:
__all__ = ["split_syllable_char", "split_syllables",
           "join_jamos", "join_jamos_char",
           "CHAR_INITIALS", "CHAR_MEDIALS", "CHAR_FINALS"]

import itertools

INITIAL = 0x001
MEDIAL = 0x010
FINAL = 0x100
CHAR_LISTS = {
    INITIAL: list(map(chr, [
        0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139,
        0x3141, 0x3142, 0x3143, 0x3145, 0x3146, 0x3147,
        0x3148, 0x3149, 0x314a, 0x314b, 0x314c, 0x314d,
        0x314e
    ])),
    MEDIAL: list(map(chr, [
        0x314f, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154,
        0x3155, 0x3156, 0x3157, 0x3158, 0x3159, 0x315a,
        0x315b, 0x315c, 0x315d, 0x315e, 0x315f, 0x3160,
        0x3161, 0x3162, 0x3163
    ])),
    FINAL: list(map(chr, [
        0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136,
        0x3137, 0x3139, 0x313a, 0x313b, 0x313c, 0x313d,
        0x313e, 0x313f, 0x3140, 0x3141, 0x3142, 0x3144,
        0x3145, 0x3146, 0x3147, 0x3148, 0x314a, 0x314b,
        0x314c, 0x314d, 0x314e
    ]))
}
CHAR_INITIALS = CHAR_LISTS[INITIAL]
CHAR_MEDIALS = CHAR_LISTS[MEDIAL]
CHAR_FINALS = CHAR_LISTS[FINAL]
CHAR_SETS = {k: set(v) for k, v in CHAR_LISTS.items()}
CHARSET = set(itertools.chain(*CHAR_SETS.values()))
CHAR_INDICES = {k: {c: i for i, c in enumerate(v)}
                for k, v in CHAR_LISTS.items()}


def is_hangul_syllable(c):
    return 0xac00 <= ord(c) <= 0xd7a3  # Hangul Syllables


def is_hangul_jamo(c):
    return 0x1100 <= ord(c) <= 0x11ff  # Hangul Jamo


def is_hangul_compat_jamo(c):
    return 0x3130 <= ord(c) <= 0x318f  # Hangul Compatibility Jamo


def is_hangul_jamo_exta(c):
    return 0xa960 <= ord(c) <= 0xa97f  # Hangul Jamo Extended-A


def is_hangul_jamo_extb(c):
    return 0xd7b0 <= ord(c) <= 0xd7ff  # Hangul Jamo Extended-B


def is_hangul(c):
    return (is_hangul_syllable(c) or
            is_hangul_jamo(c) or
            is_hangul_compat_jamo(c) or
            is_hangul_jamo_exta(c) or
            is_hangul_jamo_extb(c))


def is_supported_hangul(c):
    return is_hangul_syllable(c) or is_hangul_compat_jamo(c)


def check_hangul(c, jamo_only=False):
    if not ((jamo_only or is_hangul_compat_jamo(c)) or is_supported_hangul(c)):
        raise ValueError(f"'{c}' is not a supported hangul character. "
                         f"'Hangul Syllables' (0xac00 ~ 0xd7a3) and "
                         f"'Hangul Compatibility Jamos' (0x3130 ~ 0x318f) are "
                         f"supported at the moment.")


def get_jamo_type(c):
    check_hangul(c)
    assert is_hangul_compat_jamo(c), f"not a jamo: {ord(c):x}"
    return sum(t for t, s in CHAR_SETS.items() if c in s)


def split_syllable_char(c):
    """
    Splits a given korean syllable into its components. Each component is
    represented by Unicode in 'Hangul Compatibility Jamo' range.

    Arguments:
        c: A Korean character.

    Returns:
        A triple (initial, medial, final) of Hangul Compatibility Jamos.
        If no jamo corresponds to a position, `None` is returned there.

    Example:
        >>> split_syllable_char("안")
        ("ㅇ", "ㅏ", "ㄴ")
        >>> split_syllable_char("고")
        ("ㄱ", "ㅗ", None)
        >>> split_syllable_char("ㅗ")
        (None, "ㅗ", None)
        >>> split_syllable_char("ㅇ")
        ("ㅇ", None, None)
    """
    check_hangul(c)
    if len(c) != 1:
        raise ValueError("Input string must have exactly one character.")

    init, med, final = None, None, None
    if is_hangul_syllable(c):
        offset = ord(c) - 0xac00
        x = (offset - offset % 28) // 28
        init, med, final = x // 21, x % 21, offset % 28
        if not final:
            final = None
        else:
            final -= 1
    else:
        pos = get_jamo_type(c)
        if pos & INITIAL == INITIAL:
            pos = INITIAL
        elif pos & MEDIAL == MEDIAL:
            pos = MEDIAL
        elif pos & FINAL == FINAL:
            pos = FINAL
        idx = CHAR_INDICES[pos][c]
        if pos == INITIAL:
            init = idx
        elif pos == MEDIAL:
            med = idx
        else:
            final = idx
    return tuple(CHAR_LISTS[pos][idx] if idx is not None else None
                 for pos, idx in
                 zip([INITIAL, MEDIAL, FINAL], [init, med, final]))


def split_syllables(s, ignore_err=True, pad= '_'):
    """
    Performs syllable-split on a string.

    Arguments:
        s (str): A string (possibly mixed with non-Hangul characters).
        ignore_err (bool): If set False, it ensures that all characters in
            the string are Hangul-splittable and throws a ValueError otherwise.
            (default: True)
        pad (str): Pad empty jamo positions (initial, medial, or final) with
            `pad` character. This is useful for cases where fixed-length
            strings are needed. (default: None)

    Returns:
        Hangul-split string

    Example:
        >>> split_syllables("안녕하세요")
        "ㅇㅏㄴㄴㅕㅇㅎㅏㅅㅔㅇㅛ"
        >>> split_syllables("안녕하세요~~", ignore_err=False)
        ValueError: encountered an unsupported character: ~ (0x7e)
        >>> split_syllables("안녕하세요ㅛ", pad="x")
        'ㅇㅏㄴㄴㅕㅇㅎㅏxㅅㅔxㅇㅛxxㅛx'
    """

    def try_split(c):
        try:
            return split_syllable_char(c)
        except ValueError:
            if ignore_err:
                return (c,)
            raise ValueError(f"encountered an unsupported character: "
                             f"{c} (0x{ord(c):x})")

    s = map(try_split, s)
    if pad is not None:
        tuples = map(lambda x: tuple(pad if y is None else y for y in x), s)
    else:
        tuples = map(lambda x: filter(None, x), s)
    return "".join(itertools.chain(*tuples))


def join_jamos_char(init, med, final=None):
    """
    Combines jamos into a single syllable.

    Arguments:
        init (str): Initial jao.
        med (str): Medial jamo.
        final (str): Final jamo. If not supplied, the final syllable is made
            without the final. (default: None)

    Returns:
        A Korean syllable.
    """
    chars = (init, med, final)
    for c in filter(None, chars):
        check_hangul(c, jamo_only=True)

    idx = tuple(CHAR_INDICES[pos][c] if c is not None else c
                for pos, c in zip((INITIAL, MEDIAL, FINAL), chars))
    init_idx, med_idx, final_idx = idx
    # final index must be shifted once as
    # final index with 0 points to syllables without final
    final_idx = 0 if final_idx is None else final_idx + 1
    return chr(0xac00 + 28 * 21 * init_idx + 28 * med_idx + final_idx)


def join_jamos(s, ignore_err=True):
    """
    Combines a sequence of jamos to produce a sequence of syllables.

    Arguments:
        s (str): A string (possible mixed with non-jamo characters).
        ignore_err (bool): If set False, it will ensure that all characters
            will be consumed for the making of syllables. It will throw a
            ValueError when it fails to do so. (default: True)

    Returns:
        A string

    Example:
        >>> join_jamos("ㅇㅏㄴㄴㅕㅇㅎㅏㅅㅔㅇㅛ")
        "안녕하세요"
        >>> join_jamos("ㅇㅏㄴㄴㄴㅕㅇㅎㅏㅅㅔㅇㅛ")
        "안ㄴ녕하세요"
        >>> join_jamos()
    """
    last_t = 0
    queue = []
    new_string = ""

    def flush(n=0):
        new_queue = []
        while len(queue) > n:
            new_queue.append(queue.pop())
        if len(new_queue) == 1:
            if not ignore_err:
                raise ValueError(f"invalid jamo character: {new_queue[0]}")
            result = new_queue[0]
        elif len(new_queue) >= 2:
            try:
                result = join_jamos_char(*new_queue)
            except (ValueError, KeyError):
                # Invalid jamo combination
                if not ignore_err:
                    raise ValueError(f"invalid jamo characters: {new_queue}")
                result = "".join(new_queue)
        else:
            result = None
        return result

    for c in s:
        if c == '_':  #jeonghui add
            continue  #jeonghui add
        if c not in CHARSET:
            if queue:
                new_c = flush() + c
            else:
                new_c = c
            last_t = 0
        else:
            t = get_jamo_type(c)
            new_c = None
            if t & FINAL == FINAL:
                if not (last_t == MEDIAL):
                    new_c = flush()
            elif t == INITIAL:
                new_c = flush()
            elif t == MEDIAL:
                if last_t & INITIAL == INITIAL:
                    new_c = flush(1)
                else:
                    new_c = flush()
            last_t = t
            queue.insert(0, c)
        if new_c:
            new_string += new_c
    if queue:
        new_string += flush()
    return new_string



In [7]:
from khaiii import KhaiiiApi
from collections import deque
api = KhaiiiApi()

da_low= ['다', 'ㄴ다', '는다', '느냐다', '단다', '서다', '란다', 'ㄴ단다'] 
da_case1 = ['더다', '느냐다', '서다', '어서다', '세다', '는지다', '냐다', '라다', '드다', 'ㄹ지다', 'ㄴ가다', '매다', '대다', '가다', '어다', 'ㅂ세다', 'ㄴ지다', '거다', 'ㄹ다', '네다', '어라다', 'ㄹ트다', '뫼다', '는가다', '을지다', '두다', '구다', '아야다', '조다', '아서다', '오다', '기다']
da_case2 = ['ㄴ다', '단다', '란다', 'ㄴ단다', '더란다', '넌다', '냐이다', '건다', '으란다', '서란다', '나다', '는단다', '인다', '렌다', '은다', '련다', '차다', 'ㄴ거다', '프다', '올게다']
da_case3 = ['는다', '잖다', '뵀다', '쫒는다', '더랬다', 'ㅂ다']
da_tag_case1 = ['SL', 'SW', 'SS', 'NNP', 'NNG']
da_tag_case2 = ['NNB', 'XSV', 'XSA', 'ETN', 'VCN', 'JKB', 'VCP', 'VV', 'VX', 'EP', 'VA', 'EC']

yoo_low = ['을까', 'ㄹ까', '지', '으니까', '어', '는데', 'ㄴ가', 'ㄹ게', '래', '잖아', '네', '여', 'ㄹ래', '게', '나', '군', '을게', '다고', '지만', '은데', 'ㄴ지', '는지', '라고', 'ㄴ데', '는걸', '아서', '거든', '더라고', '어서', '려고', '을래', '대', 'ㄴ대', '던데', '더군', '여서', '건가', '어야지', 'ㄹ걸', 'ㄹ께', '아야지', '다면서', '는군', 'ㄴ다고', '은가', '다니', '다면', '실까', '가', '신가', '서', '까', '다니까', '데', '다던데', '다는데', '랄까', '거나', '라던데', '던가']

an_low = ['아', '야', '어', '지', '자', '을까', 'ㄹ까', '어라']

#아로 끝나는 친구들
ah_low = ['하', '좋', '오', '같', '나오', '많', '앉', '막', '팔', '편하', '다녀오', '나', '만나', '보', '알', '바라', '말', '가', '귀찮', '들어가', '바르', '다르', '않', '아프', '내려가', '돌아오', '놀', '지나가', '받', '나타나', '나쁘', '남', '모르', '맞', '못하', '밝', '타', '들어오', '찾아오', '괜찮', '닫', '놓', '살', '춰', '다보', '지내', '그리워하', '높', '잡', '가보', '힘내', '내', '찾아보', '가져오', '사', '달', '내보', '라', '꽂', '돌아가', '프', '나가', '여받', '밀어내', '빛나', '생각나', '찾아가', '깨', '걷어오', '걷어가', '맡', '자', '빠르', '닿', '약하', '참', '끝나', '올라가', '찾', '일어나', '깨어나', '골', '잦', '보내', '싸', '닦', '똑같', '따라오', '볶', '날아가', '낫', '갚', '떠나', '빨', '돌', '알맞', '작', '옳', '깎', '낳', '떠나가', '낮', '보내보', '고르', '심하', '살아가', '비싸', '고프', '담', '뛰어오', '녹', '멀잖', '따르', '알아보', '사오', '주말', '급하', '사보', '뛰어보', '모으', '가져가', '내려오', '해오', '잖', '자르', '좁', '삼', '마시잖', '형편없잖', '짧', '올라오', '둘러보', '시하', '돌아보', '도망가', '나아가', '뒤따라오', '두르러보', '달려가', '바라보', '돕오', '구르러가',  '흘러나오', '삶', '다가오', '살펴보', '깨닫', '빠져나오', '내려보', '상하']


def tohigh(input):

    result = []

    analyzed = api.analyze(input)  
    for data in analyzed:
        lis_word = []
        lis_tag = []
        for morph in data.morphs:
            lis_word.append(morph.lex)
            lis_tag.append(morph.tag)
#         print(lis_word)
#         print(lis_tag)
#         print("====================")
        try:
            lis_tag.index('EF')
            result.append(to_high(data.lex,lis_word,lis_tag))
        except:
            if len(lis_tag) == 2 and lis_tag[0] == "NP" and ("JK" in lis_tag[1] or "JX" == lis_tag[1]):
                if lis_word[0] == "나":
                    lis_word[0] = "저"
                    if lis_word[1] == "ㄴ":
                        result.append("전")
                    elif lis_word[1] == "ㄹ":
                        result.append("절")
                    else:
                        result.append(''.join(lis_word))
                elif lis_word[0] == "너":
                    print(lis_word[1])
                    lis_word[0] = "당신"
                    if lis_word[1] == "ㄴ":
                        result.append("당신은")
                    elif lis_word[1] == "ㄹ":
                        result.append("당신을")
                    elif lis_word[1] == "랑":
                        result.append("당신이랑")
                    else:
                        if lis_word[1] == "는":
                            lis_word[1] = "은"
                        elif lis_word[1] == "와":
                            lis_word[1] == "과"
                        elif lis_word[1] == "가":
                            lis_word[1] == "이"
                        elif lis_word[1] == "를":
                            lis_word[1] == "을"
                        result.append(''.join(lis_word))
                elif data.lex == "내가":
                    result.append("제가")
                elif data.lex == "네가":
                    result.append("당신이")
                else:
                    result.append(data.lex)
            else:
                result.append(data.lex)
        
    return ' '.join(result)
        

    
    
def to_high(result, lis_word, lis_tag):
#     print(lis_word)
#     print(lis_tag)
    EF_indexs = [pos for pos, char in enumerate(lis_tag) if char == "EF"]
    EF_index = EF_indexs[-1]
    EF = lis_word[EF_index]
    EF_next = lis_word[EF_index+1]
    EF_front = lis_word[EF_index-1]
    if is_hangul(EF_next):
        return result
    
    #다로 끝나는 친구들
    if EF in da_case1:
        result = result.replace('다'+EF_next, '입니다'+EF_next)
    elif EF in da_case2:
        if EF == 'ㄴ다':
            result = join_jamos(split_syllables(result).replace('ㄴㄷㅏ_'+EF_next, 'ㅂㄴㅣ_ㄷㅏ_' + EF_next))
        else:
            jamo = split_syllables(EF[:-1])[-1]
            result = join_jamos(split_syllables(result).replace(jamo + 'ㄷㅏ_' + EF_next, 'ㅂㄴㅣ_ㄷㅏ_' + EF_next))
    elif EF in da_case3:
        if '는다' in EF:
            result = result.replace('는다'+EF_next, '습니다'+EF_next)
        else:
            result = result.replace('다'+EF_next, '습니다'+EF_next)
    elif EF == '다':
        if lis_tag[EF_index-1] in da_tag_case1:
            result = result.replace('다'+EF_next, '입니다'+EF_next)
        elif lis_tag[EF_index-1] in da_tag_case2:
            jamo = split_syllables(EF_front)[-1]
#             print(jamo)
            #받침없는 경우(ㄹ,ㄴ포함)
            if jamo == '_' or jamo == 'ㄹ' or jamo == 'ㄴ' :
                if (lis_tag[EF_index-1] == 'VCP' and lis_word[EF_index-1] == '이') and lis_word[EF_index-1] != result[result.rfind('다'+EF_next)-1]:
                    result = result.replace('다'+EF_next, '입니다'+EF_next)
                else:
                    result = join_jamos(split_syllables(result).replace(jamo+'ㄷㅏ_'+EF_next, 'ㅂㄴㅣ_ㄷㅏ_' + EF_next))
            #받침있는 경우
            else:
                result = result.replace('다'+EF_next, '습니다'+EF_next)
    #요로 끝나는 친구들 
    elif EF in yoo_low:
        if len(EF) == 1:
            if lis_word[-2] == EF and (lis_word[-2] == '어' or lis_word[-2] == "여") and lis_word[-2] != result[-2]:
                result = result.replace(EF_next, '요'+EF_next)
            else:
                result = result.replace(EF+EF_next, EF+'요'+EF_next)
            
        else:
            result = join_jamos(split_syllables(result).replace(split_syllables(EF[1:])+EF_next, split_syllables(EF[1:]+'요')+EF_next))
    #아
    elif EF == '아':
        if EF_front in ah_low:
            result = result.replace(EF_next, '요'+EF_next)
    #구나
    elif '구나' in EF:
        result = result.replace(EF+EF_next, '네요'+EF_next)
    #야
    elif EF == '야':
        result = result.replace(EF+EF_next, '에요'+EF_next)
    #니 냐 느냐 는가
    elif EF == '니' or EF == '냐' or EF == '느냐' or EF == '는가':
        result = result.replace(EF+EF_next, '나요'+EF_next)
    #자
    elif EF == '자':
        result = result.replace(EF+EF_next, '지요'+EF_next)
    #라
    elif EF == '라':
        jamo = split_syllables(EF_front)[-1]
        if lis_tag[EF_index-1] == 'VX' or 'XSV':
            if jamo == '_':
                result = result.replace(EF+EF_next, '세요'+EF_next)
            elif EF_front[-1] =='달':
                result = result.replace(EF_front[-1]+EF+EF_next,'주세요'+EF_next)
            else:
                result = result.replace(EF+EF_next, EF+'요'+EF_next)
    #아라, 어라
    elif EF == '아라' or EF == '어라':
        jamo = split_syllables(EF_front)[-1]
        check = result.find(EF_front+EF+EF_next)
        if check != -1:
            if jamo == 'ㄹ':
                result = result.replace(EF_front+EF+EF_next, join_jamos(split_syllables(EF_front)[:-1])+'세요'+EF_next)
            elif jamo == '_':
                result = result.replace(EF_front+EF+EF_next, EF_front+'세요'+EF_next)
            else:
                result = result.replace(EF_front+EF+EF_next, EF_front+'으세요'+EF_next)
        else:
            ra_index = result.find('라'+EF_next)
            if result[ra_index-1:ra_index+1]=='라라':
                result = result[:ra_index-len(EF_front)-1] + join_jamos(split_syllables(EF_front)[:-1])+'르세요' + result[ra_index+1:]
            elif jamo == 'ㄹ':
                result = result[:ra_index-len(EF_front)] + join_jamos(split_syllables(EF_front)[:-1])+'세요' + result[ra_index+1:]
            elif jamo == '_':
                result = result[:ra_index-len(EF_front)] + EF_front + '세요' + result[ra_index+1:]
            else:
                result = result[:ra_index-len(EF_front)] + EF_front + '으세요' + result[ra_index+1:]
    elif EF == '더라' or EF == "다더라":
        result = result.replace(EF+EF_next, EF+'고요'+EF_next)
    elif EF == "잖니":
        result = result.replace(EF+EF_next, EF[0]+'아요'+EF_next)
    elif EF == "야지":
        result = result.replace(EF+EF_next, EF[0]+'죠'+EF_next)
    elif EF == "자아":
        result = result.replace(EF_front + EF[0] + EF_next, '지냅시다'+EF_next)
    elif EF == "이다":
        result = result.replace(EF+EF_next, "입니다" + EF_next)

        

    return result

In [14]:
txt = "나는 네가 얘기했던 모든 게 기억나요."
print(Changer(txt))

#니까
#라니
#으리라
#이다
#ㄴ다


['나', '는']
['NP', 'JX']
['네', '가']
['NP', 'JKS']
['얘기', '하', '였', '던']
['NNG', 'XSV', 'EP', 'ETM']
['모든']
['MM']
['것', '이']
['NNB', 'JKS']
['기억나', '요', '.']
['VV', 'EF', 'SF']
['기억나', '요', '.']
['VV', 'EF', 'SF']
나는 네가 얘기했던 모든 게 기억나요.


In [11]:
kr = open('/Users/jeonghui/Desktop/aihub.kr.shuf', "r" , encoding = "utf-8")
khaiii = open('/Users/jeonghui/Desktop/aihub.khaiii.shuf', "w" , encoding = "utf-8")

text=kr.readline()

while(text != ''):
    check = 0
    if text[-1]=='\n':
        text = text.replace('\n','').strip()
    if is_hangul(text[-1]):
        text = text+"."
        check = 1
    text = Changer(text)
    if check == 1:
        text = text[:-1]
    khaiii.write(text+"\n")
    text=kr.readline()
    
khaiii.close()
kr.close()

In [12]:
kr = open('/Users/jeonghui/Desktop/hgu_clean.kr.shuf', "r" , encoding = "utf-8")
khaiii = open('/Users/jeonghui/Desktop/hgu_clean.khaiii.shuf', "w" , encoding = "utf-8")

text=kr.readline()

while(text != ''):
    check = 0
    if text[-1]=='\n':
        text = text.replace('\n','').strip()
    if is_hangul(text[-1]):
        text = text+"."
        check = 1
    text = Changer(text)
    if check == 1:
        text = text[:-1]
    khaiii.write(text+"\n")
    text=kr.readline()
    
khaiii.close()
kr.close()

In [126]:
khaiii = open('/Users/jeonghui/Desktop/aihub.khaiii.shuf', "r" , encoding = "utf-8")
mecab = open('/Users/jeonghui/Desktop/aihub.mecab.shuf', "r" , encoding = "utf-8")
diff = open('/Users/jeonghui/Desktop/두개비교', "w" , encoding = "utf-8")

text1=khaiii.readline()
text2=mecab.readline()

high = ['니다', '니까', '요', '시다', '도다', '리다', '야']


while(text1 != ''):
    check = 0
    text1_split = text1.split()
    text2_split = text2.split()
#     if text1_split[-1] != text2_split[-1] and len(text1) <= len(text2):
#         if "니다" in text1_split[-1] or "니까" in text1_split[-1] or "요" in text1_split[-1] or "시다" in text1_split[-1] or "도다" in text1_split[-1] or "리다" in text1_split[-1] or "야" in text1_split[-1]:
#             check = 1
#         if text1 != text2 and check == 0:
    
    if text1 != text2:
#         if "니다" not in text1_split[-1] and "니까" not in text1_split[-1] and "요" not in text1_split[-1] and "시다" not in text1_split[-1] and "도다" not in text1_split[-1] and "리다" not in text1_split[-1] and "야" not in text1_split[-1]:
        diff.write(text1)
        diff.write(text2)
        diff.write('\n')
    
    text1=khaiii.readline()
    text2=mecab.readline()

khaiii.close()
mecab.close()
diff.close()

In [125]:
kr = open('/Users/jeonghui/Desktop/hgu_clean.kr.shuf', "r" , encoding = "utf-8")
khaiii = open('/Users/jeonghui/Desktop/hgu_clean.khaiii.shuf', "w" , encoding = "utf-8")

text=kr.readline()

while(text != ''):
    check = 0
    if text[-1]=='\n':
        text = text.replace('\n','').strip()
    if is_hangul(text[-1]):
        text = text+"."
        check = 1
    text = Changer(text)
    if check == 1:
        text = text[:-1]
    khaiii.write(text+"\n")
    text=kr.readline()
    
khaiii.close()
kr.close()

In [None]:
##니다.
##요
##세요.
#아요, 어요
#선어말 어미 -시-

#clear
#ㅂ니다.


In [9]:
from khaiii import KhaiiiApi
from collections import deque
api = KhaiiiApi()

#ㅂ니다
q_nida2s_da = ["VV", "VX", "XSV", "EP"]
q_nida2da = ["XSA", "VCP", "VA", "VCN"]
#니다에 결합된 친구들
#'ㅂ니다'를 'ㄴ다'로
니다_결합1 = ['답니다', 'ㄴ답니다', '렙니다', '랍니다', '십니다', '읍니다', '더랍니다', '렵니다', '풉니다', '뜹니다', '툽니다', '춥니다', '찹니다', '앱니다', '옵니다', '챕니다', '귑니다', '줍니다', '합니다']
#'ㅂ니다'를 '다'로
니다_결합2 = ['입니다', '섭니다', '겁니다', 'ㄹ겁니다', '읩니다', '아입니다']

yoo_low = ['을까', 'ㄹ까', '지', '으니까', '어', '는데', 'ㄴ가', 'ㄹ게', '래', '잖아', '네', '여', 'ㄹ래', '게', '나', '군', '을게', '다고', '지만', '은데', 'ㄴ지', '는지', '라고', 'ㄴ데', '는걸', '아서', '거든', '더라고', '어서', '려고', '을래', '대', 'ㄴ대', '던데', '더군', '여서', '건가', '어야지', 'ㄹ걸', 'ㄹ께', '아야지', '다면서', '는군', 'ㄴ다고', '은가', '다니', '다면', '실까', '가', '신가', '서', '까', '다니까', '데', '다던데', '다는데', '랄까', '거나', '라던데', '던가']


def Changer(input):

    result = []

    analyzed = api.analyze(input)  
    for data in analyzed:
        lis_word = []
        lis_tag = []
        for morph in data.morphs:
            lis_word.append(morph.lex)
            lis_tag.append(morph.tag)
        print(lis_word)
        print(lis_tag)
        print("====================")
        try:
            lis_tag.index('EF')
            result.append(to_low(data.lex,lis_word,lis_tag))
        except:
            if len(lis_tag) == 2 and lis_tag[0] == "NP" and ("JK" in lis_tag[1] or "JX" == lis_tag[1]):
                if lis_word[0] == "저":
                    lis_word[0] = "나"
                    if lis_word[1] == "ㄴ":
                        result.append("난")
                    elif lis_word[1] == "ㄹ":
                        result.append("날")
                    else:
                        result.append(''.join(lis_word))
                elif lis_word[0] == "당신":
                    lis_word[0] = "너"
                    if lis_word[1] == "은":
                        result.append("너는")
                    elif lis_word[1] == "을":
                        result.append("너를")
                    elif lis_word[1] == "이랑":
                        result.append("너랑")
                    elif lis_word[1] == "이":
                        result.append("너가")
                    else:
                        if lis_word[1] == "과":
                            lis_word[1] == "와"
                        result.append(''.join(lis_word))
                elif data.lex == "제가":
                    result.append("내가")
                elif data.lex == "당신이":
                    result.append("네가")
                else:
                    result.append(data.lex)
            else:
                result.append(data.lex)
        
    return ' '.join(result)
    
def to_low(result, lis_word, lis_tag):
    print(lis_word)
    print(lis_tag)
    EF_indexs = [pos for pos, char in enumerate(lis_tag) if char == "EF"]
    EF_index = EF_indexs[-1]
    EF = lis_word[EF_index]
    EF_next = lis_word[EF_index+1]
    EF_front = lis_word[EF_index-1]
    tag_front = lis_tag[EF_index-1]
    if is_hangul(EF_next):
        return result
    
    #다로 끝나는 친구들
    if EF == "ㅂ니다":
        if tag_front in q_nida2s_da:
            result = join_jamos(split_syllables(result).replace('ㅂㄴㅣ_ㄷㅏ_'+EF_next, 'ㄴㄷㅏ_'+EF_next))
        if tag_front in q_nida2da:
            if tag_front == 'VA' and (EF_front == "하" or EF_front == "나" or EF_front == "지" or EF_front == "원하"):
                result = join_jamos(split_syllables(result).replace('ㅂㄴㅣ_ㄷㅏ_'+EF_next, 'ㄴㄷㅏ_'+EF_next))
            else:
                result = join_jamos(split_syllables(result).replace('ㅂㄴㅣ_ㄷㅏ_'+EF_next, '_ㄷㅏ_'+EF_next))
    elif EF == "습니다":
        result = result.replace(EF + EF_next, "다" + EF_next)
    elif EF == "뭅니다":
        result = join_jamos(split_syllables(result).replace('ㅂㄴㅣ_ㄷㅏ_'+EF_next, 'ㄹㄷㅏ_'+EF_next))
    elif EF in 니다_결합1:
        result = join_jamos(split_syllables(result).replace('ㅂㄴㅣ_ㄷㅏ_'+EF_next, 'ㄴㄷㅏ_'+EF_next))
    elif EF in 니다_결합2:
        result = join_jamos(split_syllables(result).replace('ㅂㄴㅣ_ㄷㅏ_'+EF_next, '_ㄷㅏ_'+EF_next))
    elif EF[:-1] in yoo_low:
        result = result.replace("요" + EF_next, EF_next)
    
    
    
    return result

In [10]:
txt = "오늘 저녁에요?"
print(Changer(txt))

['오늘']
['NNG']
['저녁', '에', '요', '?']
['NNG', 'JKB', 'EF', 'SF']
['저녁', '에', '요', '?']
['NNG', 'JKB', 'EF', 'SF']
오늘 저녁에요?


In [11]:
import copy
import torch
import torch.nn as nn

class ConcatLinear(nn.Module):
    def __init__(self, dim_in, dim_out):
        super(ConcatLinear, self).__init__()
        self._layer = nn.Linear(dim_in + 1, dim_out)

    def forward(self, t, x):
        tt = torch.ones_like(x[:, :1]) * t
        ttx = torch.cat([tt, x], 1)
        return self._layer(ttx)

z_size = 64
input_shape = (z_size,)
hidden_shape = input_shape
print(hidden_shape)
dims = "512-512"
hidden_dims = tuple(map(int, dims.split("-")))
strides = [None] * (len(hidden_dims) + 1)

print("==========")
for dim_out, stride in zip(hidden_dims + (input_shape[0],), strides):
    if stride is None:
        layer_kwargs = {}
    print(hidden_shape[0],dim_out, stride)
    print(ConcatLinear(hidden_shape[0],dim_out,**layer_kwargs))
    
    hidden_shape = list(copy.copy(hidden_shape))
    hidden_shape[0] = dim_out


(64,)
64 512 None
ConcatLinear(
  (_layer): Linear(in_features=65, out_features=512, bias=True)
)
512 512 None
ConcatLinear(
  (_layer): Linear(in_features=513, out_features=512, bias=True)
)
512 64 None
ConcatLinear(
  (_layer): Linear(in_features=513, out_features=64, bias=True)
)


In [12]:
input_size = [1,28,28]
input_size[0]

1

In [13]:
import torch
import torch.nn as nn

class ConcatLinear(nn.Module):
    def __init__(self, dim_in, dim_out):
        super(ConcatLinear, self).__init__()
        self._layer = nn.Linear(dim_in + 1, dim_out)

    def forward(self, t, x):
        tt = torch.ones_like(x[:, :1]) * t
        ttx = torch.cat([tt, x], 1)
        return self._layer(ttx)
    
    
ConcatLinear(hidden_s)

NameError: name 'hidden_s' is not defined