In [13]:
import time
import matplotlib.pyplot as plt
import numpy as np
import math as mt
import seaborn as sns
from tqdm import tqdm
import pandas as pd
from konlpy.tag import Hannanum
from hangul_utils import split_syllables, join_jamos
from tqdm.auto import tqdm
from kiwipiepy import Kiwi
import re
from pykospacing import Spacing
from eunjeon import Mecab
han = Hannanum()
mec = Mecab()

number = 100

# 초성 리스트. 00 ~ 18
CHOSUNG_LIST = ['ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ']
# 중성 리스트. 00 ~ 20
JUNGSUNG_LIST = ['ㅏ', 'ㅐ', 'ㅑ', 'ㅒ', 'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ', 'ㅗ', 'ㅘ', 'ㅙ', 'ㅚ', 'ㅛ', 'ㅜ', 'ㅝ', 'ㅞ', 'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ', 'ㅣ']
# 종성 리스트. 00 ~ 27 + 1(1개 없음)
JONGSUNG_LIST = ['_', 'ㄱ', 'ㄲ', 'ㄳ', 'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ', 'ㄹ', 'ㄺ', 'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ', 'ㅁ', 'ㅂ', 'ㅄ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ']

EP = [
    [['ㅈ','ㅓ','_','ㄴ','ㅡ','ㄴ'],['ㄴ','ㅏ','_','ㄴ','ㅡ','ㄴ']],
    [[' ','ㅈ','ㅓ','ㄴ',' '],['ㄴ','ㅏ','_','ㄴ','ㅡ','ㄴ']],
    [['ㅇ','ㅓ','_','ㅇ','ㅛ','_'],['ㄷ','ㅏ','_']]
    ,[['ㅅ','ㅔ'],['']]
]

#종결 어미
EF = [
    [['ㅅ','ㅡ','ㅂ','ㄴ','ㅣ','ㄷ','ㅏ'],['ㄷ','ㅏ']],
    [[' ','ㅈ','ㅓ','ㄴ',' '],['ㄴ','ㅏ','_','ㄴ','ㅡ','ㄴ']],
    [['ㅇ','ㅓ','_','ㅇ','ㅛ','_'],['ㄷ','ㅏ','_']],
    [['ㅂ','ㄴ','ㅣ','ㄷ','ㅏ'],['ㄴ','ㄷ','ㅏ']],
    [['ㅇ','ㅔ','ㅇ','ㅛ'],['ㅇ','ㅑ']],
    [['ㅇ','ㅛ'],['']]
    #,[['ㅅ','ㅔ'],['ㅝ'],['ㅘ']]
]

NP = [
    
    [['ㅈ','ㅓ','ㄴ'],['ㄴ','ㅏ','ㄴ']],
    [['ㅈ','ㅓ'],['ㄴ','ㅏ']]
    
]

# 보조사
JX = [
   [['ㅇ','ㅣ','ㅇ','ㅛ'],['ㅇ','ㅣ','ㅇ','ㅑ']] 
]

VX = [
    
    [['ㅈ','ㅜ'],['ㅈ']]
    
]

VV = [
    
    [['ㅇ','ㅗ'],['ㅇ','ㅗ'],['ㅇ','ㅘ']],
    [['ㅈ','ㅜ'],['ㅈ','ㅜ'],['ㅈ','ㅝ']],
    [['ㅎ','ㅏ'],['ㅎ','ㅏ'],['ㅎ','ㅐ']]
    
]

EXC_word = [
    
    ['ㅅㅔ',2]
    
]

EXC_tags = [
    
    'EP',
    'EF'
    
]

class Jamodealer:
    jamo = []
    pp = ''
    def __init__(self,lis_word):
        
        #print('the jamo starts!')
        self.jamo = []
        for i in lis_word:
            self.jamo.append(split_syllables(i))
        
    def make_one(self):
        self.pp = ''
        for i in self.jamo:
             self.pp= self.pp+i
                
        chars = list(set(self.pp))
        char_to_ix = { ch:i for i,ch in enumerate(chars) }
        ix_to_char = { i:ch for i,ch in enumerate(chars) }
        
        jamo_numbers = [char_to_ix[x] for x in self.pp]
        restored_jamo = ''.join([ix_to_char[x] for x in jamo_numbers])
        restored_text = join_jamos(restored_jamo)
        return restored_text

def tojamo(korean_word):
    r_lst = []
    for w in list(korean_word.strip()):
        ## 영어인 경우 구분해서 작성함. 
        if '가'<=w<='힣':
            ## 588개 마다 초성이 바뀜. 
            ch1 = (ord(w) - ord('가'))//588
            ## 중성은 총 28가지 종류
            ch2 = ((ord(w) - ord('가')) - (588*ch1)) // 28
            ch3 = (ord(w) - ord('가')) - (588*ch1) - 28*ch2
            r_lst.append([CHOSUNG_LIST[ch1], JUNGSUNG_LIST[ch2], JONGSUNG_LIST[ch3]])
        else:
            r_lst.append([w])
    return r_lst

def toword(arr):
    print('wow')
    

def to1dim(input):
    result=[]
    for i in input:
        for j in i:
            result.append(j)
    return result

def to2dim(input):
    result = []
    li = []
    for i in input:
        if i == ' ':
            result.append([' '])
        else:
            li.append(i)
        if len(li)==3:
            result.append(li)
            li = []
    return result

def makeone(input):
    result = ''
    li = ''
    for i in input:
        if i[0]==' ':
            result = result+' '
        else:
            ind = ord('가')
            ind +=CHOSUNG_LIST.index(i[0])*588
            ind +=JUNGSUNG_LIST.index(i[1])*28
            ind +=JONGSUNG_LIST.index(i[2])
            result = result+chr(ind)
    return result
        
def li2str(input):
    st = ""
    for i in input:
        st = st+i
    return st

def str2li(input):
    li = []
    for i in range(len(input)):
        li.append(input[i])
    return li

def makejamodict(input):
    result = []
    for i in input:
        bullet = []
        one = []
        two = []
        gre1 = tojamo(i[0])
        for j in gre1:
            for k in j:
                one.append(k)
        bullet.append(one)
        gre2 = tojamo(i[1])
        for j in gre2:
            for k in j:
                two.append(k)
        bullet.append(two)
        result.append(bullet)
    return result

def makestrdict(input):
    result = []
    for i in input:
        bullet = []
        for j in range(len(i)):
            gre = li2str(i[j])
            bullet.append(gre)
        #gre1 = li2str(i[0])
        #gre2 = li2str(i[1])
        #bullet.append(gre1)
        #bullet.append(gre2)
        result.append(bullet)
    return result

#strlis = makestrdict(lis)

#strlis = makejamodict(lili)
#strlis = makestrdict(strlis)

#추가한 사전에 대한 str 사전을 생성, mapping 시키는 부분

EP_dict = makestrdict(EP)
EF_dict = makestrdict(EF)
NP_dict = makestrdict(NP)
JX_dict = makestrdict(JX)
VX_dict = makestrdict(VX)
VV_dict = makestrdict(VV)

Dict_list=['EP','EF','NP','JX','VX','VV']

Dict_map = [EP_dict, EF_dict,NP_dict,JX_dict,VX_dict, VV_dict]



#여기까지

def to2lists(input):
    lis_word = []
    lis_tag = []
    #data = han.pos(input,ntags=22,flatten=True, join=False)
    data = mec.pos(input)
    for i in data:
        lis_word.append(i[0])
        lis_tag.append(i[1])
    return lis_word, lis_tag
    

    

class Changer(object):
    #def __init__(self):
        #print('the changer starts!')
        
    def high_low(self, stc):
        result = stc
        lis_word, lis_tag = to2lists(result)
        jam = Jamodealer(lis_word)
        lis = []
        for i in range(len(lis_tag)):
            res = jam.jamo[i]
            for k in range(len(Dict_list)):
                if Dict_list[k] in lis_tag[i]:
                    dic = Dict_map[Dict_list.index(Dict_list[k])]
                    #res = jam.jamo[i]
                    for j in range(len(dic)):
                        #print(dic[j])
                        if self.isExcept(dic[j])==1:
                            print('ee')
                            ind = self.indicator(i,jam.jamo,lis_tag,EXC_word, EXC_tags)
                            res = re.sub(dic[j][0],dic[j][ind],res)
                        else:
                            res = re.sub(dic[j][0],dic[j][1],res)
                            
                    #jam.jamo[i] = res
            lis.append(res)
            #print(jam.jamo[i])
        for i in range(len(lis_tag)):
            jam.jamo[i] = lis[i]
            #print(lis[i])
        
        return jam.make_one()
        
    def isExcept(self, input):
        if len(input)>=3:
            return 1
        else:
            return 0
        
    def indicator(self, ind, lis, tag, ex_word, exc_tags):
        re = 1
        for j in range(len(ex_word)):
            if exc_tags[j] in tag[ind+1]:
                for i in range(len(ex_word)):
                    if ex_word[i][0] in lis[ind+1]:
                        #print('tt')
                        print('dd')
                        re = ex_word[i][1]
                        break
                        #else:
                        #print('ㅗㅗ')
                        print(lis[ind-1])
        return re
        
    
    def processText(self,stc):
        result = stc
        res = self.high_low(result)
        spacing = Spacing()
        res = spacing(res)
        return res
    



In [16]:
txt = '이거 하자.'
tx = '저한테 주세요.'
ch = Changer()
tt = ch.processText(txt)
ttt = ch.processText(tx)
print(tt)
print(ttt)

ee
ee
ee
ee
dd
ee
dd
ee
dd
이거 하자.
나한테 줘.
