In [1]:
import pandas as pd
import numpy as np
import os
import datasets
import torch
import tensorflow as tf
import re
import pickle

from konlpy.tag import Okt
from collections import deque
from tqdm import tqdm
from koalanlp import *
from koalanlp.proc import Parser
from koalanlp.Util import initialize, finalize
from glob import glob
from transformers import TFRobertaModel, RobertaModel, BertTokenizerFast, DataCollatorWithPadding
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras

initialize(KKMA='LATEST')  #: HNN=2.0.4, ETRI=2.0.4


[koalanlp.jip] [INFO] Latest version of kr.bydelta:koalanlp-kkma (2.1.4) will be used.
[root] Java gateway started with port number 51067
[root] Callback server will use port number 25334
[koalanlp.jip] JVM initialization procedure is completed.


In [2]:
original = pd.read_csv("./RawData/train_data.csv").drop("index", axis = "columns")

def text_preprocessing(inputs) :
    inputs = inputs.lower()
    inputs = inputs.replace("·", ',')
    inputs = inputs.replace("．", '.')
    inputs = re.sub("[^가-힣a-z0-9 ,.?!%~:]", ' ', inputs)
    return inputs

original.loc[:, "premise"] = original.premise.apply(lambda x : text_preprocessing(x))
original.loc[:, "hypothesis"] = original.hypothesis.apply(lambda x : text_preprocessing(x))

In [3]:
parser = Parser(API.KKMA)

def make_chunk_pair(parser, sentence, chunk_token = '[WORD]') :
    analyzed = parser(sentence)

    chunk = []
    tag_info = []
    n_words = []
    gov_id = []
    chunk_length = []

    rel_gov_idx = []

    for an in analyzed :
        tmp_chunk = []
        tmp_gov_id = []
        tmp_idx = []



        if len(an.words) > 1 :
            words_a = an.words[:-1]
            words_b = an.words[1:]

            for f, s in zip(words_a, words_b) :
                if (f.governorEdge.depType in ["CMP", "MOD", "AJT"]) and (s.governorEdge.depType in ["CMP", "MOD", "AJT"]) or \
                   (f.governorEdge.type == s.governorEdge.type)  and (f.governorEdge.type != 'X' or s.governorEdge.type != 'X') :
                    tmp_chunk.append(f.surface)
                else :
                    tmp_chunk.append(f.surface)
                    tmp_chunk.append(chunk_token)
                    tmp_gov_id.append(f.governorEdge if f.governorEdge.src else 0)
                    tag_info.append(f.governorEdge.type + '-' + f.governorEdge.depType if f.governorEdge.depType else f.governorEdge.type)

            tmp_chunk.append(s.surface)
            tmp_chunk.append(chunk_token)
            tag_info.append(s.governorEdge.type + '-' + s.governorEdge.depType)
            tmp_gov_id.append(s.governorEdge if s.governorEdge.src else 0)

        else :
            tmp_chunk.append(an.words[0].surface)
            tmp_chunk.append(chunk_token)
            tmp_gov_id.append(0)
            tag_info.append(an.words[0].governorEdge.type + '-' + an.words[0].governorEdge.depType if an.words[0].governorEdge.depType else an.words[0].governorEdge.type)


        tmp_words = np.cumsum(np.array(tmp_chunk) == chunk_token).tolist()
        tmp_words = [tmp_words[i] for i in range(len(tmp_words)) if tmp_chunk[i] != chunk_token]

        chunk_length.append(len(chunk[-1]) if chunk else 0)
        n_words.append(tmp_words)
        chunk.append(tmp_chunk)
        to_get_rel_idx = tmp_gov_id.copy()


        for idx in range(len(to_get_rel_idx)) :
            if to_get_rel_idx[idx] != 0 :
                edge = to_get_rel_idx[idx].src.id
                tmp_gov_id[idx] = edge + tmp_words[edge] + sum(chunk_length)
                tmp_idx.append(tmp_words[edge] - tmp_words[to_get_rel_idx[idx].dest.id])
            else :
                tmp_idx.append(0)
        gov_id.append(tmp_gov_id)
        rel_gov_idx.append(tmp_idx)

    gov_id = [ids for sentence in gov_id for ids in sentence]
    n_words = [n for sentence in n_words for n in sentence]
    chunk = [c for sentence in chunk for c in sentence]
    rel_gov_idx = [g for count in rel_gov_idx for g in count]


        #####추가#####
    tagger = Okt()
    okt = tagger.morphs(sentence)
    copy = chunk.copy()

    ori_list = np.where(np.array(chunk) == chunk_token)[0]
    res_list = ori_list - 1


    chunk_mark = []
    idx_mark = []
    cur_n_word = 0

    index_pair = np.array([[0, 0]])

    for j in range(len(res_list)) :
        temp = []
        cur_n_word -= 1
        index_pair[:, 1] -= 1

        res = np.where(index_pair[:, 1] < 1)

        if res[0].size :
            index_pair = np.delete(index_pair, res, axis = 0)
            index_pair = np.append(index_pair, [j, rel_gov_idx[j]])
            index_pair = index_pair.reshape(-1, 2)

        for i in range(len(okt)) :
            if okt[i] in chunk[res_list[j]] :
                temp.append(1)
            else : temp.append(0)

        if sum(temp) == 0 :
            chunk_mark.append(ori_list[j])
            idx_mark.append(j)
            for idx, n_word in index_pair :
                rel_gov_idx[idx] -= rel_gov_idx[j]

    rel_gov_idx = [rel_gov_idx[i] for i in range(len(rel_gov_idx)) if i not in idx_mark]
    tag_info = [tag_info[i] for i in range(len(tag_info)) if i not in idx_mark]
    chunk = [chunk[i] for i in range(len(chunk)) if i not in chunk_mark]
    
    result = {"chunk" : chunk,
              "chunk_tag" : tag_info,
              "rel_gov_idx" : rel_gov_idx}
    
    return result
    


In [4]:
# konlpy 패키지로 명사 tagging시켜서 명사 사이에 끼어있는 word 토큰 빼기

In [5]:
def error_line_change(text) :
            
    text = re.sub('다입니다', "전부입니다", text)
    text = re.sub('딱인', "딱 맞는", text)
    text = re.sub('딱인', "딱 맞는", text)
    text = re.sub('딱입니다', "딱 입니다", text)
    text = re.sub('어째서인지', "어째선지", text)
    text = re.sub('않는다고', "않는다며", text)
    text = re.sub('그대로고', "그대로 이고", text)
    text = re.sub('그대로이고', "그대로 이고", text)
    text = re.sub('그대로이며', "그대로 이며", text)
    text = re.sub('그대로입니다', "그대로 입니다", text)
    text = re.sub('그럭저럭이네요', "그럭저럭 이네요", text)
    text = re.sub('낮다고', "저조하다고", text)
    text = re.sub('내륙', "내륙부", text)
    text = re.sub('다고미 노부스케', "노부스케", text)
    text = re.sub('혹인', "혹은", text)
    text = re.sub('별로인|별로이나', "별로였던", text)
    text = re.sub('별로고', "별로였고", text)
    text = re.sub('화룡점점', "화룡점정", text)
    text = re.sub('임진왜란', "임진란", text)


    return text
        

In [13]:
# chunked_premise = {}
# premise_err_sentences = []
# for s in tqdm(original.premise.unique()) :
#     try :
#         c = make_chunk_pair(parser, s)
#         chunked_premise[s] = c
#     except :
#         corrected = error_line_change(s)
#         c = make_chunk_pair(parser, corrected)
#         chunked_premise[s] = c
#         print(c)

  0%|          | 31/8387 [00:04<19:27,  7.16it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.bydel

{'chunk': ['인테리어가', '[WORD]', '굉장히', '[WORD]', '귀엽고', '러', '블', '리한', '게,', '[WORD]', '사진에서', '[WORD]', '보이는', '[WORD]', '그대로', '[WORD]', '입니다.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'DP-MOD', 'X-UNDEF', 'NP-AJT', 'X-UNDEF', 'X-UNDEF', 'X-UNDEF'], 'rel_gov_idx': [2, 1, 0, 1, 0, 0, 0]}


  4%|▍         | 353/8387 [00:37<10:31, 12.73it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byde

{'chunk': ['숙소도', '사진', '[WORD]', '그대로', '[WORD]', '이고', '[WORD]', '너무너무', '[WORD]', '예쁩니다.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'DP-MOD', 'VP-CNJ', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [2, 2, 2, 1, 0]}


  8%|▊         | 682/8387 [01:07<12:13, 10.50it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byde

{'chunk': ['숙소의', '시설은', '사진에', '[WORD]', '나와', '있는', '그대로', '[WORD]', '입니다.', '[WORD]'], 'chunk_tag': ['NP-AJT', 'X-UNDEF', 'X-UNDEF'], 'rel_gov_idx': [1, 0, 0]}


 13%|█▎        | 1119/8387 [01:48<15:54,  7.61it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['진짜', '연기들도', '[WORD]', '별로였고', '[WORD]', '전체적으로', '[WORD]', '뭔', '가', '상당히', '어색함', '스토리는', '[WORD]', '뻔하고', '[WORD]', '시간', '[WORD]', '아까움', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'VP-CNJ', 'NP-AJT', 'NP-AJT', 'VP-CNJ', 'NP-AJT', 'X-UNDEF'], 'rel_gov_idx': [1, 3, 2, 1, 2, 1, 0]}


 30%|███       | 2529/8387 [03:57<06:52, 14.19it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['시청률이', '[WORD]', '저조하다고', '[WORD]', '그것이', '[WORD]', '드라마의', '[WORD]', '완성도가', '[WORD]', '떨어진다는', '[WORD]', '척도가', '[WORD]', '될', '수', '없다는', '[WORD]', '것을', '[WORD]', '증명한', '[WORD]', '드라마.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'X-UNDEF', 'NP-SBJ', 'DP-MOD', 'NP-SBJ', 'DP-MOD', 'X-UNDEF', 'DP-MOD', 'NP-OBJ', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [1, 0, 3, 1, 1, 1, 0, 1, 1, 1, 0]}


 54%|█████▍    | 4560/8387 [07:12<09:06,  7.01it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['칠', '던', '은', '바이네', '[WORD]', '스라는', '[WORD]', '스웨덴', '기업가에게', '줄', '방문', '선물을', '[WORD]', '찾는', '[WORD]', '일본', '무역', '대표부', '소속', '고위관료', '노부스케로부터', '온', '[WORD]', '전화를', '[WORD]', '받는다.', '[WORD]'], 'chunk_tag': ['X-UNDEF', 'DP-MOD', 'NP-OBJ', 'DP-MOD', 'DP-MOD', 'NP-OBJ', 'X-UNDEF'], 'rel_gov_idx': [0, 1, 1, 1, 1, 1, 0]}


 57%|█████▋    | 4802/8387 [07:36<06:30,  9.17it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['시설은', '정말', '[WORD]', '훌륭하고', '[WORD]', '혼자', '[WORD]', '지내기', '[WORD]', '딱', '[WORD]', '입니다.', '[WORD]'], 'chunk_tag': ['DP-MOD', 'VP-CNJ', 'NP-AJT', 'X-UNDEF', 'X-UNDEF', 'X-UNDEF'], 'rel_gov_idx': [1, 2, 1, 0, 0, 0]}


 60%|██████    | 5050/8387 [08:03<07:20,  7.57it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['방은', '사진에서', '[WORD]', '보는', '[WORD]', '그대로', '[WORD]', '이며', '[WORD]', '깔끔합니다.', '[WORD]'], 'chunk_tag': ['NP-AJT', 'X-UNDEF', 'DP-MOD', 'VP-CNJ', 'X-UNDEF'], 'rel_gov_idx': [1, 0, 2, 1, 0]}


 63%|██████▎   | 5289/8387 [08:26<06:03,  8.53it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['집도', '진짜', '[WORD]', '깔끔하고', '[WORD]', '사진', '[WORD]', '그대로', '[WORD]', '입니다.', '[WORD]'], 'chunk_tag': ['DP-MOD', 'X-UNDEF', 'NP-SBJ', 'X-UNDEF', 'X-UNDEF'], 'rel_gov_idx': [1, 0, 2, 0, 0]}


 70%|██████▉   | 5848/8387 [09:18<05:49,  7.27it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['방은', '사진', '[WORD]', '그대로', '[WORD]', '이고', '[WORD]', '호스트는', '매우', '[WORD]', '친절합니다.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'DP-MOD', 'VP-CNJ', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [2, 2, 2, 1, 0]}


 71%|███████   | 5968/8387 [09:32<03:13, 12.49it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['숙소는', '사진', '[WORD]', '그대로', '[WORD]', '이고', '[WORD]', '매우', '[WORD]', '깨끗합니다.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'DP-MOD', 'VP-CNJ', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [2, 2, 2, 1, 0]}


 78%|███████▊  | 6582/8387 [10:25<02:32, 11.83it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['그런데', '[WORD]', '어째', '선지', '펑펑', '울', '수는', '[WORD]', '없어서', '[WORD]', '더', '서러웠던', '[WORD]', '영화.', '[WORD]'], 'chunk_tag': ['VP-CNJ', 'NP-AJT', 'VP-CNJ', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 1, 1, 0]}


 79%|███████▊  | 6596/8387 [10:26<02:53, 10.35it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['리그', '오브', '레', '전드', '이용자들', '중', '실력이', '[WORD]', '올라도', '[WORD]', '랭크가', '[WORD]', '오르지', '않는다며', '[WORD]', '불만을', '[WORD]', '드러내는', '이용자도', '[WORD]', '많았다.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'VP-CNJ', 'NP-SBJ', 'VP-CNJ', 'NP-OBJ', 'NP-AJT', 'X-UNDEF'], 'rel_gov_idx': [1, 2, 1, 2, 1, 1, 0]}


 79%|███████▉  | 6606/8387 [10:27<02:51, 10.37it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['단지', '그것뿐', '추석', '연휴', '오전', '11시', '즈음에', '볼', '마음도', '[WORD]', '없었는데', '[WORD]', '우연히', '보는', '[WORD]', '추석', '특선영화정도가', '[WORD]', '딱', '맞는', '[WORD]', '영화', '[WORD]'], 'chunk_tag': ['NP-AJT', 'VP-CNJ', 'DP-MOD', 'NP-SBJ', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 1, 1, 1, 0]}


 92%|█████████▏| 7682/8387 [12:11<01:16,  9.23it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['다만', '[WORD]', '해외', '유입', '환자는', '모두', '검역', '또는', '[WORD]', '격리', '과정에서', '[WORD]', '발견되고', '있어', '[WORD]', '지역사회', '2', '차', '이상', '전파로', '이어진', '[WORD]', '사례가', '[WORD]', '없으므로', '[WORD]', '감염', '[WORD]', '전파의', '위험도는', '[WORD]', '저조하다고', '[WORD]', '볼', '수', '[WORD]', '있다.', '[WORD]'], 'chunk_tag': ['DP-MOD', 'DP-MOD', 'NP-AJT', 'VP-CNJ', 'DP-MOD', 'NP-SBJ', 'VP-CNJ', 'NP', 'NP-AJT', 'X-UNDEF', 'NP-AJT', 'X-UNDEF'], 'rel_gov_idx': [1, 2, 1, 1, 1, 1, 3, 1, 1, 0, 1, 0]}


 96%|█████████▋| 8087/8387 [12:47<00:28, 10.43it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.byd

{'chunk': ['이', '지역은', '아름다운', '[WORD]', '해변,', '[WORD]', '넓은', '연안의', '[WORD]', '경치,', '[WORD]', '그리고', '[WORD]', '내륙부의', '비옥한', '[WORD]', '토양이', '[WORD]', '특징이다.', '[WORD]'], 'chunk_tag': ['DP-MOD', 'X-UNDEF', 'DP-MOD', 'X-UNDEF', 'X-UNDEF', 'DP-MOD', 'NP-SBJ', 'X-UNDEF'], 'rel_gov_idx': [1, 0, 1, 0, 0, 1, 1, 0]}


100%|██████████| 8387/8387 [13:21<00:00, 10.47it/s]


In [14]:
# with open("chunked_premise.pkl", "wb") as premise :
#     pickle.dump(chunked_premise, premise)

In [None]:
# with open("chunked_premise.pkl", "rb") as premise :
#     chunked_premise = pickle.load(premise)
# pd.DataFrame.from_dict(chunked_premise, orient = "index").reset_index()

In [15]:
# chunked_hypo = {}
# hypo_err_sentences = []
# for s in tqdm(original.hypothesis.unique()) :
#     try :
#         c = make_chunk_pair(parser, s)
#         chunked_hypo[s] = c
#     except :
#         corrected = error_line_change(s)
#         c = make_chunk_pair(parser, corrected)
#         chunked_hypo[s] = c
#         print(c)

 22%|██▏       | 5387/24940 [05:56<21:37, 15.07it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.by

{'chunk': ['전체구성은', '[WORD]', '별로였던', '[WORD]', '전개가', '[WORD]', '흥미진진하다.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'DP-MOD', 'NP-SBJ', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 1, 0]}


 22%|██▏       | 5561/24940 [06:07<20:13, 15.97it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.by

{'chunk': ['응답', '기업', '중', '59개', '기업은', '여름', '휴가를', '[WORD]', '가지지', '않는다며', '[WORD]', '응답했다.', '[WORD]'], 'chunk_tag': ['NP-OBJ', 'VP-CNJ', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 0]}


 23%|██▎       | 5858/24940 [06:27<18:13, 17.45it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.by

{'chunk': ['노부스케는', '[WORD]', '일본', '후생노동성', '소속', '[WORD]', '고위관료이다.', '[WORD]'], 'chunk_tag': ['X-UNDEF', 'NP', 'X-UNDEF'], 'rel_gov_idx': [0, 1, 0]}


 31%|███       | 7761/24940 [08:35<14:04, 20.34it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.by

{'chunk': ['로마의', '물가에', '[WORD]', '비해', '[WORD]', '별로였던', '[WORD]', '숙소입니다.', '[WORD]'], 'chunk_tag': ['NP-AJT', 'X-UNDEF', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [1, 0, 1, 0]}


 33%|███▎      | 8262/24940 [09:08<13:01, 21.33it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.by

{'chunk': ['집', '외부도', '사진', '[WORD]', '그대로', '[WORD]', '입니다.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'X-UNDEF', 'X-UNDEF'], 'rel_gov_idx': [2, 0, 0]}


 39%|███▉      | 9807/24940 [10:38<20:59, 12.02it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.by

{'chunk': ['비엔나를', '[WORD]', '여행', '[WORD]', '하기엔', '[WORD]', '별로였던', '[WORD]', '위치입니다.', '[WORD]'], 'chunk_tag': ['X-UNDEF', 'NP', 'X-UNDEF', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [0, 1, 0, 1, 0]}


 40%|████      | 10024/24940 [10:54<14:37, 16.99it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['지역의', '[WORD]', '위치가', '[WORD]', '별로였고', '[WORD]', '주변이', '[WORD]', '재미없었습니다.', '[WORD]'], 'chunk_tag': ['DP-MOD', 'NP-SBJ', 'VP-CNJ', 'NP-SBJ', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 2, 1, 0]}


 47%|████▋     | 11686/24940 [12:24<13:15, 16.66it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['임진란', '때', '주민들이', '나라에', '바친', '[WORD]', '건', '소나무', '화살', '[WORD]', '뿐이었다.', '[WORD]'], 'chunk_tag': ['DP-MOD', 'NP', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 0]}


 47%|████▋     | 11795/24940 [12:29<12:29, 17.55it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['문', '체부는', '객관적', '인과관계가', '[WORD]', '입증되지', '않는다며', '[WORD]', '주장했다.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'VP-CNJ', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 0]}


 50%|█████     | 12569/24940 [13:11<10:34, 19.51it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['연출도', '[WORD]', '별로였고', '[WORD]', '소재가', '[WORD]', '너무', '[WORD]', '진부하다.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'VP-CNJ', 'NP-SBJ', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [1, 3, 2, 1, 0]}


 51%|█████▏    | 12801/24940 [13:23<10:42, 18.90it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['3인', '[WORD]', '가족', '여행에', '딱', '맞는', '[WORD]', '넓이였습니다.', '[WORD]'], 'chunk_tag': ['DP-MOD', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 0]}


 54%|█████▎    | 13352/24940 [13:58<11:11, 17.26it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['깨끗한데', '[WORD]', '인테리어가', '[WORD]', '별로였던', '[WORD]', '아파트입니다.', '[WORD]'], 'chunk_tag': ['X-UNDEF', 'NP-SBJ', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [0, 1, 1, 0]}


 55%|█████▍    | 13636/24940 [14:16<09:39, 19.51it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['대한민국', '기상청은', '태풍이', '한국에', '상륙할', '가능성은', '매우', '[WORD]', '저조하다고', '[WORD]', '밝혔다.', '[WORD]'], 'chunk_tag': ['DP-MOD', 'X-UNDEF', 'X-UNDEF'], 'rel_gov_idx': [1, 0, 0]}


 58%|█████▊    | 14343/24940 [14:54<08:34, 20.58it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['노부스케로부터', '온', '[WORD]', '전화를', '[WORD]', '받았지만', '[WORD]', '대답이', '[WORD]', '없었다.', '[WORD]'], 'chunk_tag': ['DP-MOD', 'NP-OBJ', 'VP-CNJ', 'NP-SBJ', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 2, 1, 0]}


 59%|█████▉    | 14730/24940 [15:15<08:13, 20.68it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['위치', '[WORD]', '상으로는', '[WORD]', '별로였던', '[WORD]', '숙소입니다.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'X-UNDEF', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [2, 0, 1, 0]}


 67%|██████▋   | 16607/24940 [16:54<07:30, 18.52it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['영상미도', '[WORD]', '별로였고', '[WORD]', '스토리', '연결도', '[WORD]', '엉성하다.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'VP-CNJ', 'NP-AJT', 'X-UNDEF'], 'rel_gov_idx': [1, 2, 1, 0]}


 70%|███████   | 17467/24940 [17:40<06:22, 19.54it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['시청률이', '[WORD]', '높았지만', '[WORD]', '내용이', '[WORD]', '별로였던', '[WORD]', '드라마.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'X-UNDEF', 'NP-SBJ', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [1, 0, 1, 1, 0]}


 72%|███████▏  | 18007/24940 [18:09<06:12, 18.63it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['내용이', '[WORD]', '별로였던', '[WORD]', '스릴러.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 0]}


 73%|███████▎  | 18289/24940 [18:27<06:05, 18.21it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['모든', '전문가가', '[WORD]', '보고서', '[WORD]', '내', '[WORD]', '연구의', '[WORD]', '신뢰도가', '[WORD]', '저조하다고', '[WORD]', '평가했다.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'VP-CNJ', 'X-UNDEF', 'DP-MOD', 'NP-SBJ', 'X-UNDEF', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 0, 1, 1, 0, 0]}


 73%|███████▎  | 18320/24940 [18:29<07:18, 15.10it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['내용', '구성이', '[WORD]', '별로였고', '[WORD]', '작가가', '[WORD]', '한심스러움.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'VP-CNJ', 'NP-SBJ', 'X-UNDEF'], 'rel_gov_idx': [1, 2, 1, 0]}


 76%|███████▋  | 19038/24940 [19:13<06:37, 14.87it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['노부스케는', '[WORD]', '스웨덴', '기업가에게', '줄', '방문', '선물을', '[WORD]', '찾는다.', '[WORD]'], 'chunk_tag': ['VP-CNJ', 'NP-OBJ', 'X-UNDEF'], 'rel_gov_idx': [2, 1, 0]}


 78%|███████▊  | 19411/24940 [19:36<04:49, 19.08it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['숙소의', '위치는', '[WORD]', '훌륭하지만', '[WORD]', '그게', '[WORD]', '전부입니다.', '[WORD]'], 'chunk_tag': ['NP-AJT', 'VP-CNJ', 'X-UNDEF', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 0, 0]}


 81%|████████  | 20182/24940 [20:20<04:39, 17.00it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['감염', '[WORD]', '전파의', '[WORD]', '위험도가', '[WORD]', '저조하다고', '[WORD]', '봄에도', '불구하고', '[WORD]', '해외', '유입', '환자들을', '일정', '기간동안', '[WORD]', '격리하고', '[WORD]', '있다.', '[WORD]'], 'chunk_tag': ['NP', 'DP-MOD', 'NP-SBJ', 'X-UNDEF', 'VP-CNJ', 'NP', 'VP-CNJ', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 1, 0, 2, 1, 1, 0]}


 81%|████████  | 20192/24940 [20:21<05:59, 13.20it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['지금', '[WORD]', '생각해', '봐도', '별로였던', '영화였던', '것', '[WORD]', '같다.', '[WORD]'], 'chunk_tag': ['DP-MOD', 'NP-AJT', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 0]}


 83%|████████▎ | 20761/24940 [20:59<04:40, 14.92it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['근처에는', '[WORD]', '별로였던', '[WORD]', '식당들', '[WORD]', '뿐입니다.', '[WORD]'], 'chunk_tag': ['X-UNDEF', 'DP-MOD', 'NP', 'X-UNDEF'], 'rel_gov_idx': [0, 1, 1, 0]}


 85%|████████▍ | 21138/24940 [21:26<03:08, 20.16it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['입국자들의', '[WORD]', '자가', '격리는', '[WORD]', '고려하지', '않는다며', '[WORD]', '밝혔다.', '[WORD]'], 'chunk_tag': ['DP-MOD', 'NP-AJT', 'VP-CNJ', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 1, 0]}


 86%|████████▋ | 21555/24940 [21:50<03:10, 17.80it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['스토리는', '[WORD]', '별로였고', '[WORD]', '연출은', '[WORD]', '그럭저럭', '[WORD]', '이네요.', '[WORD]'], 'chunk_tag': ['NP-SBJ', 'X-UNDEF', 'NP-SBJ', 'X-UNDEF', 'X-UNDEF'], 'rel_gov_idx': [1, 0, 2, 0, 0]}


 89%|████████▉ | 22300/24940 [22:37<02:50, 15.47it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['엔딩이', '화룡', '[WORD]', '점정인', '엄청난', '[WORD]', '명작영화.', '[WORD]'], 'chunk_tag': ['NP', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 0]}


 90%|█████████ | 22468/24940 [22:46<02:00, 20.47it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['가격에', '[WORD]', '비해', '[WORD]', '가', '[WORD]', '성비가', '[WORD]', '별로였던', '못생긴', '[WORD]', '집입니다.', '[WORD]'], 'chunk_tag': ['NP-AJT', 'VP-CNJ', 'X-UNDEF', 'NP-SBJ', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 0, 1, 1, 0]}


 91%|█████████▏| 22777/24940 [23:03<02:01, 17.85it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['더빙', '[WORD]', '배우의', '[WORD]', '목소리가', '[WORD]', '너무', '[WORD]', '낮아서', '[WORD]', '별로였던', '[WORD]', '듯.', '[WORD]'], 'chunk_tag': ['NP', 'DP-MOD', 'NP-SBJ', 'DP-MOD', 'X-UNDEF', 'DP-MOD', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 2, 1, 0, 1, 0]}


 96%|█████████▌| 23949/24940 [24:11<00:57, 17.23it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['미래', '한국당은', '[WORD]', '바뀌지', '않는다며', '[WORD]', '결정', '[WORD]', '났다.', '[WORD]'], 'chunk_tag': ['NP-AJT', 'VP-CNJ', 'NP-AJT', 'X-UNDEF'], 'rel_gov_idx': [1, 2, 1, 0]}


 96%|█████████▌| 23995/24940 [24:13<00:50, 18.71it/s][root] Java에서 처리하던 중에 문제가 발생했습니다. 문제가 계속된다면, Issue를 등록해주세요.
Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\koalanlp\proc.py", line 228, in analyze
    result += py_list(self.__api.analyze(string(paragraph)), item_converter=Sentence.fromJava)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o1255.analyze.
: java.lang.IllegalStateException: There are more than two properties belong to the same key: DEP_GOVERNOR
	at kr.bydelta.koala.data.CanHaveProperty.setProperty$koalanlp_core(data.kt:116)
	at kr.bydelta.koala.data.DepEdge.<init>(data.kt:680)
	at kr.b

{'chunk': ['리그', '오브', '레', '전드', '이용자들', '중에는', '자신의', '[WORD]', '실력이', '랭크에', '[WORD]', '반영되지', '않는다며', '[WORD]', '불만을', '[WORD]', '드러내는', '이용자도', '[WORD]', '많았다.', '[WORD]'], 'chunk_tag': ['DP-MOD', 'NP-AJT', 'VP-CNJ', 'NP-OBJ', 'NP-AJT', 'X-UNDEF'], 'rel_gov_idx': [1, 1, 2, 1, 1, 0]}


100%|██████████| 24940/24940 [25:07<00:00, 16.54it/s]


In [16]:
# with open("chunked_hypo.pkl", "wb") as hypo :
#     pickle.dump(chunked_hypo, hypo)
# pd.DataFrame.from_dict(chunked_hypo, orient = "index").reset_index()

Unnamed: 0,index,chunk,chunk_tag,rel_gov_idx
0,씨름의 여자들의 놀이이다.,"[씨름의, 여자들의, [WORD], 놀이, [WORD], 이다., [WORD]]","[DP-MOD, NP, X-UNDEF]","[1, 1, 0]"
1,자작극을 벌인 이는 3명이다.,"[자작극을, [WORD], 벌인, [WORD], 이는, [WORD], 3명이다., ...","[NP-OBJ, DP-MOD, NP-SBJ, X-UNDEF]","[1, 1, 1, 0]"
2,예측적 범죄예방 시스템 구축하고 고도화하는 것은 목적이 있기 때문이다.,"[예측적, 범죄, 예방, 시스템, [WORD], 구축하고, [WORD], 고도화하는...","[NP, VP-CNJ, NP-MOD, X-UNDEF]","[1, 1, 1, 0]"
3,원주민들은 종합대책에 만족했다.,"[원주민들은, 종합대책에, [WORD], 만족했다., [WORD]]","[NP-AJT, X-UNDEF]","[1, 0]"
4,이런 상황에서 책임 있는 모습을 보여주는 기업은 아주 드물다.,"[이런, 상황에서, 책임, 있는, [WORD], 모습을, [WORD], 보여주는, ...","[DP-MOD, NP-OBJ, DP-MOD, X-UNDEF]","[1, 1, 1, 0]"
...,...,...,...,...
24935,오라토리오에서 테스토의 역할이 가장 중요하다.,"[오라, 토리, 오에서, 테스토의, [WORD], 역할이, [WORD], 가장, [...","[DP-MOD, NP-SBJ, DP-MOD, X-UNDEF]","[1, 2, 1, 0]"
24936,지하철역까지 도보로 5분 정도 걸립니다.,"[지하철, 역까, 지, 도보로, 5분, 정도, [WORD], 걸립니다., [WORD]]","[NP-AJT, X-UNDEF]","[1, 0]"
24937,중악방역대책본부는 집단 감염과 관련한 모든 정보를 비공개했다.,"[중악, [WORD], 방역대책본부는, 집단, 감염과, 관련한, [WORD], 모든...","[X-UNDEF, DP-MOD, NP-OBJ, X-UNDEF]","[0, 1, 1, 0]"
24938,시청자들은 마미손의 정체를 안다.,"[시청자들은, 마미, [WORD], 손의, [WORD], 정체를, [WORD], 안...","[NP, DP-MOD, NP-OBJ, X-UNDEF]","[1, 1, 1, 0]"


In [6]:
with open("chunked_premise.pkl", "rb") as premise :
    chunked_premise = pickle.load(premise)
    
with open("chunked_hypo.pkl", "rb") as hypo :
    chunked_hypo = pickle.load(hypo)
    
chunk_data = pd.concat([original, pd.json_normalize(original.premise.map(chunked_premise)).add_prefix("premise_")], axis = 1)
chunk_data = pd.concat([chunk_data, pd.json_normalize(original.hypothesis.map(chunked_hypo)).add_prefix("hypo_")], axis = 1)
chunk_data.loc[:, "rel_gov_idx"] = chunk_data["premise_rel_gov_idx"] + chunk_data["hypo_rel_gov_idx"]
chunk_data.loc[:, "tag_info"] = chunk_data["premise_chunk_tag"] + chunk_data["hypo_chunk_tag"]

In [7]:
tag_info = pd.DataFrame(chunk_data.tag_info.tolist()).fillna("padding")
tag_list = []
for i in tag_info :
    tag_list.append(tag_info[i].unique())
tag_list = np.unique([i for j in tag_list for i in j])

tag_labelr = LabelEncoder()
tag_labelr.fit(tag_list)

LabelEncoder()

In [8]:
# for i in tqdm(range(len(chunk_data))) :
#     try :
#         pre_idx = np.array(chunk_data.premise_gov_idx[i]) - np.where(np.array(chunk_data.premise_chunk[i]) == '[WORD]')[0]
#         pre_idx[np.array(chunk_data.premise_gov_idx[i]) == 0] = 0
        
#         hyp_idx = np.array(chunk_data.hypo_gov_idx[i]) - np.where(np.array(chunk_data.hypo_chunk[i]) == '[WORD]')[0]
#         hyp_idx[np.array(chunk_data.hypo_gov_idx[i]) == 0] = 0
        
#         if any(pre_idx[np.array(chunk_data.premise_gov_idx[i]) != 0] < 0) or any(hyp_idx[np.array(chunk_data.hypo_gov_idx[i]) != 0] < 0) :
#             print(len(parser(chunk_data.loc[i, "premise"])))
#             break
        
#     except :
#         print(i)

In [9]:
chunk_data.loc[:, "premise_chunk"] = chunk_data.premise_chunk.apply(lambda x : ' '.join(x))
chunk_data.loc[:, "hypo_chunk"] = chunk_data.hypo_chunk.apply(lambda x : ' '.join(x))

In [10]:
labelr = LabelEncoder()
chunk_data.loc[:, "label"] = labelr.fit_transform(chunk_data.label)

In [11]:
model_name = "klue/roberta-base"
tokenizer = BertTokenizerFast.from_pretrained(model_name,
                                              strip_accents = False,
                                              lowercase = False)
special_tokens_dict = {'additional_special_tokens': ['[WORD]']}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)

In [12]:
premise_max_len = chunk_data.premise_rel_gov_idx.apply(len).max()
hypo_max_len = chunk_data.hypo_rel_gov_idx.apply(len).max()
idx_max_len = chunk_data.rel_gov_idx.apply(len).max()

In [13]:
batch_size = 32
collator = DataCollatorWithPadding(tokenizer, return_tensors = "tf")

def tokenizing(inputs, prem_max_len, hypo_max_len, training) :
    model_inputs = tokenizer(inputs['premise_chunk'], inputs["hypo_chunk"])
    
    tokenized = model_inputs.input_ids

    indices = []
    
    for t in tokenized :
        t = np.array(t)
        indices.append(np.where((t != 0) & (t != 2) & (t != 32000))[0])
    
    chunk_index = []
    
    for idx in indices :
        temp = [1]
        for i in range(len(idx) - 1) :
            if idx[i] + 1 != idx[i + 1] :
                temp.append(idx[i])
                temp.append(idx[i + 1])
        temp.append(idx[-1])
        temp = temp + [-1 for i in range(idx_max_len * 2 - len(temp))]
        temp  = np.array(temp).reshape(-1, 2).tolist()
        chunk_index.append(temp)
    
    model_inputs["chunk_index"] = chunk_index
    
    prem_gov_idx = []
    prem_tag_info = []
    
    for i in range(len(inputs["premise_rel_gov_idx"])) :
        idx = inputs["premise_rel_gov_idx"][i]
        tag = inputs["premise_chunk_tag"][i]
        
        prem_gov_idx.append(idx + [-1 for idx in range(prem_max_len - len(idx))])
        temp = tag + ["padding" for tag in range(prem_max_len - len(tag))]
        prem_tag_info.append(tag_labelr.transform(temp))

    model_inputs["prem_gov_idx"] = prem_gov_idx
    model_inputs["prem_tag"] = prem_tag_info
    
    
    hypo_gov_idx = []
    hypo_tag_info = []
    
    for i in range(len(inputs["hypo_rel_gov_idx"])) :
        idx = inputs["hypo_rel_gov_idx"][i]
        tag = inputs["hypo_chunk_tag"][i]
        
        hypo_gov_idx.append(idx + [-1 for idx in range(hypo_max_len - len(idx))])
        temp = tag + ["padding" for tag in range(hypo_max_len - len(tag))]
        hypo_tag_info.append(tag_labelr.transform(temp))

    model_inputs["hypo_gov_idx"] = hypo_gov_idx
    model_inputs["hypo_tag"] = hypo_tag_info
    
    if training :
        model_inputs["labels"] = inputs["label"]
        

    return model_inputs

def get_dataset(inputs, collator, batch_size, idx_max_len, training) :
    inputs = datasets.Dataset.from_pandas(inputs)
    tokenized_inputs = inputs.map(tokenizing,
                                  batched = True,
                                  fn_kwargs = {"training" : training,
                                               "prem_max_len" : premise_max_len,
                                               "hypo_max_len" : hypo_max_len})

    columns = ["input_ids", "attention_mask", "token_type_ids", "chunk_index", "prem_gov_idx", "hypo_gov_idx", "prem_tag", "hypo_tag"]
    
    if training :
        inputs_data = tokenized_inputs.to_tf_dataset(
            batch_size = batch_size,
            columns = columns,
            shuffle = True,
            collate_fn = collator,
            label_cols = "labels",
            drop_remainder = False
        )
    else :
        inputs_data = tokenized_inputs.to_tf_dataset(
            batch_size = batch_size,
            columns = columns,
            shuffle = True,
            collate_fn = collator,
            drop_remainder = False
        )
        
    return inputs_data

In [14]:
batch_size = 2

train_data = get_dataset(chunk_data, collator, batch_size, idx_max_len, True)
train_data = train_data.map(lambda x, y : ({k : tf.cast(v, tf.int32) for k, v in x.items()}, tf.cast(y, tf.int32)),
                            num_parallel_calls = tf.data.experimental.AUTOTUNE).prefetch(tf.data.experimental.AUTOTUNE)

  0%|          | 0/25 [00:00<?, ?ba/s]

In [15]:
roberta = TFRobertaModel.from_pretrained("klue/roberta-base", from_pt = True)
roberta.resize_token_embeddings(len(tokenizer))

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaModel: ['lm_head.decoder.bias', 'roberta.embeddings.position_ids', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing TFRobertaModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaModel were not initialized from the PyTorch model and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream 

<transformers.models.roberta.modeling_tf_roberta.TFRobertaEmbeddings at 0x1ee32e67c48>

In [16]:
def chunk_mean(premise_len, hypo_len, x, chunk_idx, sep_idx) :
        premise = tf.TensorArray(tf.float32, size = 0, dynamic_size = True)
        hypothesis = tf.TensorArray(tf.float32, size = 0, dynamic_size = True)
        for batch in range(len(chunk_idx)) :
            temp_premise = tf.TensorArray(tf.float32, size = 0, dynamic_size = True)
            temp_hypothesis = tf.TensorArray(tf.float32, size = 0, dynamic_size = True)
            for idx in chunk_idx[batch] :
                if tf.reduce_sum(idx) > 0 :
                    chunks = x[batch, idx[0] : idx[1], :] if idx[0] != idx[1] else tf.expand_dims(x[batch, idx[0], :], axis = 0)
                    if sep_idx[batch, 1] > tf.cast(idx[1], tf.int64) :
                        temp_premise = temp_premise.write(temp_premise.size(), tf.reduce_mean(chunks, axis = 0))
                    else :
                        temp_hypothesis = temp_hypothesis.write(temp_hypothesis.size(), tf.reduce_mean(chunks, axis = 0))
                        
            curr_len = temp_premise.size()
            for i in range(premise_len - curr_len) :
                temp_premise = temp_premise.write(temp_premise.size(), tf.zeros_like(x[batch, 0, :]))

            curr_len = temp_hypothesis.size()
            for i in range(hypo_len - curr_len) :
                temp_hypothesis = temp_hypothesis.write(temp_hypothesis.size(), tf.zeros_like(x[batch, 0, :]))

            premise = premise.write(premise.size(), temp_premise.stack())
            hypothesis = hypothesis.write(hypothesis.size(), temp_hypothesis.stack())
        premise = premise.stack()
        hypothesis = hypothesis.stack()
        return premise, hypothesis

In [17]:
class SyntaxStructureEmbedding(keras.layers.Layer) :
    def __init__(self, input_dim, **kwargs) :
        super(SyntaxStructureEmbedding, self).__init__()
        self.embedder = keras.layers.Embedding(input_dim = input_dim, output_dim = 768)
        
    def call(self, x) :
        return self.embedder(x)

In [18]:
def syntax_struct_info_vec(w, gov_idx, tag) :
    t = tf.TensorArray(tf.float32, size = 0, dynamic_size = True)
    for batch in range(len(gov_idx)) :
        temp = tf.TensorArray(tf.float32, size = 0, dynamic_size = True)
        for idx in range(len(gov_idx[batch])) :
            if gov_idx[batch][idx] >= 0 :
                temp = temp.write(temp.size(), w[batch][idx] + w[batch][gov_idx[batch][idx] + idx] + tag[batch][idx])                    
            else :
                pad = tf.zeros_like(w[batch][idx])
                temp = temp.write(temp.size(), pad)
        temp = temp.stack()
        t = t.write(t.size(), temp)
    t = t.stack()
    return t

In [19]:
class InfoConnectingLayer(keras.layers.Layer) :
    def __init__(self, input_shape, rnn_size, **kwargs) :
        super(InfoConnectingLayer, self).__init__()
        
        self.bias_shape = input_shape
        self.u = keras.layers.Dense(self.bias_shape, activation = None, use_bias = False)
        self.v_w = keras.layers.Dense(768, activation = None, use_bias = False)
        self.v_t = keras.layers.Dense(768, activation = None, use_bias = False)
        
        self.bi_lstm = keras.layers.Bidirectional(keras.layers.LSTM(rnn_size))
    
    def build(self, input_shape) :
        self.bias = self.add_weight("bias",
                                    shape = (self.bias_shape, 768))
    
    def call(self, w, t) :
        t = self.u(t)
        term_1 = tf.matmul(t, w)
        term_2 = self.v_w(w)
        term_3 = self.v_t(t)
        
        b = term_1 + term_2 + term_3 + self.bias
        
        return self.bi_lstm(b)

In [20]:
class ClassifierLayer(keras.layers.Layer) :
    def __init__(self, rnn_size, n_class, **kwargs) :
        super(ClassifierLayer, self).__init__()
        self.u = keras.layers.Dense(rnn_size, activation = None, use_bias = False)
        self.v_prem = keras.layers.Dense(rnn_size, activation = None, use_bias = False)
        self.v_hypo = keras.layers.Dense(rnn_size, activation = None, use_bias = False)
        self.outputs = keras.layers.Dense(n_class, activation = "softmax", use_bias = False)
        
    def call(self, h_prem, h_hypo) :
        term_1 = self.u(tf.expand_dims(h_prem, 2))
        term_1 = tf.matmul(term_1, tf.expand_dims(h_hypo, 2))
        term_1 = keras.layers.Flatten()(term_1)
        term_2 = self.v_prem(h_prem)
        term_3 = self.v_hypo(h_hypo)
        l = term_1 + term_2 + term_3
        return self.outputs(l)

In [21]:
class SyntaxRoBERTa(keras.models.Model) :
    def __init__(self, backbone, rnn_size, prem_max_len, hypo_max_len, **kargs) :
        super(SyntaxRoBERTa, self).__init__()
        
        self.backbone = backbone
        self.prem_tag_embedder = SyntaxStructureEmbedding(prem_max_len)
        self.hypo_tag_embedder = SyntaxStructureEmbedding(hypo_max_len)
        self.premise_info = InfoConnectingLayer(prem_max_len, rnn_size)
        self.hypo_info = InfoConnectingLayer(hypo_max_len, rnn_size)
        self.classifier = ClassifierLayer(rnn_size * 2, 3)
        
        self.prem_len = prem_max_len
        self.hypo_len = hypo_max_len
        
    def call(self, x) :
        backbone_input = {k : v for k, v in x.items() if k in ["input_ids", "token_type_ids", "attention_mask"]}
        chunk_idx = x["chunk_index"]
        prem_gov_idx = x["prem_gov_idx"]
        hypo_gov_idx = x["hypo_gov_idx"]
        prem_tag = x["prem_tag"]
        hypo_tag = x["hypo_tag"]
        
        prem_tag_emb = self.prem_tag_embedder(prem_tag)
        hypo_tag_emb = self.hypo_tag_embedder(hypo_tag)
        
        seq_emb = self.backbone(backbone_input).last_hidden_state
        p, h = chunk_mean(self.prem_len, self.hypo_len, seq_emb, chunk_idx, tf.where(x["input_ids"] == 2)[0::2])

        p_t = syntax_struct_info_vec(p, prem_gov_idx, prem_tag_emb)
        h_t = syntax_struct_info_vec(h, hypo_gov_idx, hypo_tag_emb)
        
        p_i = self.premise_info(p, p_t)
        h_i = self.hypo_info(h, h_t)
        res = self.classifier(p_i, h_i)
        
        return res

In [21]:
model = SyntaxRoBERTa(roberta, 64, premise_max_len, hypo_max_len)
loss_fn = keras.losses.sparse_categorical_crossentropy
epochs = 5

optimizer = keras.optimizers.Adam(learning_rate = 1e-5)

total_loss = []
total_acc = []

for epoch in range(epochs) :
        
    cum_loss = deque(maxlen = 20)
    cum_acc = deque(maxlen = 20)
    
    batch_loss = []
    batch_acc = []
    
    with tqdm(train_data, unit = "batch") as tepoch :
        for step, (x, y) in enumerate(tepoch) :
            tepoch.set_description(f"Epoch {epoch}")
            with tf.GradientTape() as t :
                y_hat = model(x)
                loss = loss_fn(y, y_hat)
            dz_dx = t.gradient(loss, model.trainable_weights,
                               unconnected_gradients = tf.UnconnectedGradients.ZERO)
            optimizer.apply_gradients(zip(dz_dx, model.trainable_weights))
            
            curr_loss = float(tf.reduce_mean(loss))
            curr_acc = float(keras.metrics.categorical_accuracy(y, tf.argmax(y_hat, axis = 1)))
            
            batch_loss.append(curr_loss)
            batch_acc.append(curr_acc)
            
            cum_loss.append(curr_loss)
            cum_acc.append(curr_acc)
            
            tepoch.set_postfix(loss = sum(cum_loss) / len(cum_loss),
                               accuracy = sum(cum_acc) / len(cum_acc))      
    total_loss.append(batch_loss)
    total_acc.append(batch_acc)

Epoch 0:  17%|█▋        | 2167/12499 [1:52:06<8:54:32,  3.10s/batch, accuracy=0.75, loss=0.667] 
[root] Internal Python error in the inspect module.
Below is the traceback from this internal error.

[root] 
Unfortunately, your original traceback can not be constructed.



Traceback (most recent call last):
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\jmp08\AppData\Local\Temp/ipykernel_25548/795805295.py", line 25, in <module>
    unconnected_gradients = tf.UnconnectedGradients.ZERO)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\tensorflow\python\eager\backprop.py", line 1090, in gradient
    unconnected_gradients=unconnected_gradients)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\tensorflow\python\eager\imperative_grad.py", line 77, in imperative_grad
    compat.as_str(unconnected_gradients.value))
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\tensorflow\python\eager\backprop.py", line 159, in _gradient_function
    return grad_fn(mock_op, *out_grads)
  File "C:\Users\jmp08\anaconda3\envs\practice\lib\site-packages\tensorflow\python\ops\math_grad.py"


KeyboardInterrupt

