**모델, 토크나이저 불러오기**

In [1]:
import json
from keras_crf import CRFModel
from tensorflow.keras import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Subclass defined so that the CRF model can be saved and loaded back.
class CustomCRFModel(CRFModel):
    """CRFModel that remembers its constructor arguments for (de)serialization."""

    def __init__(self, model, units):
        super().__init__(model, units)
        # Keep both constructor arguments; get_config() reads them back.
        self.model = model
        self.units = units

    def get_config(self):
        """Return a dict holding the wrapped model's config and ``units``."""
        return {'model': self.model.get_config(), 'units': self.units}

    @classmethod
    def from_config(cls, config):
        """Rebuild an instance from a dict shaped like ``get_config()``'s result."""
        return cls(Model.from_config(config['model']), config['units'])

custom_objects = {'CustomCRFModel': CustomCRFModel}

# Load the saved model without its compile state, then compile it afresh.
model = load_model(
    'bilstm_crf.h5',
    custom_objects=custom_objects,
    compile=False,
)
model.compile(optimizer=Adam(learning_rate=0.001), metrics='accuracy')

# Load the saved tokenizers: read each JSON file, then rebuild the
# tokenizer from what was read.
with open('sentence_tokenizer_json.json', encoding='utf-8') as f:
    sentence_tokenizer_json = json.load(f)
sentence_tokenizer = tokenizer_from_json(sentence_tokenizer_json)

with open('tag_tokenizer_json.json', encoding='utf-8') as f:
    tag_tokenizer_json = json.load(f)
tag_tokenizer = tokenizer_from_json(tag_tokenizer_json)


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.10.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [None]:
# Print a summary of the loaded model.
model.summary()

In [9]:
# Alternative sample sentence, kept commented out:
# text = '태안군의회 김기두 의장이 지난해 거둔 성과를 바탕으로 올 한해 군민 중심의 의정활동을 펼치겠다는 힘찬 포부를 밝혔다.'
import re
from collections import defaultdict
# Sample sentence passed to predict() at the end of this cell.
text = "올 가을 출시 예정인 애플의 아이폰16 시리즈 모든 기종에 A18 칩이 탑재될 것으로 보인다."

def predict(text, max_len=70):
    """Tag the words of a sentence and group the tagged words by entity type.

    The sentence is cleaned with a regex, split on whitespace, encoded with
    ``sentence_tokenizer``, padded to ``max_len`` and passed to ``model``.
    The word list and every tagged word are printed as a side effect.

    Args:
        text: The raw sentence to tag.
        max_len: Length the encoded sentence is padded or truncated to.
            Defaults to 70, the value this function previously hard-coded.

    Returns:
        A ``defaultdict(list)`` mapping the entity part of each predicted
        tag (the piece after the first ``-``, e.g. ``DT`` for ``B-DT``) to
        the list of words that received it, in sentence order.
    """
    # Every run of characters outside Hangul, digits, ASCII letters and '.'
    # becomes a single space.
    cleaned = re.sub('[^ㄱ-ㅣ가-힣0-9a-zA-Z.]+', " ", text)
    # split() without an argument discards the empty strings that
    # split(" ") would produce for a leading or trailing replaced character.
    words = cleaned.split()
    print(words)

    index_to_tag = tag_tokenizer.index_word
    tag = defaultdict(list)

    # Encode one word at a time. The tokenizer omits a word it cannot map
    # (its per-word result is then empty), which would otherwise shift every
    # later word against its prediction; tracking the surviving words keeps
    # words and predictions aligned while feeding the model the same ids.
    per_word_ids = sentence_tokenizer.texts_to_sequences([[w] for w in words])
    kept_words = [w for w, ids in zip(words, per_word_ids) if ids]
    sequence = [idx for ids in per_word_ids for idx in ids]

    if not kept_words:
        # Nothing to tag; skip the model call.
        return tag

    # truncating='post' keeps the FIRST max_len ids, so position i of the
    # prediction still corresponds to kept_words[i] for long sentences
    # (the default truncates from the front).
    X = pad_sequences([sequence], padding='post', truncating='post',
                      maxlen=max_len)

    # NOTE(review): the first element of the prediction output is assumed to
    # hold the decoded tag ids, one row per input sentence - confirm against
    # the CRF model's predict() contract.
    y_predicted = model.predict(X)[0]

    # zip stops at the shorter input, so padded positions past the last word
    # can never index outside the word list.
    for word, pred in zip(kept_words, y_predicted[0]):
        # Ids 0 and 1 are skipped: 0 is the padding value; 1 is presumably
        # the "outside" tag - verify against tag_tokenizer.
        if pred in (0, 1):
            continue
        label = index_to_tag[pred]
        print(f"{word:17} {label}")
        pieces = label.split('-')
        # Fall back to the whole label when it carries no '-' separator.
        entity = pieces[1] if len(pieces) > 1 else label
        tag[entity].append(word)

    return tag
    
# Tag the sample sentence; as the cell's last expression, the returned
# mapping is shown in the cell output.
predict(text)

['올', '가을', '출시', '예정인', '애플의', '아이폰16', '시리즈', '모든', '기종에', 'A18', '칩이', '탑재될', '것으로', '보인다.']
단어                 예측값
-----------------------------------
올                 B-DT
가을                I-DT
defaultdict(<class 'list'>, {'DT': ['올', '가을']})


- ps(person) = 이름
- fd(study_field) = 학문 분야
- tr(theory) = 이론, 법칙, 기법
- af(artifacts) = 인공물, 상품명
- ogg(organization) = 기관, 기업, 단체
- lc(location) = 지역, 자연물, 랜드마크
- cv(civilization) = 의식주, 문화
- dt(date) = 날짜
- ti(time) = 시간
- qt(quantity) = 숫자 관련 개체명
- ev(event) = 행사/축제, 사건/사고
- am(animal) = 동물, 신체부위
- pt(plant) = 식물 관련
- mt(material) = 원소, 화학물, 금속/암석
- tm(term) = 그 외 기타