In [32]:
import os
import numpy as np
import pandas as pd
import json

from meerkat import Oracle
from keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.models import load_model

In [2]:
def transfer_prediction(df) :

    def group_by_pattern(pattern, text, key) :
        result = ""
        pattern = str(pattern)
        for p_char, t_char in zip(pattern, text):
            if p_char == str(key):
                result += t_char
        return result

    df['FLUID-predict'] = df.apply(lambda row : group_by_pattern(row['prediction'], row['TAG NO'], 1), axis=1)
    df['SERIAL NO-predict'] = df.apply(lambda row : group_by_pattern(row['prediction'], row['TAG NO'], 2), axis=1)
    df['SIZE-predict'] = df.apply(lambda row : group_by_pattern(row['prediction'], row['TAG NO'], 3), axis=1)
    df['SPEC-predict'] = df.apply(lambda row : group_by_pattern(row['prediction'], row['TAG NO'], 4), axis=1)
    df['INSULATION-predict'] = df.apply(lambda row : group_by_pattern(row['prediction'], row['TAG NO'], 5), axis=1)
    df['TRACING-predict'] = df.apply(lambda row : group_by_pattern(row['prediction'], row['TAG NO'], 6), axis=1)

    return df

In [3]:
def post_process(df) :
    df['FLUID-predict'] = ""
    df['SERIAL NO-predict'] = ""
    df['SIZE-predict'] = ""
    df['SPEC-predict'] = ""
    df['INSULATION-predict'] = ""
    df['TRACING-predict'] = ""

    def group_by_pattern(pattern, text, key):
        result = ""
        key_flag = False  # 키 값을 찾기 위한 플래그
        for p_char, t_char in zip(pattern, text):
            if p_char == str(key):
                key_flag = True  # 키 값 시작
                result += t_char
            elif p_char == '0' and t_char == '-' and key_flag:
                # 키 값 사이에 위치한 '0'이고, 텍스트 값이 '-'일 경우에만 추가
                result += t_char
            elif p_char != str(key) and key_flag:
                # 키 값이 아닌 다른 값이 나오면, 키 값 사이의 처리가 끝난 것으로 간주하고 반복문 종료
                break

        if result.startswith("(") and ")" not in result:
            result = result[1:]  # 맨 앞의 '(' 제거
        if result.endswith(")") and "(" not in result:
            result = result[:-1]  # 맨 뒤의 ')' 제거
        if "(" in result and ")" not in result:
            result += ")"  # 중간에 '('가 있고 맨 뒤에 ')'가 없는 경우, 맨 뒤에 ')' 추가
        if result.startswith("-"):
            result = result[1:]  # 맨 앞의 '-' 제거
        if result.endswith("-"):
            result = result[:-1]  # 맨 뒤의 '-' 제거
        if result.startswith("/"):
            result = result[1:]  # 맨 앞의 '/' 제거
        if result.endswith("/"):
            result = result[:-1]  # 맨 뒤의 '/' 제거
        if result.endswith("\"\""):
            result = result[:-1]  # 맨 뒤의 쌍따옴표 중 하나 제거

        return result.strip()  # 최종 결과에서 양쪽 공백 제거

    df['FLUID-predict'] = df.apply(lambda row : group_by_pattern(row['prediction'], row['TAG NO'], 1), axis=1)
    df['SERIAL NO-predict'] = df.apply(lambda row : group_by_pattern(row['prediction'], row['TAG NO'], 2), axis=1)
    df['SIZE-predict'] = df.apply(lambda row : group_by_pattern(row['prediction'], row['TAG NO'], 3), axis=1)
    df['SPEC-predict'] = df.apply(lambda row : group_by_pattern(row['prediction'], row['TAG NO'], 4), axis=1)
    df['INSULATION-predict'] = df.apply(lambda row : group_by_pattern(row['prediction'], row['TAG NO'], 5), axis=1)
    df['TRACING-predict'] = df.apply(lambda row : group_by_pattern(row['prediction'], row['TAG NO'], 6), axis=1)

    return df

In [19]:
oracle = Oracle()

# 개별 모델
model_name = 'shaman_meerkat_5.h5'
tokenizer_name = "magic wand_5.json"

df = oracle.validation(model_name, tokenizer_name, os.getcwd(), "test set", df_return=True, validation_state=False)
df = transfer_prediction(df)
result2 = post_process(df)



In [24]:
result2.head(50)

Unnamed: 0,TAG NO,prediction,FLUID-predict,SERIAL NO-predict,SIZE-predict,SPEC-predict,INSULATION-predict,TRACING-predict
0,"2""-SM-8550-A1(0.45T/H)",3301102222044450000000,SM,8550,"2""",A1(),0,
1,"6""-P-730529-AK2K-11/2 ""ST",3301022222204444066666666,P,730529,"6""",AK2K,,"11/2 ""ST"
2,"20""-RTO-U018-304S-3T",33301110222204444066,RTO,U018,"20""",304S,,3T
3,"2""-RTO-U008-304S-3T",3301110222204444066,RTO,U008,"2""",304S,,3T
4,"20""-RTO-U025-304S-3T",33301110222204444066,RTO,U025,"20""",304S,,3T
5,FA-60000-DIA600(SUS304)-4t,11022222033333304444444066,FA,60000,DIA600,SUS304),,4t
6,TG-60000-□1100(SUS304)-4t,1102222203333304444444066,TG,60000,□1100,SUS304),,4t
7,"20""-RTO-U011-304S-4t",33301110222204444066,RTO,U011,"20""",304S,,4t
8,"10""-RTO-U008-304S-4T",33301110222204444066,RTO,U008,"10""",304S,,4T
9,SW-23011-1-LG-A,110222320004405,SW,230,1,LG,A,


In [36]:
text = "CWS-23104A-2-AK1-H5"
text = "2-IA-5213-B1A-T30"
model = load_model(model_name, compile=False)
with open(tokenizer_name) as f:
    data = json.load(f)
    tokenizer = tokenizer_from_json(data)

result3 = oracle.do_predict(text, model, tokenizer, batch=False)
result3



['301102222044406660000000000000000000']