In [1]:
import json
import os
from collections import Counter

import nltk
import numpy as np
import tensorflow as tf

from konlpy.tag import Okt
from tensorflow.keras.initializers import glorot_uniform

In [2]:
# Load the cached NSMC train/test documents from JSON.
TRAIN_DOCS_PATH = 'data/nsmcData/train_docs.json'
TEST_DOCS_PATH = 'data/nsmcData/test_docs.json'

# Fail fast with a clear message: silently skipping the load would only
# surface later as a NameError on `train_docs`.
if not os.path.isfile(TRAIN_DOCS_PATH):
    raise FileNotFoundError(
        'Cached training documents not found: {!r}'.format(TRAIN_DOCS_PATH)
    )

# JSON text is UTF-8; pin the encoding so the platform default codec
# cannot corrupt non-ASCII content.
with open(TRAIN_DOCS_PATH, encoding='utf-8') as f:
    train_docs = json.load(f)
with open(TEST_DOCS_PATH, encoding='utf-8') as f:
    test_docs = json.load(f)

In [3]:
# Okt tagger instance; tokenize() calls okt.pos on it.
okt = Okt()

# Flatten the first element of every training entry into one token list
# (presumably each entry is a (tokens, label) pair — TODO confirm).
tokens = [
    token
    for doc_entry in train_docs
    for token in doc_entry[0]
]

# Wrap the tokens in an nltk.Text so vocab() can rank them by frequency.
text = nltk.Text(tokens, name='NSMC')

# The 10000 most common tokens, in rank order; term_frequency() emits one
# count per entry of this list, in this order.
selected_words = [word for word, _count in text.vocab().most_common(10000)]

# Load the saved Keras model, resolving the 'GlorotUniform' name through
# custom_objects.
# NOTE(review): an initializer *instance* is passed here, not the class —
# confirm this still deserializes on the TensorFlow version in use.
model = tf.keras.models.load_model(
    'data/modelData/model.h5',
    custom_objects={'GlorotUniform': glorot_uniform()},
)

In [4]:
def tokenize(doc):
    """Tag ``doc`` with the module-level ``okt`` and return '/'-joined tokens.

    ``okt.pos`` is called with ``norm=True`` (normalize the text) and
    ``stem=True`` (reduce words to their root form). Each item it yields
    (presumably a (morpheme, tag) pair) is joined with '/' into one string.

    Returns:
        list of str, one per item returned by ``okt.pos``.
    """
    tagged = okt.pos(doc, norm=True, stem=True)
    return list(map('/'.join, tagged))

def term_frequency(doc):
    """Count how often each entry of ``selected_words`` occurs in ``doc``.

    Args:
        doc: a sequence of hashable tokens (e.g. the list returned by
            ``tokenize``). A plain ``str`` is also accepted and keeps
            ``str.count`` substring semantics.

    Returns:
        list of int, aligned index-for-index with ``selected_words``.
    """
    if isinstance(doc, str):
        # Counter would count single characters; preserve substring counting.
        return [doc.count(word) for word in selected_words]

    # Count the document once instead of rescanning it for every
    # vocabulary word.
    counts = Counter(doc)
    # Counter returns 0 for missing keys without inserting them.
    return [counts[word] for word in selected_words]

In [5]:
def percentage_pos_neg(review):
    """Score a raw review string with the module-level ``model``.

    The review is tokenized, converted to a term-frequency vector over
    ``selected_words``, and passed to ``model.predict`` as a batch of one.

    Args:
        review: the review text to classify.

    Returns:
        dict with two keys:
            'result': '1' when the model output is above 0.5, else '0'
                (presumably 1 = positive, 0 = negative — confirm against
                the training labels).
            'score': percentage for the reported label, rounded to two
                decimals (``score * 100`` for '1', ``(1 - score) * 100``
                for '0').

    Raises:
        ValueError: if the model does not return exactly one value.
    """
    # Named `features`, not `tf`, to avoid shadowing the TensorFlow alias.
    features = term_frequency(tokenize(review))
    # Add a leading axis so the model receives a batch containing one row.
    batch = np.expand_dims(np.asarray(features, dtype='float32'), axis=0)

    # Extract the single prediction explicitly: calling float() directly on
    # an array with ndim > 0 is deprecated since NumPy 1.25.
    score = float(np.asarray(model.predict(batch)).item())

    if score > 0.5:
        return {'result': '1', 'score': round(score * 100, 2)}
    return {'result': '0', 'score': round((1 - score) * 100, 2)}

In [6]:
# Sample payload: one comment stored under the 'comments' key.
data = dict(comments='나빠')

# Bare last expression so the notebook echoes the comment text.
data['comments']

'나빠'

In [7]:
# Run the full pipeline on the sample comment. The returned dict carries
# 'result' ('1' if the model score is above 0.5, otherwise '0') and 'score',
# the percentage assigned to the reported label.
percentage_pos_neg(data['comments'])

{'result': '0', 'score': 82.03}