In [9]:
# 필요한 패키지 불러오기
import os
from datetime import datetime
import tensorflow.compat.v1 as tf
import numpy as np
import json
from sklearn.model_selection import train_test_split

In [10]:
# Locations and file names of the preprocessed inputs and outputs
DATA_IN_PATH = './data_in/'
DATA_OUT_PATH = './data_out/'
INPUT_TRAIN_DATA = 'nsmc_train_input.npy'
LABEL_TRAIN_DATA = 'nsmc_train_label.npy'
DATA_CONFIGS = 'data_configs.json'

# Pass the path (not an open file object) so np.load manages the file handle
# itself instead of leaving one open for the lifetime of the kernel.
input_data = np.load(DATA_IN_PATH + INPUT_TRAIN_DATA)
label_data = np.load(DATA_IN_PATH + LABEL_TRAIN_DATA)
# The context manager closes the config file as soon as it has been parsed.
with open(DATA_IN_PATH + DATA_CONFIGS, 'r') as config_file:
    prepro_configs = json.load(config_file)

In [11]:
# Experiment settings
TEST_SPLIT = 0.1
RNG_SEED = 13371447

# Hyperparameters
# NOTE(review): the +1 presumably reserves an extra index (e.g. padding) —
# confirm against the preprocessing step that wrote data_configs.json.
VOCAB_SIZE = prepro_configs['vocab_size'] + 1
EMB_SIZE = 128
BATCH_SIZE = 16
NUM_EPOCHS = 1

# Hold out TEST_SPLIT of the training arrays for evaluation; the fixed seed
# keeps the split identical between runs.
input_train, input_eval, label_train, label_eval = train_test_split(
    input_data,
    label_data,
    test_size=TEST_SPLIT,
    random_state=RNG_SEED,
)

In [12]:
# Mapping function
def mapping_fn(X, Y):
    """Wrap the features in a dict under key 'x' and pass the label through.

    Returns:
        A (features_dict, label) tuple where features_dict is {'x': X}.
    """
    return {'x': X}, Y

# Training data input function
def train_input_fn():
    """Build the training input pipeline and return its next-element tensors.

    The training arrays are shuffled with a buffer covering the whole set,
    grouped into batches of BATCH_SIZE, mapped through mapping_fn and repeated
    NUM_EPOCHS times.

    Returns:
        The (features, labels) structure produced by the iterator's
        get_next(), where features is the dict built by mapping_fn.
    """
    dataset = tf.data.Dataset.from_tensor_slices((input_train, label_train))
    dataset = dataset.shuffle(buffer_size=len(input_train))
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.map(mapping_fn)
    dataset = dataset.repeat(count=NUM_EPOCHS)
    # Dataset.make_one_shot_iterator() is deprecated; the module-level helper
    # builds the same iterator without the deprecation warning.
    iterator = tf.data.make_one_shot_iterator(dataset)

    return iterator.get_next()

# Evaluation data input function
def eval_input_fn():
    """Build the evaluation input pipeline and return its next-element tensors.

    The held-out arrays are read once, in order, in batches of 16 and mapped
    through mapping_fn. No shuffling is applied: it is not needed to compute
    evaluation metrics, and skipping it keeps this pipeline consistent with
    test_input_fn and avoids filling a buffer with the whole evaluation set.

    Returns:
        The (features, labels) structure produced by the iterator's
        get_next(), where features is the dict built by mapping_fn.
    """
    dataset = tf.data.Dataset.from_tensor_slices((input_eval, label_eval))
    dataset = dataset.batch(16)
    dataset = dataset.map(mapping_fn)
    # Dataset.make_one_shot_iterator() is deprecated; the module-level helper
    # builds the same iterator without the deprecation warning.
    iterator = tf.data.make_one_shot_iterator(dataset)

    return iterator.get_next()

In [13]:
# Model function
def model_fn(features, labels, mode, params):
    """Build the CNN classifier graph and return the EstimatorSpec for `mode`.

    Architecture: embedding -> dropout -> Conv1D -> global max pooling ->
    dense hidden layer -> dropout -> single-unit logit.

    Args:
        features: dict holding the model input under key 'x'
            (assumed to be integer token ids — TODO confirm against preprocessing).
        labels: label tensor, or None; reshaped to [-1, 1] when provided.
        mode: one of tf.estimator.ModeKeys.TRAIN / EVAL / PREDICT.
        params: not used by this function; kept in the signature for callers.

    Returns:
        A tf.estimator.EstimatorSpec for the requested mode: loss and train_op
        for TRAIN, loss and an 'acc' metric for EVAL, a 'prob' prediction for
        PREDICT. Returns None if `mode` matches none of the three.
    """
    TRAIN = mode == tf.estimator.ModeKeys.TRAIN
    EVAL = mode == tf.estimator.ModeKeys.EVAL
    PREDICT = mode == tf.estimator.ModeKeys.PREDICT

    # Embedding
    embedded = tf.keras.layers.Embedding(
                    VOCAB_SIZE,
                    EMB_SIZE)(features['x'])

    # Pass the training flag explicitly, as the second dropout below does;
    # without it this layer is not driven by the Estimator mode.
    dropout_emb = tf.keras.layers.Dropout(rate=0.2)(embedded, training=TRAIN)

    # Convolution
    conv = tf.keras.layers.Conv1D(
           filters=32,
           kernel_size=3,
           padding='same',
           activation=tf.nn.relu)(dropout_emb)
    # Max pooling over the sequence axis
    pool = tf.keras.layers.GlobalMaxPool1D()(conv)
    # Hidden layer
    hidden = tf.keras.layers.Dense(units=250, activation=tf.nn.relu)(pool)

    dropout_hidden = tf.keras.layers.Dropout(rate=0.2)(hidden, training=TRAIN)
    logits = tf.keras.layers.Dense(units=1)(dropout_hidden)

    # Give the labels the same [batch, 1] layout as the logits.
    if labels is not None:
        labels = tf.reshape(labels, [-1, 1])

    # Training
    if TRAIN:
        global_step = tf.train.get_global_step()
        loss = tf.losses.sigmoid_cross_entropy(labels, logits)
        train_op = tf.train.AdamOptimizer(0.001).minimize(loss, global_step)

        return tf.estimator.EstimatorSpec(mode=mode, train_op=train_op, loss=loss)
    # Evaluation
    elif EVAL:
        loss = tf.losses.sigmoid_cross_entropy(labels, logits)
        pred = tf.nn.sigmoid(logits)
        # Rounding the sigmoid output thresholds the probability at 0.5.
        accuracy = tf.metrics.accuracy(labels, tf.round(pred))
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops={'acc': accuracy})
    # Prediction
    elif PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                'prob': tf.nn.sigmoid(logits),
            }
        )

In [14]:
# Create the Estimator object.
# The checkpoint directory is derived from DATA_OUT_PATH so the output location
# is configured in one place; it resolves to the same directory as before.
est = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir=DATA_OUT_PATH + 'checkpoint/cnn_model',
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'data_out/checkpoint/cnn_model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [None]:
# Elapsed time: train the estimator and report how long the run took.
# The start timestamp is taken in UTC, immediately before training.
time_start = datetime.utcnow()
print("Experiment started at {}".format(time_start.strftime("%H:%M:%S")))
print(".......................................") 

# Run training with the input pipeline defined by train_input_fn.
est.train(train_input_fn)

# The end timestamp is taken in UTC, right after est.train returns.
time_end = datetime.utcnow()
print(".......................................")
print("Experiment finished at {}".format(time_end.strftime("%H:%M:%S")))
print("")
# The difference of the two timestamps is a timedelta, reported in seconds.
time_elapsed = time_end - time_start
print("Experiment elapsed time: {} seconds".format(time_elapsed.total_seconds()))

Experiment started at 09:36:55
.......................................
Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into data_out/checkpoint/cnn_model\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 0.6956238, step = 0
INFO:tensorflow:global_step/sec: 17.6362
INFO:tensorflow:loss = 0.65124094, step = 100 (5.672 sec)
INFO:tensorflow:global_step/sec: 18.6155
INFO:tensorflow:loss 

INFO:tensorflow:loss = 0.3401653, step = 7300 (5.456 sec)
INFO:tensorflow:global_step/sec: 18.3724
INFO:tensorflow:loss = 0.29424724, step = 7400 (5.443 sec)
INFO:tensorflow:global_step/sec: 18.5429
INFO:tensorflow:loss = 0.46598658, step = 7500 (5.392 sec)
INFO:tensorflow:global_step/sec: 18.413
INFO:tensorflow:loss = 0.5998529, step = 7600 (5.431 sec)
INFO:tensorflow:global_step/sec: 18.4174
INFO:tensorflow:loss = 0.278996, step = 7700 (5.431 sec)
INFO:tensorflow:global_step/sec: 18.4045
INFO:tensorflow:loss = 0.2053481, step = 7800 (5.433 sec)
INFO:tensorflow:global_step/sec: 18.3606
INFO:tensorflow:loss = 0.2901947, step = 7900 (5.445 sec)
INFO:tensorflow:global_step/sec: 18.2687
INFO:tensorflow:loss = 0.18309131, step = 8000 (5.475 sec)
INFO:tensorflow:global_step/sec: 18.413
INFO:tensorflow:loss = 0.449611, step = 8100 (5.431 sec)
INFO:tensorflow:global_step/sec: 18.3811
INFO:tensorflow:loss = 0.62885356, step = 8200 (5.440 sec)
INFO:tensorflow:global_step/sec: 18.2764
INFO:tenso

In [None]:
# Evaluate the trained estimator on the held-out validation split.
valid = est.evaluate(input_fn=eval_input_fn)

In [None]:
# Model evaluation: load the test inputs and labels
INPUT_TEST_DATA = 'nsmc_test_input.npy'
LABEL_TEST_DATA = 'nsmc_test_label.npy'

# Pass the path (not an open file object) so np.load manages the file handle
# itself instead of leaving one open for the lifetime of the kernel.
test_input_data = np.load(DATA_IN_PATH + INPUT_TEST_DATA)
test_label_data = np.load(DATA_IN_PATH + LABEL_TEST_DATA)

In [None]:
# Test data input function
def test_input_fn():
    """Build the test input pipeline and return its next-element tensors.

    The test arrays are read once, in order, in batches of 16 and mapped
    through mapping_fn.

    Returns:
        The (features, labels) structure produced by the iterator's
        get_next(), where features is the dict built by mapping_fn.
    """
    dataset = tf.data.Dataset.from_tensor_slices((test_input_data, test_label_data))
    dataset = dataset.batch(16)
    dataset = dataset.map(mapping_fn)
    # Dataset.make_one_shot_iterator() is deprecated; the module-level helper
    # builds the same iterator without the deprecation warning.
    iterator = tf.data.make_one_shot_iterator(dataset)

    return iterator.get_next()

In [None]:
# Evaluate on the test set; despite its name, `predict` receives the result of
# est.evaluate, not the output of est.predict.
predict = est.evaluate(input_fn=test_input_fn)