# CNN 텍스트 유사도 모델

 - 모델에 입력하고자 하는 데이터는 문장 2개다. 문장에 대한 유사도를 보기 위해서는 기준이 되는 문장이 필요하다. 이를 "기준 문장"이라고 한다. 그리고 "기준 문장"에 대해 비교해야 하는 문장이 있는데 이를 "대상 문장"이라고 한다.
 
 - 만약 기준 문장이 "I love deep NLP"이고, 이를 비교할 대상 문장이 "Deep NLP is awesome"이라고 하자. 이 두 문장은 의미가 상당히 유사하다. 만약 학습이 진행된 후에 두 문장에 대한 유사도를 측정하고자 한다면 아마도 높은 유사도 점수를 보일 것이다.
 
 - 이처럼 문장이 의미적으로 가까우면 유사도 점수는 높게 표현될 것이고 그렇지 않을 경우에는 낮게 표현될 것이다.

# 전반적인 흐름

 - 데이터를 모델에 입력하기 전에 기준 문장과 대상 문장에 대해서 인덱싱을 거쳐 문자열 형태의 문장을 인덱스 벡터 형태로 구성한다.
 - 인덱스 벡터로 구성된 문장 정보는 임베딩 과정을 통해 각 단어들이 임베딩 벡터로 바뀐 형태로 구성될 것이다.
 - 임베딩 과정을 통해 나온 문장 행렬은 기준 문장과 대상 문장 각각에 해당하는 CNN 블록을 거치게 한다.
 - CNN 블록은 합성곱 층과 맥스 풀링(Max Pooling) 층을 합친 하나의 신경망을 말한다.
 - 두 블록을 거쳐 나온 벡터는 문장에 대한 의미 벡터가 된다.
 - 두 문장에 대한 의미 벡터를 가지고 여러 방식으로 유사도를 구할 수 있다.
 - 완전연결층을 거친 후 최종적으로 Logistic Regression 방법을 통해 문장 유사도 점수를 측정할 것이다.
 - 이렇게 측정한 점수에 따라 두 문장이 유사한지 유사하지 않은지 판단한다.

In [1]:
# 라이브러리
import tensorflow as tf
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
import json

In [2]:
# Data directories and file names.
# Input files are read from DATA_IN_PATH; checkpoints are written under DATA_OUT_PATH.
DATA_IN_PATH = './data_in/'
DATA_OUT_PATH = './data_out/'
# NumPy files holding the two question arrays and the labels of the training set.
TRAIN_Q1_DATA_FILE = 'train_q1.npy'
TRAIN_Q2_DATA_FILE = 'train_q2.npy'
TRAIN_LABEL_DATA_FILE = 'train_label.npy'
# JSON file with preprocessing metadata; its 'vocab_size' entry is read later.
DATA_CONFIGS = 'data_configs.json'

In [3]:
# Hyperparameters
TEST_SPLIT = 0.1                     # fraction of the data held out for evaluation
RNG_SEED = 13371447                  # seed for the train/eval split
EPOCH = 1                            # number of passes over the training data
BATCH_SIZE = 1024
MAX_SEQUENCE_LENGTH = 31             # also used as the max-pooling window size
WORD_EMBEDDING_DIM = 100
CONV_FEATURE_DIM = 300               # number of Conv1D filters
CONV_OUTPUT_DIM = 128                # width of the Dense layer inside each CNN block
CONV_WINDOW_SIZE = 3                 # Conv1D kernel size
SIMILARITY_DENSE_FEATURE_DIM = 200   # width of the Dense layer applied to the merged vectors

# Read the preprocessing metadata stored next to the input data.
with open(DATA_IN_PATH + DATA_CONFIGS, 'r') as config_file:
    prepro_configs = json.load(config_file)

# Vocabulary size recorded in the metadata; used as the embedding input dimension.
VOCAB_SIZE = prepro_configs['vocab_size']

In [4]:
# Load the training data.
# The path is passed directly so that np.load opens and closes the file itself;
# wrapping it in a bare open(...) left the file handles unclosed.
q1_data = np.load(DATA_IN_PATH + TRAIN_Q1_DATA_FILE)
q2_data = np.load(DATA_IN_PATH + TRAIN_Q2_DATA_FILE)
labels = np.load(DATA_IN_PATH + TRAIN_LABEL_DATA_FILE)

In [5]:
# Split the data into training and evaluation sets.
# The two question arrays are paired along a new axis 1 so that a single
# train_test_split call assigns both questions of a row to the same side.
X = np.stack((q1_data, q2_data), axis=1)
y = labels
train_X, eval_X, train_y, eval_y = train_test_split(
    X, y, test_size=TEST_SPLIT, random_state=RNG_SEED
)
# Undo the pairing: index 0 on axis 1 is question 1, index 1 is question 2.
train_Q1, train_Q2 = train_X[:, 0], train_X[:, 1]
eval_Q1, eval_Q2 = eval_X[:, 0], eval_X[:, 1]

In [6]:
def rearrange(base, hypothesis, label):
    """Pack the two questions into a single feature dictionary.

    Args:
        base: the base (reference) question.
        hypothesis: the question compared against the base question.
        label: the label for the pair.

    Returns:
        A ``(features, label)`` tuple where ``features`` maps ``"x1"`` to
        ``base`` and ``"x2"`` to ``hypothesis``.
    """
    return {"x1": base, "x2": hypothesis}, label

# Input function for training
def train_input_fn():
    """Build the training input pipeline and return its next-element tensors.

    The training arrays are sliced per example, shuffled with a 10000-element
    buffer, grouped into batches of BATCH_SIZE, mapped through ``rearrange``
    into ``(features, label)`` form and repeated for EPOCH passes.

    Returns:
        The ``get_next()`` output of a one-shot iterator over the pipeline.
    """
    pipeline = (
        tf.data.Dataset.from_tensor_slices((train_Q1, train_Q2, train_y))
        .shuffle(buffer_size=10000)
        .batch(BATCH_SIZE)
        .map(rearrange)
        .repeat(EPOCH)
    )
    return pipeline.make_one_shot_iterator().get_next()

# Input function for evaluation
def eval_input_fn():
    """Build the evaluation input pipeline and return its next-element tensors.

    Unlike the training pipeline, the data is consumed exactly once (no
    ``repeat``) and is not shuffled: ordering does not help evaluation, and a
    fixed order keeps the batch composition identical between runs.

    Returns:
        The ``get_next()`` output of a one-shot iterator over the pipeline.
    """
    dataset = tf.data.Dataset.from_tensor_slices((eval_Q1, eval_Q2, eval_y))
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.map(rearrange)
    iterator = dataset.make_one_shot_iterator()
    return iterator.get_next()

In [7]:
# CNN block: convolution, max pooling and a Dense layer combined into one unit.
def basic_conv_sementic_network(inputs, name):
    """Apply the Conv1D -> MaxPool1D -> Dense block to an embedded sentence.

    Args:
        inputs: tensor fed to the Conv1D layer.
            NOTE(review): presumably (batch, MAX_SEQUENCE_LENGTH, embedding_dim);
            confirm against the caller.
        name: prefix prepended (without a separator) to the ``'conv_1d'`` and
            ``'dense'`` layer names, keeping the two sentence towers distinct.

    Returns:
        The Dense layer output with axis 1 squeezed out.
    """
    # 'same' padding keeps the sequence length unchanged through the convolution.
    convolved = tf.keras.layers.Conv1D(
        filters=CONV_FEATURE_DIM,
        kernel_size=CONV_WINDOW_SIZE,
        padding='same',
        activation=tf.nn.relu,
        name=name + 'conv_1d',
    )(inputs)

    # A window of MAX_SEQUENCE_LENGTH with stride 1 reduces a sequence of
    # exactly that length to a single position.
    pooled = tf.keras.layers.MaxPool1D(
        pool_size=MAX_SEQUENCE_LENGTH,
        strides=1,
    )(convolved)

    projected = tf.keras.layers.Dense(
        units=CONV_OUTPUT_DIM,
        activation=tf.nn.relu,
        name=name + 'dense',
    )(pooled)

    # Drop the pooled axis; this requires that axis to have size 1.
    return tf.squeeze(projected, axis=1)

In [8]:
# Model implementation
def model_fn(features, labels, mode):
    """Estimator model function for the two-tower CNN sentence-similarity model.

    Both questions are embedded with one shared embedding layer, passed through
    separate CNN blocks, concatenated, and scored by a small dense network that
    ends in a single logit.

    Args:
        features: dict with keys ``'x1'`` (base question) and ``'x2'``
            (hypothesis question) holding the word-index tensors.
        labels: target labels for the pairs; not used in PREDICT mode.
        mode: one of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode: predictions
        under the key ``'is_duplicate'`` for PREDICT, loss plus the ``'acc'``
        metric for EVAL, loss plus the train op for TRAIN.

    Raises:
        ValueError: if ``mode`` is not TRAIN, EVAL or PREDICT.
    """
    TRAIN = mode == tf.estimator.ModeKeys.TRAIN
    EVAL = mode == tf.estimator.ModeKeys.EVAL
    PREDICT = mode == tf.estimator.ModeKeys.PREDICT

    # Create one embedding layer and use it for both the base and the
    # hypothesis question so they share the same word vectors.
    embedding = tf.keras.layers.Embedding(VOCAB_SIZE, WORD_EMBEDDING_DIM)
    base_embedded_matrix = embedding(features['x1'])
    hypothesis_embedded_matrix = embedding(features['x2'])

    # Apply dropout to the embeddings. The training flag is passed explicitly:
    # inside an Estimator model_fn nothing sets the Keras learning phase, so
    # without it the Dropout layers stay inactive even while training.
    base_embedded_matrix = tf.keras.layers.Dropout(0.2)(
        base_embedded_matrix, training=TRAIN)
    hypothesis_embedded_matrix = tf.keras.layers.Dropout(0.2)(
        hypothesis_embedded_matrix, training=TRAIN)

    # Feed each embedded question through its own CNN block.
    base_sementic_matrix = basic_conv_sementic_network(base_embedded_matrix, 'base')
    hypothesis_sementic_matrix = basic_conv_sementic_network(hypothesis_embedded_matrix, 'hypothesis')

    # Concatenate the two sentence vectors along the last axis.
    merged_matrix = tf.concat([base_sementic_matrix, hypothesis_sementic_matrix], -1)

    # Measure the similarity of the two questions from the merged vector.
    # Cosine similarity or Euclidean distance could be used instead, but here
    # the Dense layers are given the role of computing the similarity score.
    similarity_dense_layer = tf.keras.layers.Dense(SIMILARITY_DENSE_FEATURE_DIM, activation=tf.nn.relu)(merged_matrix)
    similarity_dense_layer = tf.keras.layers.Dropout(0.2)(
        similarity_dense_layer, training=TRAIN)
    logit_layer = tf.keras.layers.Dense(1)(similarity_dense_layer)
    logit_layer = tf.squeeze(logit_layer, 1)
    similarity = tf.nn.sigmoid(logit_layer)

    # Prediction: return the similarity score only; labels are not needed.
    if PREDICT:
        return tf.estimator.EstimatorSpec(
                  mode=mode,
                  predictions={
                      'is_duplicate':similarity
                  })

    # The loss is computed from the raw logits, not from the sigmoid output.
    loss = tf.losses.sigmoid_cross_entropy(labels, logit_layer)

    # Evaluation: accuracy of the rounded similarity score against the labels.
    if EVAL:
        accuracy = tf.metrics.accuracy(labels, tf.round(similarity))
        return tf.estimator.EstimatorSpec(
                  mode=mode,
                  eval_metric_ops= {'acc': accuracy},
                  loss=loss)

    # Training: minimise the loss with the Adam optimizer.
    if TRAIN:
        global_step = tf.train.get_global_step()
        train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step)

        return tf.estimator.EstimatorSpec(
                  mode=mode,
                  train_op=train_op,
                  loss=loss)

    # Fail loudly instead of silently returning None for an unknown mode.
    raise ValueError("Unsupported mode: {}".format(mode))

In [13]:
# Create the Estimator.
# Expose only GPU index 6 to CUDA (test setting; adjust for the local machine).
os.environ["CUDA_VISIBLE_DEVICES"]="6" #For TEST

# Checkpoints are stored under <cwd>/<DATA_OUT_PATH>checkpoint/cnn/.
# NOTE(review): the run recorded below failed with NotFoundError while restoring
# model.ckpt-3750 from this directory -- it looks like a checkpoint left over
# from a different graph. Clear the directory or point model_dir elsewhere
# after changing the model.
model_dir = os.path.join(os.getcwd(), DATA_OUT_PATH + "checkpoint/cnn/")
os.makedirs(model_dir, exist_ok=True)

est = tf.estimator.Estimator(model_fn, model_dir=model_dir)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\user\\Desktop\\Natural Language Processing\\02. NLP_with_Tensorflow\\./data_out/checkpoint/cnn/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001E38FAA3860>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [14]:
# Train the model using the training input function.
est.train(train_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\user\Desktop\Natural Language Processing\02. NLP_with_Tensorflow\./data_out/checkpoint/cnn/model.ckpt-3750


NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key baseconv_1d/bias not found in checkpoint
	 [[node save/RestoreV2 (defined at <ipython-input-14-329b1d812262>:2)  = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]

Caused by op 'save/RestoreV2', defined at:
  File "C:\Users\user\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\user\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\user\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
    self.io_loop.start()
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\user\Anaconda3\lib\asyncio\base_events.py", line 427, in run_forever
    self._run_once()
  File "C:\Users\user\Anaconda3\lib\asyncio\base_events.py", line 1440, in _run_once
    handle._run()
  File "C:\Users\user\Anaconda3\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\ioloop.py", line 758, in _run_callback
    ret = callback()
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 1233, in inner
    self.run()
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3191, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-329b1d812262>", line 2, in <module>
    est.train(train_input_fn)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 354, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1207, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1241, in _train_model_default
    saving_listeners)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1468, in _train_with_estimator_spec
    log_step_count_steps=log_step_count_steps) as mon_sess:
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 504, in MonitoredTrainingSession
    stop_grace_period_secs=stop_grace_period_secs)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 921, in __init__
    stop_grace_period_secs=stop_grace_period_secs)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 643, in __init__
    self._sess = _RecoverableSession(self._coordinated_creator)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1107, in __init__
    _WrappedSession.__init__(self, self._create_session())
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1112, in _create_session
    return self._sess_creator.create_session()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 800, in create_session
    self.tf_sess = self._session_creator.create_session()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 557, in create_session
    self._scaffold.finalize()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 215, in finalize
    self._saver.build()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1114, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1151, in _build
    build_save=build_save, build_restore=build_restore)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 789, in _build_internal
    restore_sequentially, reshape)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 459, in _AddShardedRestoreOps
    name="restore_shard"))
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 406, in _AddRestoreOps
    restore_sequentially)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 862, in bulk_restore
    return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_io_ops.py", line 1549, in restore_v2
    shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3274, in create_op
    op_def=op_def)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

NotFoundError (see above for traceback): Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key baseconv_1d/bias not found in checkpoint
	 [[node save/RestoreV2 (defined at <ipython-input-14-329b1d812262>:2)  = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]


In [11]:
# Check the accuracy on the evaluation data.
est.evaluate(eval_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-08-09-08:59:16
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\user\Desktop\Natural Language Processing\02. NLP_with_Tensorflow\./data_out/checkpoint/cnn/model.ckpt-3750


NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key baseconv_1d/bias not found in checkpoint
	 [[node save/RestoreV2 (defined at <ipython-input-11-a5ea96236711>:2)  = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]

Caused by op 'save/RestoreV2', defined at:
  File "C:\Users\user\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\user\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\user\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
    self.io_loop.start()
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\user\Anaconda3\lib\asyncio\base_events.py", line 427, in run_forever
    self._run_once()
  File "C:\Users\user\Anaconda3\lib\asyncio\base_events.py", line 1440, in _run_once
    handle._run()
  File "C:\Users\user\Anaconda3\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\ioloop.py", line 758, in _run_callback
    ret = callback()
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 1233, in inner
    self.run()
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3191, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-a5ea96236711>", line 2, in <module>
    est.evaluate(eval_input_fn)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 478, in evaluate
    return _evaluate()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 467, in _evaluate
    output_dir=self.eval_dir(name))
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1591, in _evaluate_run
    config=self._session_config)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\evaluation.py", line 271, in _evaluate_once
    session_creator=session_creator, hooks=hooks) as session:
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 921, in __init__
    stop_grace_period_secs=stop_grace_period_secs)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 643, in __init__
    self._sess = _RecoverableSession(self._coordinated_creator)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1107, in __init__
    _WrappedSession.__init__(self, self._create_session())
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1112, in _create_session
    return self._sess_creator.create_session()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 800, in create_session
    self.tf_sess = self._session_creator.create_session()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 557, in create_session
    self._scaffold.finalize()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 213, in finalize
    self._saver = training_saver._get_saver_or_default()  # pylint: disable=protected-access
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 886, in _get_saver_or_default
    saver = Saver(sharded=True, allow_empty=True)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1102, in __init__
    self.build()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1114, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1151, in _build
    build_save=build_save, build_restore=build_restore)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 789, in _build_internal
    restore_sequentially, reshape)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 459, in _AddShardedRestoreOps
    name="restore_shard"))
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 406, in _AddRestoreOps
    restore_sequentially)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 862, in bulk_restore
    return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_io_ops.py", line 1549, in restore_v2
    shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3274, in create_op
    op_def=op_def)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

NotFoundError (see above for traceback): Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key baseconv_1d/bias not found in checkpoint
	 [[node save/RestoreV2 (defined at <ipython-input-11-a5ea96236711>:2)  = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]


In [15]:
# File names of the test set (inside DATA_IN_PATH).
TEST_Q1_DATA_FILE = 'test_q1.npy'
TEST_Q2_DATA_FILE = 'test_q2.npy'
TEST_ID_DATA_FILE = 'test_id.npy'

# Load the test data. The path is passed directly so that np.load opens and
# closes the file itself; wrapping it in a bare open(...) left the file
# handles unclosed.
test_q1_data = np.load(DATA_IN_PATH + TEST_Q1_DATA_FILE)
test_q2_data = np.load(DATA_IN_PATH + TEST_Q2_DATA_FILE)
test_id_data = np.load(DATA_IN_PATH + TEST_ID_DATA_FILE)

In [16]:
# Input function for prediction: feeds the two test question arrays under the
# same feature keys the model function reads, in their original order.
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x1": test_q1_data, "x2": test_q2_data},
    shuffle=False,
)

# est.predict yields one dict per example; keep only the similarity score.
predictions = np.array(
    [result['is_duplicate'] for result in est.predict(input_fn=predict_input_fn)]
)

# Write the scores next to their test ids.
output = pd.DataFrame(
    data={"test_id": test_id_data, "is_duplicate": list(predictions)}
)
# quoting=3 corresponds to csv.QUOTE_NONE.
output.to_csv("cnn_predict.csv", index=False, quoting=3)

Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\user\Desktop\Natural Language Processing\02. NLP_with_Tensorflow\./data_out/checkpoint/cnn/model.ckpt-3750


NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key baseconv_1d/bias not found in checkpoint
	 [[node save/RestoreV2 (defined at <ipython-input-16-9aef9d08e57f>:5)  = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]

Caused by op 'save/RestoreV2', defined at:
  File "C:\Users\user\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\user\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\user\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
    self.io_loop.start()
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\user\Anaconda3\lib\asyncio\base_events.py", line 427, in run_forever
    self._run_once()
  File "C:\Users\user\Anaconda3\lib\asyncio\base_events.py", line 1440, in _run_once
    handle._run()
  File "C:\Users\user\Anaconda3\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\ioloop.py", line 758, in _run_callback
    ret = callback()
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 1233, in inner
    self.run()
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "C:\Users\user\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\user\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "C:\Users\user\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-16-9aef9d08e57f>", line 5, in <module>
    predictions = np.array([p['is_duplicate'] for p in est.predict(input_fn=predict_input_fn)])
  File "<ipython-input-16-9aef9d08e57f>", line 5, in <listcomp>
    predictions = np.array([p['is_duplicate'] for p in est.predict(input_fn=predict_input_fn)])
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 593, in predict
    hooks=all_hooks) as mon_sess:
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 921, in __init__
    stop_grace_period_secs=stop_grace_period_secs)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 643, in __init__
    self._sess = _RecoverableSession(self._coordinated_creator)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1107, in __init__
    _WrappedSession.__init__(self, self._create_session())
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 1112, in _create_session
    return self._sess_creator.create_session()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 800, in create_session
    self.tf_sess = self._session_creator.create_session()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 557, in create_session
    self._scaffold.finalize()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 213, in finalize
    self._saver = training_saver._get_saver_or_default()  # pylint: disable=protected-access
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 886, in _get_saver_or_default
    saver = Saver(sharded=True, allow_empty=True)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1102, in __init__
    self.build()
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1114, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1151, in _build
    build_save=build_save, build_restore=build_restore)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 789, in _build_internal
    restore_sequentially, reshape)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 459, in _AddShardedRestoreOps
    name="restore_shard"))
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 406, in _AddRestoreOps
    restore_sequentially)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 862, in bulk_restore
    return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_io_ops.py", line 1549, in restore_v2
    shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3274, in create_op
    op_def=op_def)
  File "C:\Users\user\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

NotFoundError (see above for traceback): Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key baseconv_1d/bias not found in checkpoint
	 [[node save/RestoreV2 (defined at <ipython-input-16-9aef9d08e57f>:5)  = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT64, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
