# Import Packages

In [1]:
# For Data Preparation
import tensorflow as tf
import numpy as np
import pandas as pd
import re # regular expressions


# To clean up texts
import nltk.data
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer('english')
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
nltk.download('wordnet')
nltk.download('punkt')
tokenizer = nltk.data.load('nltk:tokenizers/punkt/english.pickle')


# For Word Embedding
from collections import Counter
import gensim
import gensim.models as g
from gensim.models import Word2Vec
from gensim.models import Phrases

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib as mpl
import matplotlib.pyplot as plt


# For the Model
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential, Model
from keras.layers import LSTM, Bidirectional,Dropout, Input, SpatialDropout1D, CuDNNLSTM, Dense
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences



from mlxtend.plotting import plot_learning_curves
import matplotlib.pyplot as plt
from mlxtend.data import iris_data
from mlxtend.preprocessing import shuffle_arrays_unison

import logging

import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.model_selection import learning_curve
from sklearn.model_selection import ShuffleSplit


import tensorflowjs as tfjs


[nltk_data] Downloading package wordnet to /Users/apple/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /Users/apple/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Using TensorFlow backend.


# Data Preparation

In [3]:
# Load data
IMDB_train = pd.read_csv('./IMDB-train.txt', sep='\t', encoding='latin-1', header=None)
IMDB_train_y = IMDB_train[:][1]
IMDB_valid = pd.read_csv('./IMDB-valid.txt', sep='\t', encoding='latin-1', header=None)
IMDB_valid_y = IMDB_valid[:][1]
IMDB_test = pd.read_csv('./IMDB-test.txt', sep='\t', encoding='latin-1', header=None)
IMDB_test_y = IMDB_test[:][1]
stemmer = SnowballStemmer("english", ignore_stopwords=True)

print("Data loaded.")

Data loaded.


In [4]:
frames = [IMDB_train, IMDB_valid]
frames_y = [IMDB_train_y, IMDB_valid_y]
IMDB_train = pd.concat(frames)
IMDB_train_y = pd.concat(frames_y)

In [5]:
#IMDB_train = IMDB_train[:][0]
#IMDB_test = IMDB_test[:][0]

In [6]:
IMDB_train[:10]

Unnamed: 0,0,1
0,For a movie that gets no respect there sure ar...,1
1,Bizarre horror movie filled with famous faces ...,1
2,"A solid, if unremarkable film. Matthau, as Ein...",1
3,It's a strange feeling to sit alone in a theat...,1
4,"You probably all already know this by now, but...",1
5,I saw the movie with two grown children. Altho...,1
6,You're using the IMDb.<br /><br />You've given...,1
7,This was a good film with a powerful message o...,1
8,"Made after QUARTET was, TRIO continued the qua...",1
9,"For a mature man, to admit that he shed a tear...",1


In [7]:
def preprocessing(data):
    new_data = []
    #i = 0
    for sentence in (data[:][0]):
        #clean = re.compile('<.*?>')
        new_sentence = re.sub('<.*?>', '', sentence) # remove HTML tags
        new_sentence = re.sub(r'[^\w\s]', '', new_sentence) # remove punctuation
        new_sentence = new_sentence.lower() # convert to lower case
        if new_sentence != '':
            new_data.append(new_sentence)
    return new_data

In [8]:
IMDB_train = preprocessing(IMDB_train)
IMDB_test = preprocessing(IMDB_test)

In [None]:
IMDB_train[0]

In [9]:
# Convert a sentence into a list of words
def sentence_to_wordlist(sentence, remove_stopwords=False):
    # Convert words to lower case and split them
    words = sentence.lower().split()
    # Lemmatizing
    #words = [lemmatizer.lemmatize(word) for word in words]
    # 6. Return a list of words
    return(words)

In [10]:
# whole data into a list of sentences where each sentence is a list of word items
def list_of_sentences(data):
    sentences = []
    for i in data:
        sentences.append(sentence_to_wordlist(i))
    return sentences

In [11]:
train_x = list_of_sentences(IMDB_train)
train_y = IMDB_train_y.tolist()

In [12]:
len(train_x)

25000

# Word Embedding

In [13]:
# Create Word Vectors

wv_model = Word2Vec(size=128, window=5, min_count=4, workers=4)

wv_model.build_vocab(train_x) 
wv_model.train(train_x, total_examples=wv_model.corpus_count, epochs=wv_model.iter)
word_vectors = wv_model.wv
words = list(wv_model.wv.vocab)

# Calling init_sims will make the model will be better for memory
# if we don't want to train the model over and over again
wv_model.init_sims(replace=True)

#n_words = print(len(words))

print("Number of word vectors: {}".format(len(word_vectors.vocab)))

# save model
wv_model.wv.save_word2vec_format('model.txt', binary=False)

# load model
#new_model = Word2Vec.load('model.bin')



  


Number of word vectors: 35674


In [14]:
import gensim
# Need the interactive Tools for Matplotlib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

In [15]:
from gensim.models.keyedvectors import KeyedVectors

new_model = KeyedVectors.load_word2vec_format('model.txt')
#model.save_word2vec_format('model.txt', binary=False)

In [16]:
#from gensim.scripts.glove2word2vec import glove2word2vec
#glove2word2vec(glove_input_file="model.txt", word2vec_output_file="model.word2vec.txt")
#glove2word2vec(glove_input_file="out_vec.txt", word2vec_output_file="retrofittedglove.word2vec.txt")

In [17]:
original_model = KeyedVectors.load_word2vec_format('model.txt')
retrofitted_model = KeyedVectors.load_word2vec_format('out_vec.txt')

In [18]:
new_words = list(retrofitted_model.wv.vocab)

  """Entry point for launching an IPython kernel.


In [19]:
new_words

['the',
 'and',
 'a',
 'of',
 'to',
 'is',
 'in',
 'it',
 'this',
 'i',
 'that',
 'was',
 'as',
 'with',
 'for',
 'movie',
 'but',
 'film',
 'on',
 'not',
 'you',
 'are',
 'his',
 'have',
 'be',
 'he',
 'one',
 'its',
 'at',
 'all',
 'by',
 'an',
 'they',
 'who',
 'from',
 'like',
 'so',
 'her',
 'or',
 'just',
 'about',
 'has',
 'out',
 'if',
 'some',
 'what',
 'there',
 'good',
 'more',
 'very',
 'when',
 'she',
 'even',
 'up',
 'no',
 'would',
 'my',
 'which',
 'only',
 'time',
 'really',
 'story',
 'their',
 'were',
 'had',
 'see',
 'can',
 'me',
 'than',
 'we',
 'much',
 'been',
 'get',
 'well',
 'will',
 'into',
 'because',
 'people',
 'other',
 'do',
 'also',
 'bad',
 'great',
 'how',
 'first',
 'most',
 'him',
 'dont',
 'made',
 'movies',
 'then',
 'films',
 'make',
 'could',
 'way',
 'them',
 'any',
 'too',
 'after',
 'characters',
 'think',
 'watch',
 'two',
 'many',
 'being',
 'seen',
 'character',
 'never',
 'little',
 'acting',
 'plot',
 'where',
 'best',
 'love',
 'did',


In [20]:
# Build dictionary & inv_vocab

def create_vocab(data_collect, max_vocab):
    # Get raw data
    x_list = data_collect
    sample_count = sum([len(x) for x in x_list])
    words = []
    for data in x_list:
        words.extend([data])
    count = Counter(words) # word count
    inv_vocab = [x[0] for x in count.most_common(max_vocab)]
    vocab = {x: i for i, x in enumerate(inv_vocab, 1)}
    return vocab, inv_vocab

In [21]:
vocab, inv_vocab = create_vocab(words, len(words))
ret_vocab, ret_inv_vocab = create_vocab(new_words, len(new_words))

In [22]:
len(inv_vocab)

35674

In [23]:
# Find the max length sentence
def find_max_length_sentence(sentence):
    max_length = 0
    for i in sentence:
        length = len(sentence_to_wordlist(i))
        if max_length < length:
            max_length = length
    return max_length

In [24]:
seq_length = find_max_length_sentence(IMDB_train)
print(seq_length)

2450


In [25]:
# Map each word to corresponding vector
def map_to_vec(word):
    vec = wv_model[word]
    return vec

#map_to_vec('care')

In [26]:
# Embedding Matrix
def make_emb_matrix(inv_vocab):
    emb_matrix = []
    for word in inv_vocab:
        emb_matrix.append(map_to_vec(word))
    return emb_matrix

In [27]:
embedding = np.asarray(make_emb_matrix(inv_vocab))
ret_embedding = np.asarray(make_emb_matrix(ret_inv_vocab))


  This is separate from the ipykernel package so we can avoid doing imports until


In [81]:
len(inv_vocab)

35674

# Initialize Word Embeddings in Keras

In [30]:
wv_dim = 100
num_words = len(word_vectors.vocab)
vocab = Counter(words)
ret_vocab = Counter(new_words)

In [31]:

word_index = {t[0]: i+1 for i,t in enumerate(vocab.most_common(num_words-1))}

train_sequences = [[word_index.get(t, 0) for t in sentence]
             for sentence in IMDB_train[:len(IMDB_train)]]

test_sequences = [[word_index.get(t, 0)
                   for t in sentence] for sentence in IMDB_test[:len(IMDB_test)]]

# Pad zeros to match the size of matrix
train_data = pad_sequences(train_sequences, maxlen=seq_length, padding="post", truncating="post")
test_data = pad_sequences(test_sequences, maxlen=seq_length, padding="post", truncating="post")


In [32]:
# Initialize the matrix with random numbers
wv_matrix = (np.random.rand(num_words, wv_dim) - 0.5) / 5.0
for word, i in word_index.items():
    if i >= num_words:
        continue
    try:
        embedding_vector = word_vectors[word]
        # words not found in embedding index will be all-zeros.
        wv_matrix[i] = embedding_vector
    except:
        pass


In [33]:

ret_word_index = {t[0]: i+1 for i,t in enumerate(ret_vocab.most_common(num_words-1))}

ret_train_sequences = [[ret_word_index.get(t, 0) for t in sentence]
             for sentence in IMDB_train[:len(IMDB_train)]]

ret_test_sequences = [[ret_word_index.get(t, 0)
                   for t in sentence] for sentence in IMDB_test[:len(IMDB_test)]]

# Pad zeros to match the size of matrix
ret_train_data = pad_sequences(ret_train_sequences, maxlen=seq_length, padding="post", truncating="post")
ret_test_data = pad_sequences(ret_test_sequences, maxlen=seq_length, padding="post", truncating="post")


In [34]:
# Initialize the matrix with random numbers
ret_wv_matrix = (np.random.rand(num_words, wv_dim) - 0.5) / 5.0
for word, i in ret_word_index.items():
    if i >= num_words:
        continue
    try:
        ret_embedding_vector = ret_word_vectors[word]
        # words not found in embedding index will be all-zeros.
        ret_wv_matrix[i] = ret_embedding_vector
    except:
        pass


#  Model

In [35]:
# Embedding
wv_layer = Embedding(num_words,
                     wv_dim,
                     mask_zero=False,
                     weights=[wv_matrix],
                     input_length=seq_length,
                     trainable=False)

ret_wv_layer = Embedding(num_words,
                     wv_dim,
                     mask_zero=False,
                     weights=[ret_wv_matrix],
                     input_length=seq_length,
                     trainable=False)

# Inputs
comment_input = Input(shape=(seq_length,), dtype='int64')
embedded_sequences = wv_layer(comment_input) # regular word2vec
embedded_sequences = SpatialDropout1D(0.2)(embedded_sequences)


#ret_embedded_sequences = ret_wv_layer(comment_input)# retrofitted word2vec


# LSTM
x = Bidirectional(LSTM(64, return_sequences=False))(embedded_sequences)

# Output
x = Dropout(0.2)(x)
x = BatchNormalization()(x)
preds = Dense(1, activation='sigmoid')(x)

# build the model
model = Model(inputs=[comment_input], outputs=preds)
model.compile(loss='binary_crossentropy',   #binary_crossentropy
              optimizer=Adam(lr=0.001, clipnorm=.25, beta_1=0.7, beta_2=0.99),
              metrics=['accuracy'])

print(model.summary())

hist = model.fit(train_data, IMDB_train_y, validation_data=(test_data, IMDB_test_y), epochs=15, batch_size=32)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 2450)              0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 2450, 100)         3567400   
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 2450, 100)         0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 128)               84480     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 128)               512       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 129       
Total para

KeyboardInterrupt: 

In [90]:
# Embedding
wv_layer = Embedding(num_words,
                     wv_dim,
                     mask_zero=False,
                     weights=[wv_matrix],
                     input_length=seq_length,
                     trainable=False)

ret_wv_layer = Embedding(num_words,
                     wv_dim,
                     mask_zero=False,
                     weights=[ret_wv_matrix],
                     input_length=seq_length,
                     trainable=False)

# Inputs
comment_input = Input(shape=(seq_length,), dtype='int64')
embedded_sequences = wv_layer(comment_input) # regular word2vec
embedded_sequences = SpatialDropout1D(0.2)(embedded_sequences)

In [91]:
sess=tf.InteractiveSession()
with sess.as_default():
    embedded_sequences.eval()

InvalidArgumentError: You must feed a value for placeholder tensor 'input_3' with dtype int64 and shape [?,2450]
	 [[node input_3 (defined at /Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:515)  = Placeholder[dtype=DT_INT64, shape=[?,2450], _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'input_3', defined at:
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/asyncio/base_events.py", line 427, in run_forever
    self._run_once()
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/asyncio/base_events.py", line 1440, in _run_once
    handle._run()
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-90-c9e0f4dfc298>", line 17, in <module>
    comment_input = Input(shape=(seq_length,), dtype='int64')
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/keras/engine/input_layer.py", line 177, in Input
    input_tensor=tensor)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/keras/engine/input_layer.py", line 86, in __init__
    name=self.name)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 515, in placeholder
    x = tf.placeholder(dtype, shape=shape, name=name)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1747, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 5206, in placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_3' with dtype int64 and shape [?,2450]
	 [[node input_3 (defined at /Users/apple/anaconda3/envs/py36/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:515)  = Placeholder[dtype=DT_INT64, shape=[?,2450], _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


In [92]:
def cnn(sequence,labels,mode):
    wv_layer = Embedding(num_words,
                     wv_dim,
                     mask_zero=False,
                     weights=[wv_matrix],
                     input_length=seq_length,
                     trainable=False)
    embed_seq=tf.reshape(wv_layer(sequence),[-1,2450,100,1])
    
    conv1=tf.layers.conv2d(inputs=embed_seq,filters=32,kernel_size=[5,100],padding="valid",
      activation=tf.nn.relu)
    
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    
    conv2 = tf.layers.conv2d(
      inputs=pool1,
      filters=64,
      kernel_size=[6, 100],
      padding="valid",
      activation=tf.nn.relu)
    
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    
    pool2_flat = tf.reshape(pool2, [-1, 609 * 100 * 64])
    
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    
    dropout = tf.layers.dropout(
    inputs=dense, rate=0.1, training=mode == tf.estimator.ModeKeys.TRAIN)
    
    logits = tf.layers.dense(inputs=dropout, units=2)
    
    predictions = {
      # Generate predictions (for PREDICT and EVAL mode)
      "classes": tf.argmax(input=logits, axis=1),
      # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
      # `logging_hook`.
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    
    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001) # !tune hyperparameter!
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    
    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])}
    
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)


In [None]:
def main(inputs,labels):
    classifier = tf.estimator.Estimator(
        model_fn=cnn, model_dir="/tmp/cnn_model")
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)
    # train model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=IMDB_train_y,
        batch_size=100, # tune
        num_epochs=8, # tune
        shuffle=True)
    classifier.train(input_fn=train_input_fn,
        steps=20000,
        hooks=[logging_hook])
    # Eval model
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)
comment_input = Input(shape=(seq_length,), dtype='int64')
cnn_model=Model(inputs=[comment_input],outputs=cnn(inputs,))