In [1]:
import tensorflow as tf
import numpy as np

In [2]:
import os
import time
import datetime
import data_helpers
from tensorflow.contrib import learn

In [3]:
# Parameters
# ==================================================
"""
DEFINE_integer(flag_name, default_value, docstring):
  Defines a flag of type 'int'.
  Args:
   flag_name: The name of the flag as a string.
   default_value: The default value the flag should take as an int.
   docstring: A helpful message explaining the use of the flag.
"""
# Data loading params
# dev_sample_percentage is the fraction of the examples held out for validation;
# the two *_data_file flags are the paths handed to data_helpers.load_data_and_labels.
tf.flags.DEFINE_float("dev_sample_percentage", .1, "Percentage of the training data to use for validation")
tf.flags.DEFINE_string("positive_data_file", "./data/rt-polaritydata/rt-polarity.pos", "Data source for the positive data.")
# Help text corrected: this flag points at the negative examples (it used to say "positive").
tf.flags.DEFINE_string("negative_data_file", "./data/rt-polaritydata/rt-polarity.neg", "Data source for the negative data.")

# Model Hyperparameters
# filter_sizes is kept as a comma-separated string and parsed into a list of ints where the model is built.
tf.flags.DEFINE_integer("embedding_dim", 128, "Dimensionality of character embedding (default: 128)")
tf.flags.DEFINE_string("filter_sizes", "3,4,5", "Comma-separated filter sizes (default: '3,4,5')")
tf.flags.DEFINE_integer("num_filters", 128, "Number of filters per filter size (default: 128)")
tf.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)")
tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularizaion lambda (default: 0.0)")

In [4]:
# Training parameters
tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
# Help text corrected so the advertised default matches the actual default value (2).
tf.flags.DEFINE_integer("num_epochs", 2, "Number of training epochs (default: 2)")
tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")
# Misc Parameters
# Both flags describe device-placement options (soft placement / placement logging).
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

In [5]:
# Materialise the flag values and echo them, sorted by name, so the run configuration
# is visible in the cell output.
FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()

print("\nParameters:")
parsed_flags = sorted(FLAGS.__flags.items())
for attr, value in parsed_flags:
    line = "{}={}".format(attr.upper(), value)
    print(line)
print("")

# Data Preparation
# ===========================================================================
# Read the sentences (x_text) and their labels (y) from the two files named by the flags.
print("Loading data...Started")
positive_path = FLAGS.positive_data_file
negative_path = FLAGS.negative_data_file
x_text, y = data_helpers.load_data_and_labels(positive_path, negative_path)
print("Loading data...Finished")


Parameters:
ALLOW_SOFT_PLACEMENT=True
BATCH_SIZE=64
CHECKPOINT_EVERY=100
DEV_SAMPLE_PERCENTAGE=0.1
DROPOUT_KEEP_PROB=0.5
EMBEDDING_DIM=128
EVALUATE_EVERY=100
FILTER_SIZES=3,4,5
L2_REG_LAMBDA=0.0
LOG_DEVICE_PLACEMENT=False
NEGATIVE_DATA_FILE=./data/rt-polaritydata/rt-polarity.neg
NUM_EPOCHS=2
NUM_FILTERS=128
POSITIVE_DATA_FILE=./data/rt-polaritydata/rt-polarity.pos

Loading data...Started
Loading data...Finished


In [6]:
# Build vocabulary
# The longest sentence, counted in space-separated tokens, sets the padded length
# (56 for the current dataset).
token_counts = [len(sentence.split(" ")) for sentence in x_text]
max_document_length = max(token_counts)
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
# Inspection notes taken on the freshly created processor:
#   vocab_processor.min_frequency, vocab_processor.max_document_length ~~> 0, 56
#   vocab_processor.vocabulary_.__dict__ ~~> {'_freeze': False, '_freq': defaultdict(int, {}),
#       '_mapping': {'<UNK>': 0}, '_reverse_mapping': ['<UNK>'], '_support_reverse': True,
#       '_unknown_token': '<UNK>'}
# fit_transform learns the vocabulary and yields one id array per sentence; list() materialises them.
trainedVocab_Processor = vocab_processor.fit_transform(x_text)
listTrainedVocab = list(trainedVocab_Processor)
"""
len(listTrainedVocab)~>10662
x_text[0] ~> "the rock is destined to be the 21st century 's new conan and that he 's going to make a splash even 
greater than arnold schwarzenegger , jean claud van damme or steven segal"
listTrainedVocab[0] ~> array([ 1,  2,  3,  4,  5,  6,  1,  7,  8,  9, 10, 11, 12, 13, 14,  9, 15,5, 16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,  0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0,  0,  
0,  0,  0]) 
listTrainedVocab[1] ~> array([ 1, 31, 32, 33, 34,  1, 35, 34,  1, 36, 37,  3, 38, 39, 13, 17, 40, 34, 41, 42, 43, 44, 45,
46, 47, 48, 49,  9, 50, 51, 34, 52, 53, 53, 54,  9, 55, 56,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0,  0,  
0,  0,  0])
x_text[1] ~> "the gorgeously elaborate continuation of the lord of the rings trilogy is so huge that a column of words 
cannot adequately describe co writer director peter jackson 's expanded vision of j r r tolkien 's middle earth"
len(listTrainedVocab[0]) ~> 56
len(listTrainedVocab[1]) ~> 56
x_text[10661]~> "enigma is well made , but it 's just too dry and too placid"
listTrainedVocab[10661] ~> array([11512,     3,   147,   113,    58,    84,     9,   655,    59,2766,    12,    59,  8453,     0,     0,     0,     
 0,0, 0,     0,     0,     0,     0,     0,     0,     0,     0,0,     0,     0,     0,     0,     0,     0,     0,     
 0, 0,     0,     0,     0,     0,     0,     0,     0,     0, 0,0, 0,0,0,0,     0,     0,     0, 0,     0])
listTrainedVocab[10659]~> array([   75,    84,  1949, 10191,  2045,   114,     1, 18755, 12889,1293,835,34, 1, 18756,  
5333,188,  1682,  1334,34,    17,  4317,  2490,   996,   121, 12311,  8524,    34,7369, 12085, 18757,   419,     1,  
2490,     9,  1473,    34,  7327, 0,0,0,0, 0,0,0, 0, 0,0, 0, 0,0,0, 0,0,0,0,0])
"""
# Reuse the id arrays already materialised in listTrainedVocab instead of calling
# fit_transform a second time: the extra call re-tokenised the whole corpus and re-ran
# fit on the already-fitted processor (NOTE(review): in the contrib implementation this
# appears to add to the token frequency table again - confirm).
x = np.array(listTrainedVocab)
# Cell output: number of encoded sentences and the first padded id vector.
len(x),x[0]

(10662,
 array([ 1,  2,  3,  4,  5,  6,  1,  7,  8,  9, 10, 11, 12, 13, 14,  9, 15,
         5, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0]))

In [7]:
# Randomly shuffle data
# A fixed seed makes the permutation (and therefore the train/dev split) reproducible;
# the same index order is applied to inputs and labels so pairs stay aligned.
np.random.seed(10)
sample_positions = np.arange(len(y))
shuffle_indices = np.random.permutation(sample_positions)
x_shuffled, y_shuffled = x[shuffle_indices], y[shuffle_indices]

In [8]:
# Split train/dev set
# TODO: This is very crude, should use cross-validation
# Number of examples held out for validation (the last ~10% of the shuffled data by default).
num_dev_samples = int(FLAGS.dev_sample_percentage * float(len(y)))
# Negative offset from the end, kept only because the printout below reports it.
dev_sample_index = -1 * num_dev_samples
print(dev_sample_index,len(y))
# Slice with an explicit non-negative boundary. Slicing with the negative offset breaks
# when the dev count rounds down to 0: the offset becomes -0 == 0, which puts every
# example in the dev set and leaves the training set empty.
split_point = max(len(y) - num_dev_samples, 0)
x_train, x_dev = x_shuffled[:split_point], x_shuffled[split_point:]
y_train, y_dev = y_shuffled[:split_point], y_shuffled[split_point:]
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

-1066 10662
Vocabulary Size: 18758
Train/Dev split: 9596/1066


In [9]:
# Peek at the attributes stored on the fitted vocabulary object.
dictVocab = vocab_processor.vocabulary_
vocab_attributes = vars(dictVocab)
type(vocab_attributes)
# Cell output: the attribute names of the vocabulary object.
vocab_attributes.keys()

dict_keys(['_unknown_token', '_reverse_mapping', '_freeze', '_mapping', '_support_reverse', '_freq'])

In [10]:
# Sanity print of everything the model construction below is parameterised by.
vocabulary_size = len(vocab_processor.vocabulary_)
print(x_train.shape, y_train.shape, vocabulary_size, FLAGS.embedding_dim)
parsed_filter_sizes = [int(size) for size in FLAGS.filter_sizes.split(",")]
print(parsed_filter_sizes, FLAGS.num_filters, FLAGS.l2_reg_lambda)

(9596, 56) (9596, 2) 18758 128
[3, 4, 5] 128 0.0


In [11]:
# Values derived from the prepared data.
sequence_length = x_train.shape[1]   # second axis of x_train: padded sentence length
num_classes = y_train.shape[1]       # second axis of y_train: one column per class
vocab_size = len(vocab_processor.vocabulary_)

# Values taken from the command-line flags.
embedding_size = FLAGS.embedding_dim
num_filters = FLAGS.num_filters
l2_reg_lambda = FLAGS.l2_reg_lambda
# "3,4,5" -> [3, 4, 5]
filter_sizes = [int(size) for size in FLAGS.filter_sizes.split(",")]

In [12]:
# Graph inputs. A placeholder is a slot whose value is fed to the network each time it is
# executed (at train or test time). A None entry in the shape leaves that dimension
# unconstrained, which lets the network accept batches of any size.
input_x = tf.placeholder(tf.int32, shape=[None, sequence_length], name="input_x")
input_y = tf.placeholder(tf.float32, shape=[None, num_classes], name="input_y")
# The keep probability is fed in as well, so dropout can be enabled for training
# and disabled while evaluating the model.
dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
# Running total of the L2 regularisation penalty (optional); starts at zero.
l2_loss = tf.constant(0.0)

In [13]:
# Embedding layer
"""
The first layer we define is the embedding layer, which maps vocabulary word indices into low-dimensional 
vector representations. It’s essentially a lookup table that we learn from data.
"""
"""
tf.device("/cpu:0") forces an operation to be executed on the CPU. By default TensorFlow will try to put the 
operation on the GPU if one is available, but the embedding implementation doesn’t currently have GPU support 
and throws an error if placed on the GPU.
tf.name_scope creates a new Name Scope with the name “embedding”. The scope adds all operations into a top-level 
node called “embedding” so that we get a nice hierarchy when visualizing our network in TensorBoard.
"""
"""
W is our embedding matrix that we learn during training. We initialize it using a random uniform distribution. 

tf.nn.embedding_lookup creates the actual embedding operation. The result of the embedding operation is a 
3-dimensional tensor of shape [None, sequence_length, embedding_size].

TensorFlow’s convolutional conv2d operation expects a 4-dimensional tensor with dimensions corresponding to 
batch, height, width and channel. The result of our embedding doesn’t contain the channel dimension, so we 
add it manually, leaving us with a layer of shape [None, sequence_length, embedding_size, 1].
"""
print(vocab_size,embedding_size)#18758,128
embedded_chars = None
embedded_chars_expanded = None
# Pin the embedding ops to the CPU and group them under the "embedding" name scope.
with tf.device('/cpu:0'):
    with tf.name_scope("embedding"):
        # Embedding matrix of shape [vocab_size, embedding_size] (here [18758, 128]),
        # initialised uniformly in [-1, 1).
        initial_embeddings = tf.random_uniform([vocab_size, embedding_size], minval=-1.0, maxval=1.0)
        W = tf.Variable(initial_embeddings, name="W")
        print("W : ",W.value())
        print("Input_x[0] : ",input_x[0])
        # Look up one embedding row per word id: [None, 56] ids -> [None, 56, 128].
        embedded_chars = tf.nn.embedding_lookup(W, input_x)
        print("Embedded Chars : ",embedded_chars)
        # Append a trailing channel axis for conv2d: [None, 56, 128] -> [None, 56, 128, 1].
        embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)
        print("Expanded Embedded Chars : ",embedded_chars_expanded)

18758 128
W :  Tensor("embedding/W/read:0", shape=(18758, 128), dtype=float32, device=/device:CPU:0)
Input_x[0] :  Tensor("embedding/strided_slice:0", shape=(56,), dtype=int32, device=/device:CPU:0)
Embedded Chars :  Tensor("embedding/embedding_lookup:0", shape=(?, 56, 128), dtype=float32, device=/device:CPU:0)
Expanded Embedded Chars :  Tensor("embedding/ExpandDims:0", shape=(?, 56, 128, 1), dtype=float32, device=/device:CPU:0)


In [22]:
# Create a convolution + maxpool layer for each filter size
"""
We use filters of different sizes. Because each convolution produces tensors of different shapes we need to 
iterate through them, create a layer for each of them, and then merge the results into one big feature vector.

W :filter matrix and h is the result of applying the nonlinearity to the convolution output. 
Each filter slides over the whole embedding, but varies in how many words it covers. 

"VALID" padding means that we slide the filter over our sentence without padding the edges, performing a narrow 
convolution that gives us an output of shape [batch_size, sequence_length - filter_size + 1, 1, num_filters]. 

Performing max-pooling over the output of a specific filter size leaves us with a tensor of shape 
[batch_size, 1, 1, num_filters]. This is essentially a feature vector, where the last dimension corresponds to our 
features. Once we have all the pooled output tensors from each filter size we combine them into one long feature 
vector of shape [batch_size, num_filters_total]. 

Using -1 in tf.reshape tells TensorFlow to flatten the dimension when possible.
"""
# One convolution + max-pool branch per filter size; each branch's pooled tensor is collected
# in pooled_outputs. The prints trace the static shape after every step.
pooled_outputs = []
for i, filter_size in enumerate(filter_sizes):
    branch_scope = "conv-maxpool-%s" % filter_size
    with tf.name_scope(branch_scope):
        # Convolution layer: each filter spans filter_size words and the full embedding width.
        filter_shape = [filter_size, embedding_size, 1, num_filters]
        print("Filter Shape (filter_size, embedding_size, 1, num_filters) : ",filter_shape)
        initial_filter = tf.truncated_normal(filter_shape, stddev=0.1)
        W = tf.Variable(initial_filter, name="W")
        print("W : ",W.get_shape())
        initial_bias = tf.constant(0.1, shape=[num_filters])
        b = tf.Variable(initial_bias, name="b")
        print("B : ",b.get_shape())
        print("Embedded Chars : ",embedded_chars_expanded.get_shape())
        # Stride 1 with "VALID" padding, i.e. no padding at the sentence edges.
        conv = tf.nn.conv2d(embedded_chars_expanded, W, strides=[1, 1, 1, 1], padding="VALID", name="conv")
        print("Conv : ",conv.get_shape())
        print("b : ",b.get_shape())
        # Bias followed by the ReLU nonlinearity.
        conv_Plus_b = tf.nn.bias_add(conv, b)
        print("conv+b : ",conv_Plus_b.get_shape())
        h = tf.nn.relu(conv_Plus_b, name="relu")
        print("h : ",h.get_shape())
        # Max-pool over every position the filter visited, leaving one value per filter.
        print("sequence_length, filter_size",(sequence_length , filter_size))
        pool_window = [1, sequence_length - filter_size + 1, 1, 1]
        print("ksize[1, sequence_length - filter_size + 1, 1, 1]~>",pool_window)
        pooled = tf.nn.max_pool(h, ksize=pool_window, strides=[1, 1, 1, 1], padding='VALID', name="pool")
        print("pooled : ",pooled.get_shape())
        pooled_outputs.append(pooled)
        print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")

Filter Shape (filter_size, embedding_size, 1, num_filters) :  [3, 128, 1, 128]
W :  (3, 128, 1, 128)
B :  (128,)
Embedded Chars :  (?, 56, 128, 1)
Conv :  (?, 54, 1, 128)
b :  (128,)
conv+b :  (?, 54, 1, 128)
h :  (?, 54, 1, 128)
sequence_length, filter_size (56, 3)
ksize[1, sequence_length - filter_size + 1, 1, 1]~> [1, 54, 1, 1]
pooled :  (?, 1, 1, 128)
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
Filter Shape (filter_size, embedding_size, 1, num_filters) :  [4, 128, 1, 128]
W :  (4, 128, 1, 128)
B :  (128,)
Embedded Chars :  (?, 56, 128, 1)
Conv :  (?, 53, 1, 128)
b :  (128,)
conv+b :  (?, 53, 1, 128)
h :  (?, 53, 1, 128)
sequence_length, filter_size (56, 4)
ksize[1, sequence_length - filter_size + 1, 1, 1]~> [1, 53, 1, 1]
pooled :  (?, 1, 1, 128)
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
Filter Shape (filter_size, embedding_size, 1, num_filters) :  [5, 128, 1, 128]
W :  (5, 128, 1, 

In [34]:
# Combine all the pooled features
# Every filter size contributes num_filters pooled features.
num_filters_total = num_filters * len(filter_sizes)
print("num_filters,numOfFilters,num_filters_total ~> ",(num_filters,len(filter_sizes),num_filters_total))
# Report the shape of every pooled tensor. Indexing [0], [1], [2] explicitly raised an
# IndexError whenever FLAGS.filter_sizes did not contain exactly three sizes.
print("pooled_outputs : ",len(pooled_outputs),*[p.get_shape() for p in pooled_outputs])
# Concatenate along the last (filter) axis.
# NOTE(review): tf.concat(axis, values) is the pre-1.0 TensorFlow argument order, which is
# what this notebook's captured output was produced with; TF >= 1.0 expects (values, axis).
h_pool = tf.concat(3, pooled_outputs)
print("h_pool ~> ",h_pool.get_shape())
# Flatten to [batch, num_filters_total]; -1 lets TensorFlow infer the batch dimension.
h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
print("h_pool_flat ~> ",h_pool_flat.get_shape())

num_filters,numOfFilters,num_filters_total ~>  (128, 3, 384)
pooled_outputs :  3 (?, 1, 1, 128) (?, 1, 1, 128) (?, 1, 1, 128)
h_pool ~>  (?, 1, 1, 384)
h_pool_flat ~>  (?, 384)


In [41]:
# Add dropout
# The keep probability comes from the dropout_keep_prob placeholder, so it is chosen per run.
h_drop = None
print("dropout_keep_prob : ",dropout_keep_prob)
print("h_pool_flat : ",h_pool_flat.get_shape())
with tf.name_scope("dropout"):
    h_drop = tf.nn.dropout(h_pool_flat, keep_prob=dropout_keep_prob)
print("h_drop : ",h_drop.get_shape())

dropout_keep_prob :  Tensor("dropout_keep_prob:0", dtype=float32)
h_pool_flat :  (?, 384)
h_drop :  (?, 384)


In [45]:
# Exploratory lookup: prints the built-in documentation of tf.get_variable, the function the
# output-layer cell uses to create its weight matrix. Not needed for the model itself.
help(tf.get_variable)

Help on function get_variable in module tensorflow.python.ops.variable_scope:

get_variable(name, shape=None, dtype=None, initializer=None, regularizer=None, trainable=True, collections=None, caching_device=None, partitioner=None, validate_shape=True, custom_getter=None)
    Gets an existing variable with these parameters or create a new one.
    
    This function prefixes the name with the current variable scope
    and performs reuse checks. See the
    [Variable Scope How To](../../how_tos/variable_scope/index.md)
    for an extensive description of how reusing works. Here is a basic example:
    
    ```python
    with tf.variable_scope("foo"):
        v = tf.get_variable("v", [1])  # v.name == "foo/v:0"
        w = tf.get_variable("w", [1])  # w.name == "foo/w:0"
    with tf.variable_scope("foo", reuse=True)
        v1 = tf.get_variable("v")  # The same as v above.
    ```
    
    If initializer is `None` (the default), the default initializer passed in
    the variable scope wi

In [44]:
# Final (unnormalized) scores and predictions
scores=None
predictions=None
with tf.name_scope("output"):
    print("[num_filters_total, num_classes] : ",[num_filters_total, num_classes])
    # Output weights of shape [num_filters_total, num_classes], Xavier-initialised.
    # NOTE(review): the captured traceback shows tf.get_variable("W") failing with
    # "Variable W already exists" when this cell is executed again in the same graph;
    # reset the graph / restart the kernel before re-running.
    W_1 = tf.get_variable(
        "W",
        shape=[num_filters_total, num_classes],
        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
    # Use the output weights W_1 here. The global name W still refers to the last
    # convolution filter (4-D), so using it regularised and multiplied the wrong tensor.
    l2_loss += tf.nn.l2_loss(W_1)
    l2_loss += tf.nn.l2_loss(b)
    # scores = h_drop . W_1 + b ; predictions = index of the highest score per row.
    scores = tf.nn.xw_plus_b(h_drop, W_1, b, name="scores")
    predictions = tf.argmax(scores, 1, name="predictions")

[num_filters_total, num_classes] :  [384, 2]


ValueError: Variable W already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:

  File "<ipython-input-42-cb99040426a7>", line 11, in <module>
    initializer=tf_initializer)
  File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):


In [2]:
# Graph inputs (this cell repeats the earlier placeholder cell and additionally
# re-derives sequence_length from x_train).
# A placeholder is a slot whose value is fed to the network each time it is executed
# (at train or test time). A None entry in the shape leaves that dimension unconstrained,
# which lets the network accept batches of any size.
sequence_length = x_train.shape[1]
input_x = tf.placeholder(tf.int32, shape=[None, sequence_length], name="input_x")
input_y = tf.placeholder(tf.float32, shape=[None, num_classes], name="input_y")
# The keep probability is fed in as well, so dropout can be enabled for training
# and disabled while evaluating the model.
dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
# Running total of the L2 regularisation penalty (optional); starts at zero.
l2_loss = tf.constant(0.0)

NameError: name 'sequence_length' is not defined

In [2]:
class TextCNN(object):
    """
    A CNN for text classification.
    Uses an embedding layer, followed by a convolutional, max-pooling and softmax layer.

    Args:
        sequence_length: Length of our sentences. All sentences are padded to have the
            same length [say 59].
        num_classes: Number of classes in the output layer, two in our case
            (positive and negative).
        vocab_size: Size of vocabulary. Defines the size of the embedding layer, which
            has shape [vocab_size, embedding_size].
        embedding_size: Dimensionality of the embedding layer.
        filter_sizes: Number of words we want the convolutional filters to cover. We will
            have num_filters for each size specified here. For example, [3, 4, 5] means
            filters that slide over 3, 4 and 5 words respectively, for a total of
            len(filter_sizes) * num_filters filters.
        num_filters: Number of filters per filter size.
        l2_reg_lambda: Weight of the L2 penalty on the output-layer weights and bias that
            is added to the loss (default 0.0, i.e. no penalty).

    Attributes created by the constructor:
        input_x, input_y, dropout_keep_prob: placeholders to feed at run time.
        embedded_chars, embedded_chars_expanded: embedding lookup and its 4-D expansion.
        h_pool, h_pool_flat, h_drop: pooled features, flattened, and after dropout.
        scores, predictions: unnormalized class scores and the arg-max class index.
        loss, accuracy: mean cross-entropy (plus L2 term) and mean prediction accuracy.
    """

    def __init__(self, sequence_length, num_classes, vocab_size,embedding_size, filter_sizes, 
                 num_filters, l2_reg_lambda=0.0):

        # Placeholders for input, output and dropout [pay special attention to the "self."].
        # A placeholder is a slot that is fed to the network when it is executed at
        # train/test time. A None entry in the shape means that dimension can be anything,
        # which lets the network handle arbitrarily sized batches.
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        # The probability of keeping a neuron in the dropout layer is also an input to the
        # network because dropout is enabled only during training and disabled for evaluation.
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer: maps vocabulary word indices to low-dimensional vectors; it is
        # essentially a lookup table learned from data.
        # tf.device("/cpu:0") forces the ops onto the CPU (the original notes say the
        # embedding implementation had no GPU support at the time - may no longer hold).
        # tf.name_scope groups the ops under an "embedding" node for TensorBoard.
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            # W is the embedding matrix, initialised from a random uniform distribution.
            W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            # Lookup result has shape [None, sequence_length, embedding_size].
            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            # conv2d expects a 4-D tensor (batch, height, width, channel); add the missing
            # channel axis to get [None, sequence_length, embedding_size, 1].
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

        # Create a convolution + maxpool layer for each filter size.
        # Each filter size produces a differently shaped convolution output, so a separate
        # branch is built per size and the pooled results are merged afterwards.
        pooled_outputs = []
        for filter_size in filter_sizes:
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer: each filter covers filter_size words and the whole
                # embedding width.
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                # "VALID" = narrow convolution without edge padding; the output has
                # sequence_length - filter_size + 1 positions along the sentence axis.
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over all positions leaves [batch_size, 1, 1, num_filters].
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features into one vector of length num_filters_total.
        num_filters_total = num_filters * len(filter_sizes)
        # NOTE(review): tf.concat(axis, values) is the pre-1.0 TensorFlow argument order used
        # throughout this notebook; TF >= 1.0 expects (values, axis).
        self.h_pool = tf.concat(3, pooled_outputs)
        # -1 lets TensorFlow infer the batch dimension while flattening.
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            # NOTE(review): the notebook's captured traceback shows tf.get_variable("W")
            # failing with "Variable W already exists" when the same call is issued twice in
            # one graph, so building a second TextCNN presumably needs a fresh graph - confirm.
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            # Keyword arguments make the logits/labels roles explicit; TF >= 1.0 rejects the
            # positional form, while the keyword form is also accepted by earlier releases.
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy: fraction of rows whose predicted class equals the arg-max of the labels.
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
