Reference: https://github.com/dennybritz/cnn-text-classification-tf

## Processing data

In [1]:
import numpy as np
import re
import itertools
from collections import Counter
from urllib.request import urlopen
import tensorflow as tf


In [2]:

def clean_str(string):
    """
    Tokenization/string cleaning for all datasets except for SST.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
    """
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
    string = re.sub(r"\'s", " \'s", string)
    string = re.sub(r"\'ve", " \'ve", string)
    string = re.sub(r"n\'t", " n\'t", string)
    string = re.sub(r"\'re", " \'re", string)
    string = re.sub(r"\'d", " \'d", string)
    string = re.sub(r"\'ll", " \'ll", string)
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    string = re.sub(r"\(", " \( ", string)
    string = re.sub(r"\)", " \) ", string)
    string = re.sub(r"\?", " \? ", string)
    string = re.sub(r"\s{2,}", " ", string)
    return string.strip().lower()

def load_data_and_labels(pos_link, neg_link):
    """
    Loads MR polarity data from files, splits the data into words and generates labels.
    Returns split sentences and labels.
    """
#     # Pull sentences with positive sentiment
    pos_file = urlopen(pos_link)
 
    # Pull sentences with negative sentiment
    neg_file = urlopen(neg_link)

    # Load data from files
    positive_examples = list(pos_file.readlines())
    positive_examples = [s.strip() for s in positive_examples]
    negative_examples = list(neg_file.readlines())
    negative_examples = [s.strip() for s in negative_examples]
    
    # Split by words
    x_text = positive_examples + negative_examples
    x_text = [clean_str(sent.decode('latin1')) for sent in x_text] 
    # or:  x_text = [clean_str(str(sent)) for sent in x_text]

    
    # Generate labels (with two types)
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    
    y = np.concatenate([positive_labels, negative_labels], 0)
    return x_text, y



def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """
    Generates a batch iterator for a dataset.
    """
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((len(data)-1)/batch_size) + 1 #nb of iterations per epoch
    
    for epoch in range(num_epochs):
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index] #return a generator

## Word-level CNN model

In [3]:
import tensorflow as tf
import numpy as np

class Word_CNN_Model:
    """Word-level CNN model 
    """
    
    def __init__(self, input_dim=32, num_classes=2, vocab_size=59, embedding_size=128, 
            filter_sizes=[7], num_filters=128, l2_reg_lambda=0.0, dtype=tf.float32):
        """
        Initialize a new network.

            Inputs:
            - input_dim: Tuple (H, W, C) giving size of input data
            - num_classes: Number of scores to produce from the final affine layer
            - vocab_size: Number of vocabs
            - embedding_size: size of embedded text
            - num_filters: Number of filters to use in the convolutional layer
            - filter_size: Size of filters to use in the convolutional layer
            - weight_scale: Scalar giving standard deviation for random initialization
              of weights.
            - l2_reg_lambda: Scalar giving L2 regularization strength
            - dtype: tf datatype to use for computation
        """
        
        self.input_x = tf.placeholder(dtype=tf.int32, shape=[None, input_dim], name='input_x')
        self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, num_classes], name='input_y')
        self.keep_prob = tf.placeholder(dtype=tf.float32, name='dropout')
        
        l2_loss = tf.constant(0.0) #l2 regularization loss (optional)
 
        #Embedding layer
        with tf.device("/cpu:0"), tf.name_scope("embedding"):
            gen = tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0)
            self.W = tf.Variable (gen, name ="W")
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1) #expand one more dimension
            
        # Create a set of blocks
        pooled_outputs = []
        for i, filtersize in enumerate(filter_sizes):
            with tf.name_scope("conv_maxpool_block_%s" %filtersize) as main_scope: 

                    ###conv layer
    #                 #conv+relu
    #                 conv = tf.layers.conv2d(inputs=self.embedded_chars_expanded, filters=num_filters, 
    #                                         kernel_size=(filtersize, embedding_size), 
    # #                                         kernel_initializer= tf.truncated_normal(shape=[filtersize, embedding_size], stddev=0.1),
    #                                         strides=(1,1), padding='valid', activation='relu',
    # #                                         bias_initializer=tf.constant(0.1, shape=[num_filters]), 
    #                                         name=str("conv_%d"%i) )
    #                 #pool
    #                 pooled = tf.layers.max_pooling2d(conv, pool_size=(1, sequence_length - filter_size + 1),
    #                                                  strides=(1,1), padding='valid', name=str('pool_%d'%i))

                    #define parameters
                    ran = tf.truncated_normal(shape=[filtersize, embedding_size, 1, num_filters], stddev=0.1)
                    W = tf.Variable(ran, name='W')
#                     W = tf.get_variable("W", initializer=ran)
                    b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b_%d" %i)
                    conv = tf.nn.conv2d(
                        input=self.embedded_chars_expanded,
                        filter=W,
                        strides=[1, 1, 1, 1],
                        padding="VALID",
                        name=str("conv%d"%i))
                    # relu
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name=str("relu%d"%i))
                    # maxpool
                    pooled = tf.nn.max_pool(
                        h,
                        ksize=[1, input_dim - filtersize + 1, 1, 1],
                        strides=[1, 1, 1, 1],
                        padding='VALID',
                        name=str("pool%d"%i))

                    pooled_outputs.append(pooled)

        # combine all pools
        h_pool = tf.concat(pooled_outputs, 3) #Concatenates tensors along  dimension 3
        self.h_pool = tf.reshape(h_pool, [-1, num_filters*len(filter_sizes)]) #flatten
        print(len(pooled_outputs), h_pool.get_shape(), self.h_pool.get_shape()) #1 (?, 1, 1, 128) (?, 128)
        
        #dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool, keep_prob=self.keep_prob)
            
        # Final (unnormalized) scores and predictions
#         with tf.name_scope("output") as output_scope:
        
        with tf.variable_scope(tf.get_variable_scope()) as vscope:
#             with tf.variable_scope("var_scope", reuse=True): # to reuse variable 
                W = tf.get_variable(
                    "W",
                    shape=[num_filters*len(filter_sizes), num_classes],
                    initializer=tf.contrib.layers.xavier_initializer())
                b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
    #             b = tf.get_variable(shape=[num_classes], name="b", initializer=tf.constant_initializer(0.1))

                l2_loss += tf.nn.l2_loss(W)
                l2_loss += tf.nn.l2_loss(b)

                self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")#x,w,b, computes matmul(x,w)+b
                self.predictions = tf.argmax(self.scores, 1, name="predictions")
                tf.get_variable_scope().reuse_variables() 
                
        # Calculate mean cross-entropy loss (with regularization)
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

    
a = Word_CNN_Model()


1 (?, 1, 1, 128) (?, 128)


## Train the model

In [4]:
import tensorflow as tf
import numpy as np
import os
import time
import datetime
from tensorflow.contrib import learn

### Parameter definition

In [5]:

# Data loading params
tf.flags.DEFINE_string("pos_link", 'https://raw.githubusercontent.com/yoonkim/CNN_sentence/master/rt-polarity.pos', 'positive link')
tf.flags.DEFINE_string("neg_link", 'https://raw.githubusercontent.com/yoonkim/CNN_sentence/master/rt-polarity.neg', 'neg link')
tf.flags.DEFINE_float("test_sample_percentage", .1, "Percentage of the training data to use for validation")
tf.flags.DEFINE_float("num_folds", 2, "Number of folds in cross-validation")

# Model Hyperparameters
tf.flags.DEFINE_integer("embedding_dim", 128, "Dimensionality of character embedding (default: 128)")
tf.flags.DEFINE_string("filter_sizes", "3,4,5", "Comma-separated filter sizes (default: '3,4,5')")
tf.flags.DEFINE_integer("num_filters", 128, "Number of filters per filter size (default: 128)")
tf.flags.DEFINE_float("keep_prob", 0.5, "Dropout keep probability (default: 0.5)")
tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularization lambda (default: 0.0)")

# Training parameters
tf.flags.DEFINE_integer("batch_size", 128, "Batch Size (default: 64)")
tf.flags.DEFINE_integer("num_epochs", 5, "Number of training epochs (default: 200)")
tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on testing set after this many steps (default: 100)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")
tf.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store (default: 5)")

# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for k,v in sorted(FLAGS.__flags.items()):
    print(k.upper(), "=", v)



Parameters:
ALLOW_SOFT_PLACEMENT = True
BATCH_SIZE = 128
CHECKPOINT_EVERY = 100
EMBEDDING_DIM = 128
EVALUATE_EVERY = 100
FILTER_SIZES = 3,4,5
KEEP_PROB = 0.5
L2_REG_LAMBDA = 0.0
LOG_DEVICE_PLACEMENT = False
NEG_LINK = https://raw.githubusercontent.com/yoonkim/CNN_sentence/master/rt-polarity.neg
NUM_CHECKPOINTS = 5
NUM_EPOCHS = 5
NUM_FILTERS = 128
NUM_FOLDS = 2
POS_LINK = https://raw.githubusercontent.com/yoonkim/CNN_sentence/master/rt-polarity.pos
TEST_SAMPLE_PERCENTAGE = 0.1


### Process data

In [6]:
# Load data
x_text, y = load_data_and_labels(FLAGS.pos_link, FLAGS.neg_link)



In [7]:
# Build vocabulary
max_document_length = max([len(x) for x in x_text])

#convert document to [word_ids]
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
#Learn the vocabulary dictionary and give matrix of indexies of words (Word-Id matrix)
x = np.array(list(vocab_processor.fit_transform(x_text))) 
print("Vocabulary Size: ", len(vocab_processor.vocabulary_))

print(x.shape, x)

    
#permutation of data
np.random.seed(10)
indices = np.random.permutation(range(len(x)))
print(indices)
x_shuffled = x[indices]
y_shuffled = y[indices]

# Split train/test set
# We first test with simple split, and use cross-validation later
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_shuffled, y_shuffled, test_size=FLAGS.test_sample_percentage, random_state=1 )


# test_sample_index = -1 * int(FLAGS.test_sample_percentage * float(len(y)))
# x_train, x_test = x_shuffled[:test_sample_index], x_shuffled[test_sample_index:]
# y_train, y_test = y_shuffled[:test_sample_index], y_shuffled[test_sample_index:]

# del x_shuffled, y_shuffled

print("Train/Test split: {:d}/{:d}".format(len(y_train), len(y_test)))


Vocabulary Size:  18759
(10662, 266) [[    1     2     3 ...,     0     0     0]
 [    1    31    32 ...,     0     0     0]
 [   57    58    59 ...,     0     0     0]
 ..., 
 [   75    84  1949 ...,     0     0     0]
 [    1  2191  2690 ...,     0     0     0]
 [11512     3   147 ...,     0     0     0]]
[ 7359  5573 10180 ...,  1344  7293  1289]
Train/Test split: 9595/1067


### Training

In [8]:
    
def train():

      with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
          allow_soft_placement=FLAGS.allow_soft_placement,
          log_device_placement=FLAGS.log_device_placement)
        
#         sess = tf.Session(config=session_conf)
#         with sess.as_default(): #
        with tf.Session(config=session_conf) as sess:
            #init model
            cnn = Word_CNN_Model(
                input_dim=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            ### Define training procedure
            #global step value to be used throughout training and testing
            global_step = tf.Variable(0, name="global_step", trainable=False)
            #define Adam optim
            optimizer = tf.train.AdamOptimizer(1e-3) 
            #compute gradients
            grads_and_vars = optimizer.compute_gradients(cnn.loss) #cnn.loss is a property defined in the scope "loss"
            #apply above gradients 
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) #be used in train_step()

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    #Adding a histogram summary to visualize data's distribution in TensorBoar
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g) #(node_name, values)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries) # merge all grad summaries

            # Output directory for models and summaries
            timestamp = str(int(time.time())) 
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp)) #out_dir=./runs/timestamp
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss) # a summary to monitor cost tensor
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy) #a summary to monitor accuracy

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged]) #will be used in train_step()
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Test summaries
            test_summary_op = tf.summary.merge([loss_summary, acc_summary])# be used in test_step()
            test_summary_dir = os.path.join(out_dir, "summaries", "test")
            test_summary_writer = tf.summary.FileWriter(test_summary_dir, sess.graph)

            # Checkpoint directory
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) #./runs/timestamp/checkpoints
            checkpoint_prefix = os.path.join(checkpoint_dir, "model") #prefix of filename
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir) # default, TF assumes checkpoint dir exists
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Save vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab")) # ./runs/timestamp/vocab

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_y: y_batch,
                  cnn.keep_prob: FLAGS.keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict) # run all summaries with the input=feed_dict
                
                time_str = datetime.datetime.now().isoformat()
                
                #display info: 2017-12-08T16:00:45.606711: step 8, loss 3.45242, acc 0.5
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                #add summary
                train_summary_writer.add_summary(summaries, step) #(buffer, global step value)

            def test_step(x_batch, y_batch, writer=None):
                """
                Evaluates model on a test set
                """
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_y: y_batch,
                  cnn.keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, test_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) 
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches (from preprocessing part above)
            batches = batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)
            
            # Training loop. Note, nb of batchs/iterations = len(data)/batch_size
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step) #get the current step of training
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    test_step(x_test, y_test, writer=test_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
    
    
train()



3 (?, 1, 1, 384) (?, 384)
INFO:tensorflow:Summary name embedding/W:0/grad/hist is illegal; using embedding/W_0/grad/hist instead.
INFO:tensorflow:Summary name embedding/W:0/grad/sparsity is illegal; using embedding/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv_maxpool_block_3/W:0/grad/hist is illegal; using conv_maxpool_block_3/W_0/grad/hist instead.
INFO:tensorflow:Summary name conv_maxpool_block_3/W:0/grad/sparsity is illegal; using conv_maxpool_block_3/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv_maxpool_block_3/b_0:0/grad/hist is illegal; using conv_maxpool_block_3/b_0_0/grad/hist instead.
INFO:tensorflow:Summary name conv_maxpool_block_3/b_0:0/grad/sparsity is illegal; using conv_maxpool_block_3/b_0_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv_maxpool_block_4/W:0/grad/hist is illegal; using conv_maxpool_block_4/W_0/grad/hist instead.
INFO:tensorflow:Summary name conv_maxpool_block_4/W:0/grad/sparsity is illegal; using conv_maxpool_block

2017-12-14T10:42:58.068132: step 95, loss 1.27776, acc 0.492188
2017-12-14T10:42:59.409642: step 96, loss 1.40586, acc 0.460938
2017-12-14T10:43:00.762664: step 97, loss 1.06197, acc 0.625
2017-12-14T10:43:02.213684: step 98, loss 1.1804, acc 0.601562
2017-12-14T10:43:03.598473: step 99, loss 0.981073, acc 0.632812
2017-12-14T10:43:05.223072: step 100, loss 1.13196, acc 0.570312

Evaluation:
2017-12-14T10:43:09.072536: step 100, loss 0.700377, acc 0.609185

Saved model checkpoint to /home/hoang/Documents/svn/reposn/code/text-classification/runs/1513244435/checkpoints/model-100

2017-12-14T10:43:10.667661: step 101, loss 1.30117, acc 0.554688
2017-12-14T10:43:12.045126: step 102, loss 1.23259, acc 0.625
2017-12-14T10:43:13.577521: step 103, loss 1.2361, acc 0.554688
2017-12-14T10:43:15.066985: step 104, loss 1.12002, acc 0.617188
2017-12-14T10:43:16.564321: step 105, loss 1.02452, acc 0.65625
2017-12-14T10:43:18.341632: step 106, loss 1.02739, acc 0.65625
2017-12-14T10:43:19.813225: ste

2017-12-14T10:46:04.778173: step 215, loss 0.880674, acc 0.585938
2017-12-14T10:46:06.351376: step 216, loss 0.949091, acc 0.523438
2017-12-14T10:46:07.821899: step 217, loss 0.67371, acc 0.6875
2017-12-14T10:46:09.343492: step 218, loss 0.815117, acc 0.640625
2017-12-14T10:46:10.756151: step 219, loss 0.763286, acc 0.648438
2017-12-14T10:46:12.119648: step 220, loss 0.780008, acc 0.617188
2017-12-14T10:46:13.473375: step 221, loss 0.643472, acc 0.703125
2017-12-14T10:46:14.973780: step 222, loss 0.703708, acc 0.648438
2017-12-14T10:46:16.445631: step 223, loss 0.744776, acc 0.640625
2017-12-14T10:46:17.892007: step 224, loss 0.735036, acc 0.625
2017-12-14T10:46:19.311487: step 225, loss 0.887355, acc 0.626016
2017-12-14T10:46:20.659822: step 226, loss 0.86387, acc 0.601562
2017-12-14T10:46:22.226788: step 227, loss 0.605147, acc 0.710938
2017-12-14T10:46:23.628554: step 228, loss 0.683583, acc 0.65625
2017-12-14T10:46:25.260815: step 229, loss 0.742237, acc 0.703125
2017-12-14T10:46:2

2017-12-14T10:49:07.894646: step 338, loss 0.554529, acc 0.710938
2017-12-14T10:49:09.289390: step 339, loss 0.614212, acc 0.671875
2017-12-14T10:49:10.672654: step 340, loss 0.63254, acc 0.695312
2017-12-14T10:49:12.105598: step 341, loss 0.635943, acc 0.6875
2017-12-14T10:49:13.481069: step 342, loss 0.556423, acc 0.671875
2017-12-14T10:49:15.005300: step 343, loss 0.616148, acc 0.757812
2017-12-14T10:49:16.528108: step 344, loss 0.528351, acc 0.734375
2017-12-14T10:49:18.074791: step 345, loss 0.656053, acc 0.648438
2017-12-14T10:49:19.403520: step 346, loss 0.603632, acc 0.664062
2017-12-14T10:49:20.778068: step 347, loss 0.517037, acc 0.757812
2017-12-14T10:49:22.290019: step 348, loss 0.619751, acc 0.6875
2017-12-14T10:49:23.658793: step 349, loss 0.582411, acc 0.703125
2017-12-14T10:49:25.221096: step 350, loss 0.646569, acc 0.671875
2017-12-14T10:49:26.904471: step 351, loss 0.450287, acc 0.789062
2017-12-14T10:49:28.370430: step 352, loss 0.560632, acc 0.734375
2017-12-14T10:4

In [None]:
####### Training with CV #######
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import recall_score


k_fold = KFold(n_splits=FLAGS.num_folds, shuffle=False)
for train_indices, test_indices in k_fold.split(x_text):
    print('Train: %s | test: %s' % (train_indices, test_indices))
    x_train = x[train_indices]
    y_train = y[train_indices]
    x_test = x[test_indices]
    y_test = y[test_indices]

    
def train_kfold():
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
          allow_soft_placement=FLAGS.allow_soft_placement,
          log_device_placement=FLAGS.log_device_placement)
        
        for train_indices, test_indices in k_fold.split(x_text):
                print('Train: %s | test: %s' % (train_indices, test_indices))
                x_train = x[train_indices]
                y_train = y[train_indices]
                x_test = x[test_indices]
                y_test = y[test_indices]
        with tf.Session(config=session_conf) as sess:
            k_fold = KFold(n_splits=FLAGS.num_folds, shuffle=False)
            
 
                #init model
                cnn = Word_CNN_Model(
                    input_dim=x_train.shape[1],
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)

                ### Define training procedure
                #global step value to be used throughout training and testing
                global_step = tf.Variable(0, name="global_step", trainable=False)
                #define Adam optim
                optimizer = tf.train.AdamOptimizer(1e-3) 
                #compute gradients
                grads_and_vars = optimizer.compute_gradients(cnn.loss) #cnn.loss is a property defined in the scope "loss"
                #apply above gradients 
                train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) #be used in train_step()

                # Keep track of gradient values and sparsity (optional)
                grad_summaries = []
                for g, v in grads_and_vars:
                    if g is not None:
                        #Adding a histogram summary to visualize data's distribution in TensorBoar
                        grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g) #(node_name, values)
                        sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                        grad_summaries.append(grad_hist_summary)
                        grad_summaries.append(sparsity_summary)
                grad_summaries_merged = tf.summary.merge(grad_summaries) # merge all grad summaries

                # Output directory for models and summaries
                timestamp = str(int(time.time())) 
                out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp)) #out_dir=./runs/timestamp
                print("Writing to {}\n".format(out_dir))

                # Summaries for loss and accuracy
                loss_summary = tf.summary.scalar("loss", cnn.loss) # a summary to monitor cost tensor
                acc_summary = tf.summary.scalar("accuracy", cnn.accuracy) #a summary to monitor accuracy

                # Train Summaries
                train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged]) #will be used in train_step()
                train_summary_dir = os.path.join(out_dir, "summaries", "train")
                train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

                # Test summaries
                test_summary_op = tf.summary.merge([loss_summary, acc_summary])# be used in test_step()
                test_summary_dir = os.path.join(out_dir, "summaries", "test")
                test_summary_writer = tf.summary.FileWriter(test_summary_dir, sess.graph)

                # Checkpoint directory
                checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) #./runs/timestamp/checkpoints
                checkpoint_prefix = os.path.join(checkpoint_dir, "model") #prefix of filename
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir) # default, TF assumes checkpoint dir exists
                saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

                # Save vocabulary
                vocab_processor.save(os.path.join(out_dir, "vocab")) # ./runs/timestamp/vocab

                # Initialize all variables
                sess.run(tf.global_variables_initializer())

                def train_step(x_batch, y_batch):
                    """
                    A single training step
                    """
                    feed_dict = {
                      cnn.input_x: x_batch,
                      cnn.input_y: y_batch,
                      cnn.keep_prob: FLAGS.keep_prob
                    }
                    _, step, summaries, loss, accuracy = sess.run(
                        [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                        feed_dict) # run all summaries with the input=feed_dict

                    time_str = datetime.datetime.now().isoformat()

                    #display info: 2017-12-08T16:00:45.606711: step 8, loss 3.45242, acc 0.5
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                    #add summary
                    train_summary_writer.add_summary(summaries, step) #(buffer, global step value)

                def test_step(x_batch, y_batch, writer=None):
                    """
                    Evaluates model on a test set
                    """
                    feed_dict = {
                      cnn.input_x: x_batch,
                      cnn.input_y: y_batch,
                      cnn.keep_prob: 1.0
                    }
                    step, summaries, loss, accuracy = sess.run(
                        [global_step, test_summary_op, cnn.loss, cnn.accuracy],
                        feed_dict)
                    time_str = datetime.datetime.now().isoformat()

                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) 
                    if writer:
                        writer.add_summary(summaries, step)

                # Generate batches (from preprocessing part above)
                batches = batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)

                # Training loop. Note, nb of batchs/iterations = len(data)/batch_size
                for batch in batches:
                    x_batch, y_batch = zip(*batch)
                    train_step(x_batch, y_batch)
                    current_step = tf.train.global_step(sess, global_step) #get the current step of training
                    if current_step % FLAGS.evaluate_every == 0:
                        print("\nEvaluation:")
                        test_step(x_test, y_test, writer=test_summary_writer)
                        print("")
                    if current_step % FLAGS.checkpoint_every == 0:
                        path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                        print("Saved model checkpoint to {}\n".format(path))
    
train_kfold()

Train: [ 5331  5332  5333 ..., 10659 10660 10661] | test: [   0    1    2 ..., 5328 5329 5330]
3 (?, 1, 1, 384) (?, 384)
INFO:tensorflow:Summary name embedding/W:0/grad/hist is illegal; using embedding/W_0/grad/hist instead.
INFO:tensorflow:Summary name embedding/W:0/grad/sparsity is illegal; using embedding/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv_maxpool_block_3/W:0/grad/hist is illegal; using conv_maxpool_block_3/W_0/grad/hist instead.
INFO:tensorflow:Summary name conv_maxpool_block_3/W:0/grad/sparsity is illegal; using conv_maxpool_block_3/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv_maxpool_block_3/b_0:0/grad/hist is illegal; using conv_maxpool_block_3/b_0_0/grad/hist instead.
INFO:tensorflow:Summary name conv_maxpool_block_3/b_0:0/grad/sparsity is illegal; using conv_maxpool_block_3/b_0_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv_maxpool_block_4/W:0/grad/hist is illegal; using conv_maxpool_block_4/W_0/grad/hist instead.
INFO:tens

In [None]:
import tensorflow as tf

def conv_relu(input, kernel_shape, bias_shape):
    # Create variable named "weights".
    weights = tf.get_variable("weights", kernel_shape,
        initializer=tf.random_normal_initializer())
    # Create variable named "biases".
    biases = tf.get_variable("biases", bias_shape,
        initializer=tf.constant_initializer(0.0))
    conv = tf.nn.conv2d(input, weights,
        strides=[1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(conv + biases)


def my_image_filter(input_images):
    with tf.variable_scope("conv1"):
        # Variables created here will be named "conv1/weights", "conv1/biases".
        relu1 = conv_relu(input_images, [5, 5, 32, 32], [32])
    with tf.variable_scope("conv2"):
        # Variables created here will be named "conv2/weights", "conv2/biases".
        return conv_relu(relu1, [5, 5, 32, 32], [32])

input1 = tf.random_normal([1,10,10,32])
input2 = tf.random_normal([1,20,20,32])

x = conv_relu(input1, kernel_shape=[5, 5, 32, 32], bias_shape=[32])
x = conv_relu(x, kernel_shape=[5, 5, 32, 32], bias_shape = [32])  # This fails.

with tf.variable_scope("model"):
  output1 = my_image_filter(input1)
with tf.variable_scope("model", reuse=True):
  output2 = my_image_filter(input2)



In [None]:
import tensorflow as tf
import pdb

def main():

    ## !!! change this to test the different behaviors !!!
    #optimizer = tf.train.GradientDescentOptimizer(1e-3)                 # This one is working
    optimizer = tf.train.AdamOptimizer(1e-3, beta1=0.9, beta2=0.999999) # This one is not working
    #optimizer = tf.train.AdagradOptimizer(1e-3)                         # This one is not working
    #optimizer = tf.train.AdadeltaOptimizer(1e-3)                        # This one is not working

    list_grads = []
    with tf.variable_scope(tf.get_variable_scope()) as scope:

        for i in range(2):
            with tf.name_scope('%d' % i) as scope:
                    W = tf.get_variable(name="filter", initializer=tf.random_uniform_initializer(dtype=tf.float32), shape=[5, 1])
                    X = tf.get_variable(name="data", initializer=tf.random_uniform_initializer(dtype=tf.float32), shape=[5, 1])
                    Y_ = tf.get_variable(name="out", initializer=tf.random_uniform_initializer(dtype=tf.float32), shape=[5, 1])
                    Y = W+X
                    loss =tf.reduce_mean(Y-Y_)
                    grad = optimizer.compute_gradients(loss)
                    list_grads.append(grad)

                    tf.get_variable_scope().reuse_variables()
    
    grads = list_grads[0] + list_grads[1]
    #pdb.set_trace()

    op_train = optimizer.apply_gradients(grads)

    init_global = tf.global_variables_initializer()
    init_local =  tf.local_variables_initializer()

    sess = tf.Session()
    sess.run([init_global, init_local])

    _, sol = sess.run([op_train, loss])
    print(str(sol))

if (__name__ == '__main__'):
    main()

In [None]:
b = TextCNN(sequence_length=200, num_classes=10, vocab_size=200, embedding_size=100, filter_sizes=[5],  num_filters=100)