In [5]:
import os
import time
import json
import datetime

import numpy as np
import pickle as pckl
import tensorflow as tf

from tensorflow.contrib import learn
from sklearn.preprocessing import LabelBinarizer

In [2]:
alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}\n"

class YelpDataset():    
    
    def __init__(self, path):
        self.path = path

    def __len__(self):
        return len(self.dataset)
   
    def load(self):
        
        x, y = self.generate_data()
        
        print("X: {}".format(x.shape))
        print("Y: {}".format(y.shape))
        
        return x, y
    
    def generate_data(self):
        x = []
        y = []
        with open(self.path) as dfile:
            count = 0
            
            for line in dfile:
                review = json.loads(line)
                stars = review["stars"]
                text = review["text"]
                
                # Non neutral reviews
                if stars != 3:
                    clipped = self.clip_seq(list(text.lower()))
                    padded = self.pad_seq(clipped)
                    int_seq = self.str_to_int8(padded)
                    if stars == 1 or stars == 2:
                        x.append(int_seq)
                        y.append([1, 0])
                    elif stars == 4 or stars == 5:
                        x.append(int_seq)
                        y.append([0, 1])
                    count += 1
                    if count % 1000 == 0:
                        print("{} non-neutral instances processed".format(count))
                        break
        return np.array(x), np.array(y)


    def clip_seq(self, char_seq):
        if len(char_seq) > 1014:
            char_seq = char_seq[-1014:]
        return char_seq


    def pad_seq(self, char_seq, seq_length=1014, pad_char=" "):
        pad_width = seq_length - len(char_seq)
        padded_seq = char_seq + [pad_char] * pad_width
        return padded_seq


    def str_to_int8(self, char_seq):
        return np.array([alphabet.find(char) for char in char_seq], dtype=np.int8)


def one_hot_x(x, y, start_idx, end_idx):
    x_batch = x[start_idx:end_idx]
    y_batch = y[start_idx:end_idx]
    one_hot_batch = []
    
    binarizer = LabelBinarizer()
    binarizer.fit(range(len(alphabet)))
    
    for x in x_batch:
        one_hot_batch.append(binarizer.transform(x))
    one_hot_batch = np.array([one_hot_batch])
    x_batch = np.transpose(one_hot_batch, (1, 3, 2, 0))
    return x_batch, y_batch

def batch_iter(x, y, batch_size, num_epochs, shuffle=True):
    """
    Generates a batch iterator for a dataset.
    """
    print ("Generating batch iterator ...")
    data_size = len(x)
    num_batches_per_epoch = int(data_size/batch_size) + 1
    for epoch in range(num_epochs):
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            x_shuff = x[shuffle_indices]
            y_shuff = y[shuffle_indices]
        else:
            x_shuff = x
            y_shuff = y
        for batch_num in range(num_batches_per_epoch):
            start_idx = batch_num * batch_size
            end_idx = min((batch_num + 1) * batch_size, data_size)
            x_batch, y_batch = one_hot_x(x_shuff, y_shuff, start_idx, end_idx)
            yield list(zip(x_batch, y_batch))

In [3]:
class CharCNN(object):
    """
    A CNN for text classification.
    based on the Character-level Convolutional Networks for Text Classification paper.
    """
    def __init__(self, sequence_length, quantization_size, num_classes, filter_sizes, num_filters, 
        learning_rate, l2_reg_lambda=0.0, jac_reg=0.0):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.float32, [None, quantization_size, sequence_length, 1], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # ================ Layer 1 ================
        with tf.name_scope("conv-maxpool-1"):
            filter_shape = [quantization_size, filter_sizes[0], 1, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.05), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(self.input_x, W, strides=[1, 1, 1, 1], padding="VALID", name="conv1")
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, 1, 3, 1],
                strides=[1, 1, 3, 1],
                padding='VALID',
                name="pool1")

        # ================ Layer 2 ================
        with tf.name_scope("conv-maxpool-2"):
            filter_shape = [1, filter_sizes[1], num_filters, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.05), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(pooled, W, strides=[1, 1, 1, 1], padding="VALID", name="conv2")
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, 1, 3, 1],
                strides=[1, 1, 3, 1],
                padding='VALID',
                name="pool2")

        # ================ Layer 3 ================
        with tf.name_scope("conv-3"):
            filter_shape = [1, filter_sizes[2], num_filters, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.05), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(pooled, W, strides=[1, 1, 1, 1], padding="VALID", name="conv3")
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

        # ================ Layer 4 ================
        with tf.name_scope("conv-4"):
            filter_shape = [1, filter_sizes[3], num_filters, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.05), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(h, W, strides=[1, 1, 1, 1], padding="VALID", name="conv4")
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

        # ================ Layer 5 ================
        with tf.name_scope("conv-5"):
            filter_shape = [1, filter_sizes[4], num_filters, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.05), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(h, W, strides=[1, 1, 1, 1], padding="VALID", name="conv5")
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

        # ================ Layer 6 ================
        with tf.name_scope("conv-maxpool-6"):
            filter_shape = [1, filter_sizes[5], num_filters, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.05), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(h, W, strides=[1, 1, 1, 1], padding="VALID", name="conv6")
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, 1, 3, 1],
                strides=[1, 1, 3, 1],
                padding='VALID',
                name="pool6")

        # ================ Layer 7 ================
        feature_vec_length = 34 * num_filters
        h_pool_flat = tf.reshape(pooled, [-1, feature_vec_length])

        # Add dropout
        with tf.name_scope("dropout-1"):
            drop1 = tf.nn.dropout(h_pool_flat, self.dropout_keep_prob)

        # Fully connected layer 1
        with tf.name_scope("fc-1"):
            W = tf.Variable(tf.truncated_normal([feature_vec_length, 1024], stddev=0.05), name="W")
            # W = tf.get_variable("W", shape=[num_features_total, 1024],
            #                     initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[1024]), name="b")
            # l2_loss += tf.nn.l2_loss(W)
            # l2_loss += tf.nn.l2_loss(b)

            fc_1_output = tf.nn.relu(tf.nn.xw_plus_b(drop1, W, b), name="fc-1-out")

        # ================ Layer 8 ================
        # Add dropout
        with tf.name_scope("dropout-2"):
            drop2 = tf.nn.dropout(fc_1_output, self.dropout_keep_prob)

        # Fully connected layer 2
        with tf.name_scope("fc-2"):
            W = tf.Variable(tf.truncated_normal([1024, 1024], stddev=0.05), name="W")
            # W = tf.get_variable("W", shape=[1024, 1024],
            #                     initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[1024]), name="b")
            # l2_loss += tf.nn.l2_loss(W)
            # l2_loss += tf.nn.l2_loss(b)

            fc_2_output = tf.nn.relu(tf.nn.xw_plus_b(drop2, W, b), name="fc-2-out")

        # ================ Layer 9 ================
        # Fully connected layer 3
        with tf.name_scope("fc-3"):
            W = tf.Variable(tf.truncated_normal([1024, num_classes], stddev=0.05), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            # l2_loss += tf.nn.l2_loss(W)
            # l2_loss += tf.nn.l2_loss(b)

            scores = tf.nn.xw_plus_b(fc_2_output, W, b, name="output")
            predictions = tf.argmax(scores, 1, name="predictions")
        # ================ Loss and Accuracy ================
        # CalculateMean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

In [6]:
# Load dataset
print ("Loading Dataset ...")

pcklfile = "./data/dump.pckl"

if not os.path.isfile(pcklfile):
    print ("No data dump found. Pickling dataset ...")
    dataset = YelpDataset('./data/review.json')
    X, Y = dataset.load()
    pckl.dump([X, Y], open(pcklfile, "wb"))
else:
    X, Y = pckl.load(open(pcklfile, "rb"))

print ("Dataset loaded. Preparing data ...")

Loading Dataset ...
No data dump found. Pickling dataset ...
1000 non-neutral instances processed
X: (1000, 1014)
Y: (1000, 2)
Dataset loaded. Preparing data ...


In [9]:
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(Y)))

x_shuff = X[shuffle_indices]
y_shuff = Y[shuffle_indices]

# Percentage of the training data to use for validation
val_sample = .2

# Split train/test set
idx = -1 * int(val_sample * float(len(Y)))
x_train, x_val = x_shuff[:idx], x_shuff[idx:]
y_train, y_val = y_shuff[:idx], y_shuff[idx:]
print("Train/Val split: {:d}/{:d}".format(len(y_train), len(y_val)))

Train/Val split: 800/200


In [10]:
sequence_length = 1014
quantization_size = 70
num_classes = 2

# Model parameters
filter_sizes = (7, 7, 3, 3, 3, 3)
num_filters = 256
l2_reg_lambda = 0.0
jac_reg = 0.0

# Training parameters
batch_size = 128
num_epochs = 50
learning_rate = 1e-3
checkpoint_every = 1000
validate_every = 5000
num_checkpoints = 3

In [71]:
print("Starting training ...")

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=True,
      log_device_placement=False)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = CharCNN(
            sequence_length=sequence_length,
            quantization_size=quantization_size,
            num_classes=num_classes,
            filter_sizes=filter_sizes,
            num_filters=num_filters,
            learning_rate=learning_rate,
            l2_reg_lambda=l2_reg_lambda,
            jac_reg=jac_reg)
        
        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)             
                
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
      
        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", cnn.loss)
        acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

        # Train Summaries
        train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        # Val summaries
        val_summary_op = tf.summary.merge([loss_summary, acc_summary])
        val_summary_dir = os.path.join(out_dir, "summaries", "val")
        val_summary_writer = tf.summary.FileWriter(val_summary_dir, sess.graph)

        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=num_checkpoints)

        # Initialize all variables
        sess.run(tf.global_variables_initializer())
               
        def train_step(x_batch, y_batch):
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: 0.5
            }
            _, step, summaries, loss, accuracy = sess.run(
                [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

        def validation_step(x_batch, y_batch, writer=None):
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: 1.0
            }
            _, step, summaries, loss, accuracy = sess.run(
                [train_op, global_step, val_summary_op, cnn.loss, cnn.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            if writer:
                writer.add_summary(summaries, step)
        
        batches = batch_iter(x_train, y_train, batch_size, num_epochs)
        
        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            train_step(x_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            
            if current_step % validate_every == 0:
                print("\nValidation: ")
                validation_step(x_val, y_val, writer=val_summary_writer)

            if current_step % checkpoint_every == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))

Starting training ...
Dimensions:
INFO:tensorflow:Summary name conv-maxpool-1/W:0/grad/hist is illegal; using conv-maxpool-1/W_0/grad/hist instead.


INFO:tensorflow:Summary name conv-maxpool-1/W:0/grad/hist is illegal; using conv-maxpool-1/W_0/grad/hist instead.


INFO:tensorflow:Summary name conv-maxpool-1/W:0/grad/sparsity is illegal; using conv-maxpool-1/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-maxpool-1/W:0/grad/sparsity is illegal; using conv-maxpool-1/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-maxpool-1/b:0/grad/hist is illegal; using conv-maxpool-1/b_0/grad/hist instead.


INFO:tensorflow:Summary name conv-maxpool-1/b:0/grad/hist is illegal; using conv-maxpool-1/b_0/grad/hist instead.


INFO:tensorflow:Summary name conv-maxpool-1/b:0/grad/sparsity is illegal; using conv-maxpool-1/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-maxpool-1/b:0/grad/sparsity is illegal; using conv-maxpool-1/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-maxpool-2/W:0/grad/hist is illegal; using conv-maxpool-2/W_0/grad/hist instead.


INFO:tensorflow:Summary name conv-maxpool-2/W:0/grad/hist is illegal; using conv-maxpool-2/W_0/grad/hist instead.


INFO:tensorflow:Summary name conv-maxpool-2/W:0/grad/sparsity is illegal; using conv-maxpool-2/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-maxpool-2/W:0/grad/sparsity is illegal; using conv-maxpool-2/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-maxpool-2/b:0/grad/hist is illegal; using conv-maxpool-2/b_0/grad/hist instead.


INFO:tensorflow:Summary name conv-maxpool-2/b:0/grad/hist is illegal; using conv-maxpool-2/b_0/grad/hist instead.


INFO:tensorflow:Summary name conv-maxpool-2/b:0/grad/sparsity is illegal; using conv-maxpool-2/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-maxpool-2/b:0/grad/sparsity is illegal; using conv-maxpool-2/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-3/W:0/grad/hist is illegal; using conv-3/W_0/grad/hist instead.


INFO:tensorflow:Summary name conv-3/W:0/grad/hist is illegal; using conv-3/W_0/grad/hist instead.


INFO:tensorflow:Summary name conv-3/W:0/grad/sparsity is illegal; using conv-3/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-3/W:0/grad/sparsity is illegal; using conv-3/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-3/b:0/grad/hist is illegal; using conv-3/b_0/grad/hist instead.


INFO:tensorflow:Summary name conv-3/b:0/grad/hist is illegal; using conv-3/b_0/grad/hist instead.


INFO:tensorflow:Summary name conv-3/b:0/grad/sparsity is illegal; using conv-3/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-3/b:0/grad/sparsity is illegal; using conv-3/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-4/W:0/grad/hist is illegal; using conv-4/W_0/grad/hist instead.


INFO:tensorflow:Summary name conv-4/W:0/grad/hist is illegal; using conv-4/W_0/grad/hist instead.


INFO:tensorflow:Summary name conv-4/W:0/grad/sparsity is illegal; using conv-4/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-4/W:0/grad/sparsity is illegal; using conv-4/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-4/b:0/grad/hist is illegal; using conv-4/b_0/grad/hist instead.


INFO:tensorflow:Summary name conv-4/b:0/grad/hist is illegal; using conv-4/b_0/grad/hist instead.


INFO:tensorflow:Summary name conv-4/b:0/grad/sparsity is illegal; using conv-4/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-4/b:0/grad/sparsity is illegal; using conv-4/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-5/W:0/grad/hist is illegal; using conv-5/W_0/grad/hist instead.


INFO:tensorflow:Summary name conv-5/W:0/grad/hist is illegal; using conv-5/W_0/grad/hist instead.


INFO:tensorflow:Summary name conv-5/W:0/grad/sparsity is illegal; using conv-5/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-5/W:0/grad/sparsity is illegal; using conv-5/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-5/b:0/grad/hist is illegal; using conv-5/b_0/grad/hist instead.


INFO:tensorflow:Summary name conv-5/b:0/grad/hist is illegal; using conv-5/b_0/grad/hist instead.


INFO:tensorflow:Summary name conv-5/b:0/grad/sparsity is illegal; using conv-5/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-5/b:0/grad/sparsity is illegal; using conv-5/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-maxpool-6/W:0/grad/hist is illegal; using conv-maxpool-6/W_0/grad/hist instead.


INFO:tensorflow:Summary name conv-maxpool-6/W:0/grad/hist is illegal; using conv-maxpool-6/W_0/grad/hist instead.


INFO:tensorflow:Summary name conv-maxpool-6/W:0/grad/sparsity is illegal; using conv-maxpool-6/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-maxpool-6/W:0/grad/sparsity is illegal; using conv-maxpool-6/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-maxpool-6/b:0/grad/hist is illegal; using conv-maxpool-6/b_0/grad/hist instead.


INFO:tensorflow:Summary name conv-maxpool-6/b:0/grad/hist is illegal; using conv-maxpool-6/b_0/grad/hist instead.


INFO:tensorflow:Summary name conv-maxpool-6/b:0/grad/sparsity is illegal; using conv-maxpool-6/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name conv-maxpool-6/b:0/grad/sparsity is illegal; using conv-maxpool-6/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name fc-1/W:0/grad/hist is illegal; using fc-1/W_0/grad/hist instead.


INFO:tensorflow:Summary name fc-1/W:0/grad/hist is illegal; using fc-1/W_0/grad/hist instead.


INFO:tensorflow:Summary name fc-1/W:0/grad/sparsity is illegal; using fc-1/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name fc-1/W:0/grad/sparsity is illegal; using fc-1/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name fc-1/b:0/grad/hist is illegal; using fc-1/b_0/grad/hist instead.


INFO:tensorflow:Summary name fc-1/b:0/grad/hist is illegal; using fc-1/b_0/grad/hist instead.


INFO:tensorflow:Summary name fc-1/b:0/grad/sparsity is illegal; using fc-1/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name fc-1/b:0/grad/sparsity is illegal; using fc-1/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name fc-2/W:0/grad/hist is illegal; using fc-2/W_0/grad/hist instead.


INFO:tensorflow:Summary name fc-2/W:0/grad/hist is illegal; using fc-2/W_0/grad/hist instead.


INFO:tensorflow:Summary name fc-2/W:0/grad/sparsity is illegal; using fc-2/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name fc-2/W:0/grad/sparsity is illegal; using fc-2/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name fc-2/b:0/grad/hist is illegal; using fc-2/b_0/grad/hist instead.


INFO:tensorflow:Summary name fc-2/b:0/grad/hist is illegal; using fc-2/b_0/grad/hist instead.


INFO:tensorflow:Summary name fc-2/b:0/grad/sparsity is illegal; using fc-2/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name fc-2/b:0/grad/sparsity is illegal; using fc-2/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name fc-3/W:0/grad/hist is illegal; using fc-3/W_0/grad/hist instead.


INFO:tensorflow:Summary name fc-3/W:0/grad/hist is illegal; using fc-3/W_0/grad/hist instead.


INFO:tensorflow:Summary name fc-3/W:0/grad/sparsity is illegal; using fc-3/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name fc-3/W:0/grad/sparsity is illegal; using fc-3/W_0/grad/sparsity instead.


INFO:tensorflow:Summary name fc-3/b:0/grad/hist is illegal; using fc-3/b_0/grad/hist instead.


INFO:tensorflow:Summary name fc-3/b:0/grad/hist is illegal; using fc-3/b_0/grad/hist instead.


INFO:tensorflow:Summary name fc-3/b:0/grad/sparsity is illegal; using fc-3/b_0/grad/sparsity instead.


INFO:tensorflow:Summary name fc-3/b:0/grad/sparsity is illegal; using fc-3/b_0/grad/sparsity instead.


Writing to /home/aayush/char-level-cnn/runs/1512759807

Generating batch iterator ...
2017-12-09T00:33:39.312370: step 1, loss 1.81456, acc 0.40625
2017-12-09T00:33:49.690599: step 2, loss 20.8427, acc 0.804688
2017-12-09T00:33:59.519883: step 3, loss 6.29215, acc 0.789062
2017-12-09T00:34:10.259297: step 4, loss 1.34167, acc 0.804688
2017-12-09T00:34:21.118806: step 5, loss 0.90234, acc 0.382812
2017-12-09T00:34:32.246150: step 6, loss 0.547548, acc 0.796875
2017-12-09T00:34:35.967202: step 7, loss 0.812425, acc 0.75
2017-12-09T00:34:50.184814: step 8, loss 0.467912, acc 0.8125
2017-12-09T00:35:04.641061: step 9, loss 0.606213, acc 0.75
2017-12-09T00:35:17.318532: step 10, loss 0.545974, acc 0.804688
2017-12-09T00:35:30.033896: step 11, loss 0.614976, acc 0.789062
2017-12-09T00:35:43.308117: step 12, loss 0.54714, acc 0.796875
2017-12-09T00:35:54.887713: step 13, loss 0.6652, acc 0.773438
2017-12-09T00:35:58.696428: step 14, loss 0.554933, acc 0.8125
2017-12-09T00:36:12.206746: step 1

KeyboardInterrupt: 