In [1]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
import tensorflow as tf
import utils.ymr_data as ymr
from sklearn import metrics
from sklearn.cross_validation import train_test_split
import math
import itertools

In [None]:
# --- Training schedule ---
NUM_EPOCHS = 100
BATCH_SIZE = 128
EVALUATE_DEV_EVERY = 16  # the dev set is evaluated whenever step % EVALUATE_DEV_EVERY == 0

# --- Input encoding ---
SENTENCE_LENGTH_PADDED = 512  # every text is cut / padded to exactly this many characters
PADDING_CHARACTER = u"\u0000"  # fill character used when padding short texts
NUM_CLASSES = 6  # width of the one-hot label vectors and of the softmax layer

# --- Model size ---
EMBEDDING_SIZE = 150
L1_NUM_FILTERS = 150  # number of filters for each entry of L1_FILTER_SIZES
L1_FILTER_SIZES = [2, 3, 4]

# --- Output locations ---
CHECKPOINTS_DIR = "./checkpoints/"
TRAIN_SUMMARY_DIR = "./summaries/train"
DEV_SUMMARY_DIR = "./summaries/dev"

In [None]:
# Load data
# NOTE(review): presumably a pandas DataFrame with a `text` column (the next
# cell uses `df.text.str`) — confirm against utils.ymr_data.
df = ymr.load()

In [None]:
# Preprocessing: cut every text down to SENTENCE_LENGTH_PADDED characters, then
# right-fill the shorter ones with PADDING_CHARACTER up to that same width.
df.text = (
    df.text.str
    .slice(start=0, stop=SENTENCE_LENGTH_PADDED)
    .str.ljust(width=SENTENCE_LENGTH_PADDED, fillchar=PADDING_CHARACTER)
)

In [None]:
# Generate vocabulary and dataset
# NOTE(review): the ymr helpers live in utils.ymr_data and are not visible
# here; what they return is inferred from how the results are used below — confirm.
vocab, vocab_inv = ymr.vocab(df)
data = ymr.make_polar(df)
# This is the project's own split helper, not the sklearn train_test_split used further down.
train, test = ymr.train_test_split(data)
train_x, train_y_ = ymr.make_xy(train, vocab)
test_x, test_y_ = ymr.make_xy(test, vocab)

# Number of rows of the embedding matrix created in build_graph.
VOCABULARY_SIZE = len(vocab)

# Convert ys to probability distribution: one row per example, 1.0 in the
# column selected by the example's label and 0.0 everywhere else (one-hot).
# assumes train_y_ / test_y_ hold integer class ids in [0, NUM_CLASSES) — TODO confirm
train_y = np.zeros((len(train_y_), NUM_CLASSES))
train_y[np.arange(len(train_y_)), train_y_] = 1.
test_y = np.zeros((len(test_y_), NUM_CLASSES))
test_y[np.arange(len(test_y_)), test_y_] = 1.

# Use a dev set: hold out 5% of the training examples (sklearn split).
# NOTE(review): no random_state is passed, so the split is presumably not
# reproducible across runs unless a global NumPy seed is set elsewhere.
train_x, dev_x, train_y, dev_y = train_test_split(train_x, train_y, test_size=0.05)

# BATCH
# SPLIT_SIZE is the number of batches (not the size of one batch); array_split
# then cuts x and y into that many nearly equal, index-aligned chunks of at
# most BATCH_SIZE rows each.
SPLIT_SIZE = math.ceil(len(train_x)/float(BATCH_SIZE))
train_x_batched = np.array_split(train_x, SPLIT_SIZE)
train_y_batched = np.array_split(train_y, SPLIT_SIZE)

In [None]:
# Report how many examples ended up in each split (one line per split).
print("\n".join([
    "Training set size: %d" % len(train_y),
    "Dev set size: %d" % len(dev_y),
    "Test set size: %d" % len(test_y),
]))

In [None]:
# Build the graph
# ==================================================

# (label, shape op) pairs collected while building the graph; read by print_shapes.
shape_variables = []

def build_graph():
    """Build the CNN text classifier in the current default graph.

    Architecture: embedding lookup -> for every size in L1_FILTER_SIZES one
    convolution + ReLU + max-pool over the whole sequence axis -> concatenation
    of the pooled features -> 256-unit ReLU layer -> dropout -> softmax over
    NUM_CLASSES. Training uses Adam (learning rate 1e-4) on the cross-entropy.

    Side effects:
        * Publishes ``x``, ``y_``, ``train_step``, ``cross_entropy``,
          ``accuracy`` and ``summary_op`` as module-level globals, because
          train_batch, evaluate_dev and print_shapes look them up by exactly
          those names.
        * Empties and refills the module-level ``shape_variables`` list.

    Returns:
        dict: the same six tensors/ops, keyed by their global names.
    """
    global x, y_, train_step, cross_entropy, accuracy, summary_op

    # Empty the list in place so shape ops from an earlier build are never
    # mixed with the ones created by this call.
    del shape_variables[:]

    # Network inputs and output
    x = tf.placeholder(tf.int32, shape=[None, SENTENCE_LENGTH_PADDED], name="x")
    y_ = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES], name="y")

    # Layer 1: Embedding
    with tf.name_scope("embedding"):
        W_embeddings = tf.Variable(
            tf.random_uniform([VOCABULARY_SIZE, EMBEDDING_SIZE], -1.0, 1.0),
            name="W_embeddings")
        embed = tf.nn.embedding_lookup(W_embeddings, x)
        # Add a dimension corresponding to the channel - it's expected by the conv layer
        embed_expanded = tf.expand_dims(embed, -1)
        shape_variables.append(("Embedding", tf.shape(embed_expanded, name="embed_shape")))

    def build_convpool(filter_size, num_filters):
        """Convolve the embedded input with `num_filters` filters spanning
        `filter_size` positions and the full embedding width, then max-pool
        over every remaining position."""
        W = tf.get_variable("weights", [filter_size, EMBEDDING_SIZE, 1, num_filters],
                            initializer=tf.truncated_normal_initializer(stddev=0.1))
        b = tf.get_variable("bias", [num_filters], initializer=tf.constant_initializer(0.1))
        conv_tmp = tf.nn.conv2d(embed_expanded, W, strides=[1, 1, 1, 1], padding='VALID')
        h_conv = tf.nn.relu(conv_tmp + b)
        # With VALID padding the conv output has SENTENCE_LENGTH_PADDED-filter_size+1
        # rows, so this window covers all of them and leaves one value per filter.
        pooled = tf.nn.max_pool(h_conv,
                                ksize=[1, SENTENCE_LENGTH_PADDED - filter_size + 1, 1, 1],
                                strides=[1, 1, 1, 1], padding='VALID')
        return pooled

    # Layer 2: one convolution + pooling branch per filter size
    pooled_outputs = []
    total_filters = L1_NUM_FILTERS * len(L1_FILTER_SIZES)
    for filter_size in L1_FILTER_SIZES:
        # variable_scope keeps the get_variable names of each branch distinct
        with tf.variable_scope("conv-%s" % filter_size):
            with tf.name_scope("conv-%s" % filter_size):
                pooled = build_convpool(filter_size, L1_NUM_FILTERS)
                pooled_outputs.append(pooled)
                shape_variables.append(("Pooled Output (%s)" % filter_size, tf.shape(pooled)))

    # Combine all the pooled features along the filter axis
    h_pool = tf.concat(3, pooled_outputs)
    shape_variables.append(("Pooled Output Final", tf.shape(h_pool)))

    # Layer 3: Fully connected
    with tf.name_scope("affine_1"):
        h_pool1_flat = tf.reshape(h_pool, [-1, total_filters])
        W_fc1 = tf.Variable(tf.truncated_normal([total_filters, 256], stddev=0.1), name="W_fc1")
        b_fc1 = tf.Variable(tf.constant(0.1, shape=[256]), name="b_fc1")
        h_fc1 = tf.nn.relu(tf.matmul(h_pool1_flat, W_fc1) + b_fc1)
        shape_variables.append(("Pooled Flat", tf.shape(h_pool1_flat)))
        shape_variables.append(("Affine", tf.shape(h_fc1)))

    # Dropout
    # NOTE(review): the keep probability is hard-wired, so dropout is also
    # active whenever this graph is run for evaluation (see evaluate_dev).
    # Feeding it through a placeholder would fix that but requires changing
    # every feed_dict, so it is left as is here.
    with tf.name_scope("dropout1"):
        h_fc1_drop = tf.nn.dropout(h_fc1, 0.5)

    # Layer 4: Softmax / Readout
    with tf.name_scope("softmax"):
        W_fc2 = tf.Variable(tf.truncated_normal([256, NUM_CLASSES], stddev=0.1), name="W_fc2")
        b_fc2 = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES]), name="b_fc2")
        y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    with tf.name_scope("loss"):
        # Clip before taking the log: a probability that underflows to 0 would
        # give log(0) = -inf, and 0 * -inf turns the whole loss into NaN.
        clipped_probs = tf.clip_by_value(y_conv, 1e-10, 1.0)
        cross_entropy = -tf.reduce_mean(y_ * tf.log(clipped_probs), name="crossentropy_sum")

    # Evaluation metric
    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"), name="accuracy")

    # Training procedure
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    # Summaries (registering the scalar summaries is what merge_all_summaries picks up)
    tf.scalar_summary("cross-entropy", cross_entropy)
    tf.scalar_summary("accuracy", accuracy)
    summary_op = tf.merge_all_summaries()

    return {
        "x": x,
        "y_": y_,
        "train_step": train_step,
        "cross_entropy": cross_entropy,
        "accuracy": accuracy,
        "summary_op": summary_op,
    }

In [None]:
# Build the model into the current default graph, then keep that graph's
# GraphDef in `s` for inspection.
build_graph()
s = tf.get_default_graph().as_graph_def()

In [22]:
# Keep a handle on the default graph for interactive inspection.
g = tf.get_default_graph()

<tensorflow.python.framework.ops.Graph at 0x1062b8390>

In [23]:
# Display every operation of the graph held by `g` (the list is long).
g.get_operations()

[<tensorflow.python.framework.ops.Operation at 0x10e052510>,
 <tensorflow.python.framework.ops.Operation at 0x10e052590>,
 <tensorflow.python.framework.ops.Operation at 0x10e052650>,
 <tensorflow.python.framework.ops.Operation at 0x10e052790>,
 <tensorflow.python.framework.ops.Operation at 0x10e052690>,
 <tensorflow.python.framework.ops.Operation at 0x10e052d50>,
 <tensorflow.python.framework.ops.Operation at 0x10e06b310>,
 <tensorflow.python.framework.ops.Operation at 0x10e06b490>,
 <tensorflow.python.framework.ops.Operation at 0x10e06b550>,
 <tensorflow.python.framework.ops.Operation at 0x10e06b790>,
 <tensorflow.python.framework.ops.Operation at 0x10e06be50>,
 <tensorflow.python.framework.ops.Operation at 0x10e06bd50>,
 <tensorflow.python.framework.ops.Operation at 0x10e06bc50>,
 <tensorflow.python.framework.ops.Operation at 0x10e06bcd0>,
 <tensorflow.python.framework.ops.Operation at 0x10e07a090>,
 <tensorflow.python.framework.ops.Operation at 0x10e07a0d0>,
 <tensorflow.python.fram

In [32]:
# Parse a serialized GraphDef from disk and import it into a throwaway graph,
# leaving the default graph untouched.
with tf.Graph().as_default():
    # Only the read needs the file handle; parsing happens after it is closed.
    with open("./runs/1448022103/graph/graph.pb", 'rb') as pb_file:
        serialized_graph = pb_file.read()
    graph_def = tf.GraphDef.FromString(serialized_graph)
    tf.import_graph_def(graph_def)


In [33]:
# List the variables of the current default graph through the documented
# top-level entry point instead of reaching through the `tf.variables` module
# attribute, which is not part of the public API.
tf.all_variables()

<function tensorflow.python.ops.variables.all_variables>

In [None]:
# Build the model again and display the default graph.
# NOTE(review): build_graph() was already run against the default graph in an
# earlier cell, and it creates its conv variables with tf.get_variable under
# fixed scope names; a second build in the same graph will presumably be
# rejected as a duplicate — confirm, or build inside a fresh tf.Graph().
build_graph()
tf.get_default_graph()

In [None]:
# ListFields is an instance method of protobuf messages, so it has to be
# called on an actual GraphDef (here: the default graph's) rather than on the
# GraphDef class itself.
tf.get_default_graph().as_graph_def().ListFields()

In [None]:
# Re-bind `g` to whatever graph is the default at this point.
g = tf.get_default_graph()

In [None]:
# NOTE(review): incomplete expression — looks like an abandoned attribute
# lookup / tab-completion on the graph handle; confirm what was intended or
# drop the cell.
g.a

In [None]:
def train_batch(batch_x, batch_y, step):
    """Run one optimisation step on a batch and log its training summary.

    The update, the loss and the merged summary are fetched in a single
    ``sess.run`` call, so the network is evaluated once per batch and the
    logged summary describes the same pass that produced the update.

    Relies on the module-level ``sess``, ``x``, ``y_``, ``train_step``,
    ``cross_entropy``, ``summary_op`` and ``summary_writer_train``.

    Args:
        batch_x: inputs fed to the ``x`` placeholder.
        batch_y: targets fed to the ``y_`` placeholder.
        step: global step under which the summary is recorded.

    Returns:
        The value fetched for ``cross_entropy`` on this batch.
    """
    feed_dict = {x: batch_x, y_: batch_y}
    _, loss, train_summary_str = sess.run(
        [train_step, cross_entropy, summary_op], feed_dict=feed_dict)
    summary_writer_train.add_summary(train_summary_str, step)
    return loss

def evaluate_dev(step):
    """Evaluate loss and accuracy on the whole dev set, print and log them.

    Relies on the module-level ``sess``, ``x``, ``y_``, ``dev_x``, ``dev_y``,
    ``cross_entropy``, ``accuracy``, ``summary_op`` and ``summary_writer_dev``.

    NOTE(review): build_graph applies dropout unconditionally, so the numbers
    reported here are computed with dropout active.

    Args:
        step: global step used in the printed lines and for the summary.

    Returns:
        tuple: ``(dev_loss, dev_accuracy)`` as fetched from the session.
    """
    feed_dict = {x: dev_x, y_: dev_y}
    # Evaluate
    dev_loss, dev_accuracy, dev_summary_str = sess.run(
        [cross_entropy, accuracy, summary_op], feed_dict=feed_dict)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("step %d, dev loss %g" % (step, dev_loss))
    print("step %d, dev accuracy %g" % (step, dev_accuracy))
    # Write summary
    summary_writer_dev.add_summary(dev_summary_str, step)
    return dev_loss, dev_accuracy

def print_shapes(batch_x, batch_y):
    """Evaluate every shape op recorded in ``shape_variables`` on one batch
    and print the results, one ``label: shape`` line each, framed by a
    "Shapes" header and dashed rules.

    Relies on the module-level ``sess``, ``x``, ``y_`` and ``shape_variables``.
    """
    inputs = {x: batch_x, y_: batch_y}
    labels, shape_ops = zip(*shape_variables)
    evaluated = sess.run(shape_ops, feed_dict=inputs)

    report = ["Shapes", "-----"]
    report.extend("%s: %s" % entry for entry in zip(labels, evaluated))
    report.append("-----")
    print("\n".join(report))

In [None]:
shape_variables = []
step = 0

# The Saver may not create missing directories, so make sure the checkpoint
# directory exists before the first save. The checkpoint prefix is a file stem
# inside that directory; passing the bare directory would produce files named
# just "-<step>".
if not os.path.isdir(CHECKPOINTS_DIR):
    os.makedirs(CHECKPOINTS_DIR)
checkpoint_prefix = os.path.join(CHECKPOINTS_DIR, "model")

with tf.Graph().as_default():
    build_graph()
    with tf.Session() as sess:
        # Initialize variables
        sess.run(tf.initialize_all_variables())
        # Print shapes
        print_shapes(train_x_batched[0], train_y_batched[0])
        # Initialize summary writers and savers
        summary_writer_train = tf.train.SummaryWriter(TRAIN_SUMMARY_DIR, graph_def=sess.graph_def)
        summary_writer_dev = tf.train.SummaryWriter(DEV_SUMMARY_DIR, graph_def=sess.graph_def)
        saver = tf.train.Saver()
        try:
            # For each epoch and batch...
            for epoch in range(NUM_EPOCHS):
                print("\nEpoch %d" % epoch)
                print("----------")
                for batch_x, batch_y in zip(train_x_batched, train_y_batched):
                    train_batch(batch_x, batch_y, step)
                    if step % EVALUATE_DEV_EVERY == 0:
                        evaluate_dev(step)
                    step += 1
                # Save once the epoch's updates are applied, so the weights of
                # the final epoch are checkpointed as well.
                saver.save(sess, checkpoint_prefix, global_step=step)
        finally:
            # Flush and close the event files even if training is interrupted.
            summary_writer_train.close()
            summary_writer_dev.close()
            