In [290]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
import tensorflow as tf
import utils.ymr_data as ymr
from sklearn import metrics
from sklearn.cross_validation import train_test_split
import math
import time
import itertools
import pickle
import datetime

In [98]:
NUM_EPOCHS = 100
SENTENCE_LENGTH_PADDED=512
EMBEDDING_SIZE = 150
BATCH_SIZE=128
EVALUATE_DEV_EVERY=16
L1_NUM_FILTERS = 150
L1_FILTER_SIZES = [2,3,4]
CHECKPOINTS_DIR = "./checkpoints/"
TRAIN_SUMMARY_DIR = "./summaries/train"
DEV_SUMMARY_DIR = "./summaries/dev"

PADDING_CHARACTER =  u"\u0000"
NUM_CLASSES=6

In [99]:
# Load data
df = ymr.load()

# Preprocessing: Pad all sentences
df.text = df.text.str.slice(0,SENTENCE_LENGTH_PADDED).str.ljust(SENTENCE_LENGTH_PADDED, PADDING_CHARACTER)

# Generate vocabulary and dataset
vocab, vocab_inv = ymr.vocab(df)
data = ymr.make_polar(df)
train, test = ymr.train_test_split(data)
train_x, train_y_ = ymr.make_xy(train, vocab)
test_x, test_y_ = ymr.make_xy(test, vocab)

VOCABULARY_SIZE = len(vocab)

# Convert ys to probability distribution
train_y = np.zeros((len(train_y_), NUM_CLASSES))
train_y[np.arange(len(train_y_)), train_y_] = 1.
test_y = np.zeros((len(test_y_), NUM_CLASSES))
test_y[np.arange(len(test_y_)), test_y_] = 1.

# Use a dev set
train_x, dev_x, train_y, dev_y = train_test_split(train_x, train_y, test_size=0.05)

print("Training set size: %d" % len(train_y))
print("Dev set size: %d" % len(dev_y))
print("Test set size: %d" % len(test_y))

Training set size: 29017
Dev set size: 1528
Test set size: 7637


In [151]:
class CharCNN:
    def __init__(self, vocabulary_size, embedding_size=128, filters_sizes=[1, 2, 3], num_filters=128,
                affine_dim=256, dropout_keep_prob=0.5, num_gpus=1, optimizer=None):
        self.vocabulary_size = vocabulary_size
        self.embedding_size = embedding_size
        self.filters_sizes = filters_sizes
        self.num_filters = num_filters
        self.affine_dim = affine_dim
        self.dropout_keep_prob = dropout_keep_prob
        self.num_gpus = num_gpus
        self.optimizer = optimizer if optimizer else tf.train.AdamOptimizer(1e-4)
        # Assigned when building the graph
        self.tensors = {}

    def build_input_batches(self, train_x, train_y, num_epochs=NUM_EPOCHS, batch_size=BATCH_SIZE):
        """
        Builds a graph that stores the input in memory and uses queues
        to slice it into bactches.

        Returns a node representing batches of x and y.
        """
        # Store the data in graph notes
        train_x_const = tf.constant(train_x.astype("int32"))
        train_y_const = tf.constant(train_y.astype("float32"))
        # Use Tensorflow's queues and batching features
        x_slice, y_slice = tf.train.slice_input_producer([train_x_const, train_y_const], num_epochs=num_epochs)
        x, y = tf.train.batch([x_slice, y_slice], batch_size=BATCH_SIZE)
        return [x, y]

    def build_embedding_layer(self, shape, input_tensor):
        """
        Builds an embedding layer.

        Returns the final embedding.
        """
        # We force this on the CPU because the op isn't implemented for the GPU yet
        with tf.device('/cpu:0'):
            W_intializer = tf.random_uniform(shape, -1.0, 1.0)
            W_embeddings = tf.Variable(W_intializer, name="W")
            return tf.nn.embedding_lookup(W_embeddings, input_tensor)

    def build_conv_maxpool(self, filter_shape, pool_shape, input_tensor):
        """
        Builds a convolutional layer followed by a max-pooling layer.
        """
        W = tf.get_variable("W", filter_shape, initializer=tf.truncated_normal_initializer(stddev=0.1))
        b = tf.get_variable("b", filter_shape[-1], initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(input_tensor, W, strides=[1, 1, 1, 1], padding="VALID")
        h = tf.nn.relu(conv + b, name="conv")
        return tf.nn.max_pool(h, ksize=pool_shape, strides=[1, 1, 1, 1], padding='VALID', name="pool")

    def build_affine(self, shape, input_tensor):
        """
        Builds an affine (fully-connected) layer
        """
        W = tf.Variable(tf.truncated_normal(shape, stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=shape[-1:]), name="b")
        h = tf.nn.relu(tf.matmul(input_tensor, W) + b, name="h")
        return h

    def build_softmax(self, shape, input_tensor):
        """
        Builds a softmax layer
        """
        W = tf.Variable(tf.truncated_normal(shape, stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=shape[-1:]), name="b")
        return tf.nn.softmax(tf.matmul(input_tensor, W) + b, name="y")

    def inference(self, x, labels):
        """
        Builds the graph and returns the final prediction.
        """
        
        sequence_length = x.get_shape().as_list()[1]
        num_classes = labels.get_shape().as_list()[1]

        with tf.variable_scope("embedding"):
            embedded_chars = self.build_embedding_layer([self.vocabulary_size, self.embedding_size], x)
            embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)

        # Create a convolution + maxpool layer for each filter
        pooled_outputs = []
        for i, filter_size in enumerate(self.filters_sizes):
            with tf.variable_scope("conv-maxpool-%s" % filter_size):
                with tf.device("/gpu:%d" % (i % self.num_gpus)):
                    filter_shape = [filter_size, self.embedding_size, 1, self.num_filters]
                    pool_shape = [1, sequence_length - filter_size + 1, 1, 1]
                    pooled = self.build_conv_maxpool(filter_shape, pool_shape, embedded_chars_expanded)
                    pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = self.num_filters * len(self.filters_sizes)
        h_pool = tf.concat(3, pooled_outputs)
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

        # Affine Layer with dropout
        with tf.variable_scope("affine"):
            h_affine = self.build_affine([num_filters_total, self.affine_dim], h_pool_flat)
        h_affine_drop = tf.nn.dropout(h_affine, self.dropout_keep_prob)

        # Softmax Layer (Final output)
        with tf.variable_scope("softmax"):
            y = self.build_softmax([self.affine_dim, num_classes], h_affine_drop)
        
        return y
        
    def loss(self, predictions, labels):
        """
        Calculates the mean cross-entropy loss
        """
        return -tf.reduce_mean(labels * tf.log(predictions), name="loss")
    
    def accuracy(self, y, labels):
        """
        Returns accuracy tensor
        """
        correct_predictions = tf.equal(tf.argmax(y, 1), tf.argmax(labels, 1), name="correct_predictions")
        return tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

    def train(self, loss, global_step):
        """
        Returns train op
        """
        return self.optimizer.minimize(loss, global_step=global_step)

    def print_parameters(self):
        print "\nParameters:"
        print("----------")
        total_parameters = 0
        for v in tf.trainable_variables():
            num_parameters = v.get_shape().num_elements()
            print("{}: {:,}".format(v.name, num_parameters))
            total_parameters += num_parameters
        print("Total Parameters: {:,}\n".format(total_parameters))      



In [274]:
class GraphSerDe:
    
    def serialize(self, dirname, graph_def):
        graph_file = os.path.join(dirname, "graph.pb")
        varfile = os.path.join(dirname, "variables.pk1")
        # Write graph
        tf.train.write_graph(graph_def, dirname, "graph.pb", as_text=False)
        print("Wrote GraphDef to {}".format(graph_file))
        # Write variables
        var_names = [v.name for v in tf.all_variables()]
        with open(varfile, "wb") as f:
            pickle.dump(var_names, f)
        print("Wrote variables to {}".format(varfile))
    
    def deserialize(self, dirname):
        graph_file = os.path.join(dirname, "graph.pb")
        varfile = os.path.join(dirname, "variables.pk1")
        with open(graph_file, "rb") as f:
            graph_def = tf.GraphDef.FromString(f.read())
        with open(varfile, "rb") as f:
            var_names = pickle.load(f)
        return [graph_def, var_names]

In [270]:
import tempfile

print

/var/folders/5z/1fqtfnl112q6_595rkrfh6z80000gn/T/tmpzL0GS4


In [287]:
tmpdir =  tempfile.mkdtemp() + "-graph"
with tf.Graph().as_default():
    a = tf.Variable(tf.truncated_normal([5,5], stddev=0.1), name="W")
    print [v.name for v in tf.all_variables()]
    serde = GraphSerDe()
    ser.serialize(tmpdir, tf.get_default_graph().as_graph_def())
    
with tf.Graph().as_default():
    print [v.name for v in tf.all_variables()]
    serde = GraphSerDe()
    graph_def, var_names = serde.deserialize(tmpdir)
    variables = tf.import_graph_def(graph_def, return_elements=var_names)
    for v in variables:
        v.name = v.name.replace("import/", "")
    print [v.name for v in variables]

[u'W:0']
Wrote GraphDef to /var/folders/5z/1fqtfnl112q6_595rkrfh6z80000gn/T/tmpBvi89g-graph/graph.pb
Wrote variables to /var/folders/5z/1fqtfnl112q6_595rkrfh6z80000gn/T/tmpBvi89g-graph/variables.pk1
[]


AttributeError: can't set attribute

In [293]:
class Trainer:
    def __init__(self, train_op, global_step, summary_op, eval_feed_dict,
                 save_every=10, evaluate_every=10):
        self.train_op = train_op
        self.global_step = global_step
        self.summary_op = summary_op
        self.save_every = save_every
        self.evaluate_every = evaluate_every
        self.eval_feed_dict = eval_feed_dict
        
        # All data goes in here
        RUNDIR = "./runs/%s" % int(time.time())
        
        # Write graph
        # gsd = GraphSerDe()
        # gsd.serialize("%s/graph" % RUNDIR)      
        
        # Initialize summary writers
        self.train_writer = tf.train.SummaryWriter("%s/summaries/train" % RUNDIR)
        self.eval_writer = tf.train.SummaryWriter("%s/summaries/eval" % RUNDIR)
        
        # Initialize saver
        self.save_prefix = "%s/checkpoints/model" % RUNDIR
        if not os.path.exists(os.path.dirname(self.save_prefix)):
            os.makedirs(os.path.dirname(self.save_prefix))    
        self.saver = tf.train.Saver(tf.all_variables())        
    
    def evaluate(self):
        summaries_, global_step_ = sess.run([summary_op,self.global_step], feed_dict=self.eval_feed_dict)
        self.eval_writer.add_summary(summaries_, global_step_)
        # Print summaries
        print("\n========== Evaluation ==========")
        summary_obj = tf.Summary.FromString(summaries_)
        interesting_summaries = [v for v in summary_obj.value if not "queue/" in v.tag]
        print "\n".join(["{}: {:f}".format(v.tag, v.simple_value) for v in interesting_summaries])
        print("")
    
    def step(self):
        sess = tf.get_default_session()
        # Run training step
        _, global_step_, summaries_ = sess.run([self.train_op, self.global_step, self.summary_op])
        print("{}: Step {}".format(datetime.datetime.now().isoformat(), global_step_))
        # Write summary
        self.train_writer.add_summary(summaries_, global_step_)
        # Maybe save
        if global_step_ % SAVE_EVERY == 0:
            save_path = saver.save(sess, self.save_prefix, global_step_)
            print("\nSaved model parameters to %s" % save_path)
        # Maybe evaluate
        if global_step_ % EVALUATE_EVERY == 0:
            self.evaluate()
            

In [294]:
with tf.Graph().as_default():
    cnn = CharCNN(VOCABULARY_SIZE)
    
    # Generate input batches
    with tf.variable_scope("input"):
        x, labels = cnn.build_input_batches(train_x, train_y)
    
    # Generate predictions
    predictions = cnn.inference(x, labels)
    
    # Loss
    with tf.variable_scope("loss"):
        loss = cnn.loss(predictions, labels)
    
    # Train
    global_step = tf.Variable(0, name="global_step")
    train_op = cnn.train(loss, global_step)
    
    # Summaries
    tf.scalar_summary("loss", loss)
    tf.scalar_summary("accuracy", cnn.accuracy(predictions, labels))
    summary_op = tf.merge_all_summaries()

    eval_feed_dict = { x: dev_x, labels: dev_y }
    trainer = Trainer(train_op, global_step, summary_op, eval_feed_dict)
    
    # Create an run session
    step = 0
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    with sess.as_default():
        # Initialize
        sess.run(tf.initialize_all_variables())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            while not coord.should_stop():
                trainer.step()
        except tf.errors.OutOfRangeError:
            print("Done!")
        finally:
            coord.request_stop()      
        coord.join(threads)

2015-11-21T00:53:03.190714: Step 1
2015-11-21T00:53:05.248097: Step 2
2015-11-21T00:53:07.203884: Step 3
2015-11-21T00:53:09.140016: Step 4
2015-11-21T00:53:11.096717: Step 5

Saved model to ./runs/1448063580/checkpoints/model-5

loss: 0.753661
accuracy: 0.263743

2015-11-21T00:53:23.602652: Step 6
2015-11-21T00:53:25.711613: Step 7
2015-11-21T00:53:27.784368: Step 8
2015-11-21T00:53:29.752793: Step 9
2015-11-21T00:53:31.696584: Step 10

Saved model to ./runs/1448063580/checkpoints/model-10


KeyboardInterrupt: 