In [1]:
import math
import pickle
import timeit
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
%matplotlib inline

# Load toy data.
# np.load on an .npz archive returns an NpzFile that keeps the file open, so
# every archive is read inside a `with` block and closed as soon as the arrays
# that are needed have been pulled into memory.
with np.load("data/toy_train.npz") as train_data:
    X_train = train_data["questions"]
    X_train_mask = train_data["mask"]
    y_train = train_data["answers"]

with np.load("data/toy_val_questions.npz") as val_data:
    X_val = val_data["questions"]
    X_val_mask = val_data["mask"]

# SECURITY: pickle.load can execute arbitrary code while deserialising.
# Only load this file if it comes from a trusted source.
with open('data/toy_val_answers', 'rb') as fp:
    y_val = pickle.load(fp)

# Pre-trained embedding matrix, stored under the "glove" key of the archive.
with np.load("data/glove.trimmed.100.npz") as glove_data:
    np_embeddings = glove_data["glove"]

def most_common(lst):
    """Return the element that occurs most often in ``lst``.

    Ties are broken deterministically in favour of the element that appears
    first in ``lst``.

    Args:
        lst: non-empty sequence of hashable items.

    Returns:
        The most frequent item of ``lst``.

    Raises:
        ValueError: if ``lst`` is empty.
    """
    if len(lst) == 0:
        raise ValueError("most_common() requires a non-empty sequence")
    # One O(n) counting pass instead of calling lst.count() per distinct item.
    counts = Counter(lst)
    # Scanning `lst` itself (not the Counter or a set) makes max() return the
    # earliest element among those sharing the highest count.
    return max(lst, key=counts.__getitem__)
def y_first(y_all):
    """Collapse every entry of ``y_all`` into a single value.

    For each entry, the first field (index 0) of every answer is collected and
    the most frequent of those values, as chosen by ``most_common``, is kept.

    Args:
        y_all: iterable whose entries are sequences of indexable answers.

    Returns:
        A NumPy array holding one selected value per entry of ``y_all``.
    """
    return np.array([
        most_common([answer[0] for answer in answer_group])
        for answer_group in y_all
    ])
# Reduce the validation answers to one label per question.
# NOTE(review): y_val is overwritten with its own reduced form, so this line
# expects the raw pickled answers; re-running it on the already reduced array
# is presumably not going to work — reload y_val first.
y_val = y_first(y_val)
print("y_val", y_val.shape)

# Print the shape of every array prepared above, one per line.
for _label, _array in (
    ("X_train", X_train),
    ("X_train_mask", X_train_mask),
    ("y_train", y_train),
    ("X_val", X_val),
    ("X_val_mask", X_val_mask),
    ("y_val", y_val),
    ("np_embeddings", np_embeddings),
):
    print(_label, _array.shape)


y_val (214354,)
X_train (443757, 20)
X_train_mask (443757,)
y_train (443757,)
X_val (214354, 20)
X_val_mask (214354,)
y_val (214354,)
np_embeddings (47382, 100)


In [None]:
# NOTE(review): leftover scratch cell; it only prints an empty line.
print()

In [None]:
# Drop whatever is in the default graph before building a new one.
tf.reset_default_graph()

# Inputs that change with every batch. The leading dimension is None, so it is
# set automatically from the size of the batch that is fed in.
X = tf.placeholder(dtype=tf.int64, shape=[None, 20])
y = tf.placeholder(dtype=tf.int64, shape=[None])
mask = tf.placeholder(dtype=tf.int64, shape=[None])
is_training = tf.placeholder(dtype=tf.bool)

def simple_model(X, y, mask, np_embeddings, num_hidden=100, num_classes=47382):
    """Build an embedding -> GRU -> dense classifier and return its logits.

    Args:
        X: integer tensor of ids that are looked up in the embedding matrix.
        y: label tensor; accepted for interface compatibility but not used
            when building the graph.
        mask: integer tensor passed to ``tf.nn.dynamic_rnn`` as
            ``sequence_length``.
        np_embeddings: array used as the initial value of the embedding
            variable "embeddings/L".
        num_hidden: number of units of the GRU cell.
        num_classes: number of output units of the final dense layer. The
            default is the value that used to be hard-coded here.

    Returns:
        The output tensor of the final dense layer (unscaled logits).
    """
    with tf.variable_scope("embeddings"):
        embeddings = tf.get_variable(initializer=np_embeddings, name="L")

    X_embed = tf.nn.embedding_lookup(embeddings, X)
    print(X_embed)

    gru_cell = tf.contrib.rnn.GRUCell(num_hidden)
    # The RNN dtype follows the embeddings instead of being pinned to float64,
    # so the model still builds if the embedding matrix uses another float type.
    outputs, state = tf.nn.dynamic_rnn(cell=gru_cell,
                                       inputs=X_embed,
                                       sequence_length=mask,
                                       dtype=X_embed.dtype.base_dtype)

    print("outputs", outputs)
    print("state", state)

    # The final layer is linear on purpose: its output is consumed as logits
    # by a softmax cross-entropy loss, and a ReLU here would clamp every
    # logit at zero and stop the model from pushing class scores down.
    yout = tf.layers.dense(inputs=state, units=num_classes, activation=None,
                           kernel_initializer=tf.contrib.layers.xavier_initializer())
    print("yout", yout)
    return yout


# Build the model graph on the placeholders; y_out is what the loss treats as logits.
y_out = simple_model(X, y, mask, np_embeddings)


# Loss: softmax cross-entropy against the integer labels, then its mean.
total_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_out)
mean_loss = tf.reduce_mean(total_loss)

# Optimizer: Adam with a learning rate of 5e-4, minimising the mean loss.
optimizer = tf.train.AdamOptimizer(learning_rate=5e-4)
train_step = optimizer.minimize(mean_loss)

In [None]:
def run_model(session, predict, loss_val, Xd, maskd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    """Run ``predict``/``loss_val`` over a dataset, optionally training.

    The data is fed through the module-level placeholders ``X``, ``y``,
    ``mask`` and ``is_training``, in minibatches drawn from one random
    permutation that is created once per call.

    Args:
        session: session used to evaluate the graph.
        predict: tensor whose argmax over axis 1 is compared with ``y``.
        loss_val: loss tensor evaluated for every minibatch.
        Xd: inputs, indexed as ``Xd[idx, :]``.
        maskd: values fed to ``mask``, indexed as ``maskd[idx]``.
        yd: labels, indexed as ``yd[idx]``.
        epochs: number of passes over the data; must be at least 1.
        batch_size: number of examples per minibatch.
        print_every: when training, print progress every this many iterations.
        training: optional op run together with the loss; when given, the
            call counts as training.
        plot_losses: if true, plot the per-minibatch losses of each epoch
            (each value is the batch loss multiplied by the batch size).

    Returns:
        ``(total_loss, total_correct)`` of the last epoch: the size-weighted
        average loss and the fraction of correct predictions.

    Raises:
        ValueError: if ``epochs`` is smaller than 1.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    # have tensorflow compute accuracy
    correct_prediction = tf.equal(tf.argmax(predict, 1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # shuffle indices
    num_examples = Xd.shape[0]
    train_indicies = np.arange(num_examples)
    np.random.shuffle(train_indicies)

    training_now = training is not None

    # Things to evaluate per batch. Use the loss that was passed in rather
    # than a module-level tensor, and swap the accuracy op for the training
    # op when one is given.
    variables = [loss_val, correct_prediction, accuracy]
    if training_now:
        variables[-1] = training

    # counter
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(int(math.ceil(num_examples / batch_size))):
            # Batch boundaries are derived from the dataset being iterated
            # (Xd), not from any other array.
            start_idx = i * batch_size
            idx = train_indicies[start_idx:start_idx + batch_size]

            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx, :],
                         y: yd[idx],
                         mask: maskd[idx],
                         is_training: training_now}
            # The real size of this batch; the last one may be shorter.
            actual_batch_size = idx.shape[0]

            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables, feed_dict=feed_dict)

            # aggregate performance stats
            losses.append(loss * actual_batch_size)
            correct += np.sum(corr)

            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"\
                      .format(iter_cnt, loss, np.sum(corr) / actual_batch_size))
            iter_cnt += 1
        total_correct = correct / num_examples
        total_loss = np.sum(losses) / num_examples
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
              .format(total_loss, total_correct, e + 1))
        if plot_losses:
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e + 1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss, total_correct

# Open a session, pin the enclosed work to the CPU ("/cpu:0" or "/gpu:0"),
# initialise all variables, then run one training epoch followed by one
# validation pass.
with tf.Session() as sess, tf.device("/cpu:0"):
    sess.run(tf.global_variables_initializer())

    print('Training')
    run_model(sess, y_out, mean_loss, X_train, X_train_mask, y_train,
              epochs=1, batch_size=64, print_every=100,
              training=train_step, plot_losses=True)

    print('Validation')
    run_model(sess, y_out, mean_loss, X_val, X_val_mask, y_val,
              epochs=1, batch_size=64)