In [1]:
##############################
# import modules
##############################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from gensim.models import word2vec
from gensim import models
import subprocess



In [2]:
# read the file, 
def read_data(filename):
    data = []
    with open(filename, 'r', encoding='utf-8') as myfile:
        f = myfile.readlines()
        s_num = 0
        i =0
        sentence_s = []
        tag_s = []
        dep_s = []
        s  = []   # sentence
        p = []    # tag
        d = []    # dependency
        for l in f:
            
            v = l.replace('\n','').split("\t")
            v.append(s_num)
            if len(l) != 1:
                data.append(v)
                dep = v[6] + '_' + v[7]
                word = v[1].lower()
                if any(char.isdigit() for char in word):
                    word = 'NUM'       # replace numbers with NUM
                s.append(word)
                p.append(v[3])
                d.append(dep)
                i +=1
            else:
                sentence_s.append(s)
                tag_s.append(p)
                dep_s.append(d)
                s_num +=1
                s  = []
                p = []
                d = []
        
    return data, sentence_s, tag_s, dep_s

In [3]:
def process_data(dataname):
    #reads in files, produces data structure with all actions
        #does so by applying produce_rule_list to every sentence.
        #for loop that sets actions to empty, calls p_r_l giving it
        #the stack and buffer, actions and correct_parse, adds finished action list
        #to new data file, for each sentence in the input data
    #input: name of the data file with all parses. Run with data file in same directory.
    #output: data file with all actions
    file = open(dataname)
    data = file.read()
    correct_parses = correct_parse_list(data)
    #gets rid of final whitespace
    del correct_parses[len(correct_parses)-1]
    
    #iterates over all parses, producing action list for each
    complete_rule_list = []
    arc_dict = {'Shift':0,'L_root':1,'R_root':2}
    for sentence_parse in correct_parses:
        stack = []
#         print(len(sentence_parse))
        buff = list(range(1,len(sentence_parse)+1))
        actions = []
        rule_list, arc_dict = produce_rule_list(stack, buff, actions, sentence_parse, arc_dict)
        complete_rule_list.append(np.array(rule_list))

    
    return complete_rule_list, arc_dict

def correct_parse_list(data):
    #Turns data into a list of lists of lists with relevant information
    correct_parse = data.split("\n\n")
    for index, paragraph in enumerate(correct_parse):
        correct_parse[index] = paragraph.split("\n")
    for paragraph in correct_parse:
        for index, line in enumerate(paragraph):
            paragraph[index] = line.split("\t")
    return correct_parse

In [4]:
def produce_rule_list(stack, buff, actions, sentence_parse, arc_dict):
    #recursive function that works through words in the sentence (stack/buffer)
        #until only one word is left, creating the list of actions 
        #that was taken to parse it.
    #input: stack, buffer, actions, correct parse
    #output: actions with the actions taken for each buff/stack configuration
    
    #base case
    if len(stack) == 1 and len(buff) == 0:
        #actions.append([stack[:], "empty", "R_arc"])
        actions.append([0,stack[0], 0, 2])
        return actions, arc_dict

    #If enough of the sentence is still left:
    #If there is not enough material in the stack, shift:
    if len(stack) == 0 :
        #print('chose S - small stack')
        actions.append([0,0,buff[0], 0])
        stack.append(buff[0])
        del buff[0]        
        return produce_rule_list(stack,buff,actions,sentence_parse, arc_dict)
    if len(stack) == 1:
        actions.append([0,stack[-1],buff[0], 0])
        stack.append(buff[0])
        del buff[0]
        return produce_rule_list(stack,buff,actions,sentence_parse, arc_dict)
    #If there are 2 or more words in the stack, decide which action to perform and perform it
    if len(stack) > 1:
        action = rule_decision(stack,buff,sentence_parse)
        stack, buff, actions, arc_dict = action(stack,buff,actions, sentence_parse, arc_dict)
        return produce_rule_list(stack,buff,actions,sentence_parse, arc_dict)
    

def rule_decision(stack, buff, sentence_parse):
    #determines which action to apply
    #input: words on stack, words on buff, correct parse
    #output: one of three methods, Shift(), L_arc(), R_arc()

    #find ids/heads (index [6]) from stack and sentence_parse
    s1 = stack[-2]
    head_of_s1 = int(sentence_parse[s1-1][6])
    s2 = stack[-1]
    head_of_s2 = int(sentence_parse[s2-1][6])
    
    #L arcs can always be applied if possible
    if head_of_s1 == s2:
        action = L_arc
        #print('chose L')
    else:
        #R arcs can only be applied if there is no word in the buffer which has the last word in the stack as a head
        if head_of_s2 == s1:
            buff_heads = [int(sentence_parse[x-1][6]) for x in buff]
            if s2 in buff_heads:
                action = Shift
                #print('chose S - s2 in buffheads')
            else:
                action = R_arc
                #print('chose R')
        #if there is no match between s1 and s2, simply shift another word from the buffer
        else:
            action = Shift
            #print('chose S - no matching s1s2')

    return action

#The following methods perform an arc or shift. These can be changed if more data is needed in the network.

def L_arc(stack, buff, actions, sentence_parse, arc_dict):
    #removes second to last item from stack, writes action to actions
    #input: stack and actions
    #output: new stack and actions with one L_arc line
    #s1, s2, b1, action
    s1 = int(stack[-2])
    s2 = int(stack[-1])
    if len(buff) == 0:
        b1 = 0
    else:
        b1 = int(buff[0])
    relation = "L_"+sentence_parse[s1-1][7]

    if relation not in arc_dict:
        maximum = max(arc_dict, key=arc_dict.get)
        arc_dict['L_'+relation[2:]] = arc_dict[maximum]+1
        arc_dict['R_'+relation[2:]] = arc_dict[maximum]+2
    

    actions.append([s1,s2,b1, arc_dict[relation]])
    del stack[-2]
    return stack, buff, actions, arc_dict



def R_arc(stack, buff, actions, sentence_parse, arc_dict):
    #removes last item from the stack, writes action to actions
    #input: stack and actions
    #output: new stack and actions with one R_arc line
    #s1, s2, b1, action
    s1 = int(stack[-2])
    s2 = int(stack[-1])
    if len(buff) == 0:
        b1 = 0
    else:
        b1 = int(buff[0])
        
    relation = "R_"+sentence_parse[s2-1][7]

    if relation not in arc_dict:
        maximum = max(arc_dict, key=arc_dict.get)
        arc_dict['L_'+relation[2:]] = arc_dict[maximum]+1
        arc_dict['R_'+relation[2:]] = arc_dict[maximum]+2 
    
    actions.append([s1,s2,b1, arc_dict[relation]])
    del stack[-1]
    return stack, buff, actions, arc_dict



def Shift(stack, buff, actions, sentence_parse, arc_dict):
    #moves an item from the buff to the stack, writes action to actions
    #input: stack, buff and actions
    #output: new stack and actions with one extra shift line
    #s1, s2, b1, action
    s1 = int(stack[-2])
    s2 = int(stack[-1])
    b1 = int(buff[0])
    #actions.append([stack[:], buff[:], "Shift"])
    actions.append([s1,s2,b1, 0])
    stack.append(buff[0])
    del buff[0]
    return stack, buff, actions, arc_dict

In [5]:
train_data, train_sentences, train_tags, train_dependencies = read_data('./data/train-stanford-raw.conll')
dev_data, dev_sentences, dev_tags, dev_dependencies = read_data('./data/dev-stanford-raw.conll')
test_data, test_sentences, test_tags, test_dependencies = read_data('./data/test-stanford-raw.conll')

# create a full set of all the words in our train, test, and dev sets for word2vec model
# in order to avoid unseen words during test and validation
total_sentences = train_sentences + dev_sentences + test_sentences
print('data: ', train_data[:2])
print('words sentences: ', total_sentences[2:4])
print('tags sentences: ', train_tags[2:4])
print('dependencies: ', train_dependencies[2:4])

data:  [['1', 'In', '_', 'IN', 'IN', '_', '45', 'prep', '_', '_', 0], ['2', 'an', '_', 'DT', 'DT', '_', '5', 'det', '_', '_', 0]]
words sentences:  [['rolls-royce', 'motor', 'cars', 'inc.', 'said', 'it', 'expects', 'its', 'u.s.', 'sales', 'to', 'remain', 'steady', 'at', 'about', 'NUM', 'cars', 'in', 'NUM', '.'], ['the', 'luxury', 'auto', 'maker', 'last', 'year', 'sold', 'NUM', 'cars', 'in', 'the', 'u.s.']]
tags sentences:  [['NNP', 'NNP', 'NNPS', 'NNP', 'VBD', 'PRP', 'VBZ', 'PRP$', 'NNP', 'NNS', 'TO', 'VB', 'JJ', 'IN', 'IN', 'CD', 'NNS', 'IN', 'CD', '.'], ['DT', 'NN', 'NN', 'NN', 'JJ', 'NN', 'VBD', 'CD', 'NNS', 'IN', 'DT', 'NNP']]
dependencies:  [['4_nn', '4_nn', '4_nn', '5_nsubj', '0_root', '7_nsubj', '5_ccomp', '10_poss', '10_nn', '12_nsubj', '12_aux', '7_xcomp', '12_acomp', '12_prep', '16_quantmod', '17_num', '14_pobj', '12_prep', '18_pobj', '5_punct'], ['4_det', '4_nn', '4_nn', '7_nsubj', '6_amod', '7_tmod', '0_root', '9_num', '7_dobj', '7_prep', '12_det', '10_pobj']]


In [6]:
action_data, arc_dict = process_data('./data/train-stanford-raw.conll')

In [7]:
###############################################
# Interactive parser and evaluation functions #
###############################################

def sentences_to_conll(sentences, arc_dict, file_name):
    action_dict = {v:k for k,v in arc_dict.items()}
    all_sentences_listed = []
    for sentence in sentences:
        sentence_parse = [[i+1,word,'_','_','_','_','_','_','_','_'] 
                          for i, word in enumerate(sentence)]
        w2v_matrix = create_sentence_embeddings([sentence])[0]
        stack = []
        buff = list(range(1,len(sentence)+1))
        sentence_parse = single_sentence_parse(stack, buff, sentence_parse, action_dict, w2v_matrix)
        all_sentences_listed.append(sentence_parse)
    convert_to_conll(all_sentences_listed, file_name)        
    return

def single_sentence_parse(stack, buff, sentence_parse, action_dict, w2v_matrix):
    #If there are 2 or more words in the stack, decide which action to perform and perform it
    if len(stack) > 1:
        s1 = int(stack[-2])
        s2 = int(stack[-1])
        #checks whether buffer contains words
        if len(buff) > 0:
            b1 = int(buff[0])
            action = model_action_decision(w2v_matrix,s1,s2,b1,False)
        else:
            b1 = 0
            action = model_action_decision(w2v_matrix,s1,s2,b1,True)
        
        if action == 0:
            # perform a shift
            stack, buff = Shift(stack, buff)
        elif action%2 == 1:
            # left-arc. All left tags are odd in the dictionary
            stack, sentence_parse = L_arc(stack,s1,s2, sentence_parse, action_dict, action)
        else:
            # right-arc. All right tags are even in the dictionary
            stack, sentence_parse = R_arc(stack,s1,s2, sentence_parse, action_dict, action)
        return single_sentence_parse(stack, buff, sentence_parse, action_dict, w2v_matrix)
    
    #base case (R_arc): if only one word is left, perform the last right arc with root.
    if len(stack) == 1 and len(buff) == 0:
        sentence_parse[stack[0]-1][6] = 0
        sentence_parse[stack[0]-1][7] = 'root'
        return sentence_parse    

    #If there is not enough material in the stack, shift:
    if len(stack) == 0 :
        #print('chose S - small stack')
        stack, buff = Shift(stack, buff)       
        return single_sentence_parse(stack, buff, sentence_parse, action_dict, w2v_matrix)
    if len(stack) == 1:
        stack, buff = Shift(stack, buff)
        return single_sentence_parse(stack, buff, sentence_parse, action_dict, w2v_matrix)
    
def model_action_decision(w2v_matrix,s1,s2,b1,emptybuffer):
    #if emptybuffer is true, exclude option 0 (shift).
    pred_input = {sentence_length: [len(w2v_matrix)], 
                  lstm_x: [w2v_matrix],  parse_indices: 
                  [np.array([[s1,s2,b1]])]} # feed_dict without labels
    prediction = session.run(output_mlp, pred_input)[0]
    if emptybuffer == False:
        action = np.argmax(prediction)
    else:
        pred = np.delete(prediction,[0])
        action = np.argmax(pred)+1
    return action
        
    
def L_arc(stack,s1,s2, sentence_parse, action_dict, action):
    #removes second to last item from stack, sends info to sentence_parse

    action_type = action_dict[action]
    
    #update head and relation for s1
    sentence_parse[s1-1][6] = s2
    sentence_parse[s1-1][7] = action_type[2:]
    
    del stack[-2]
    return stack, sentence_parse


def R_arc(stack,s1,s2, sentence_parse, action_dict, action):
    #removes last item from the stack, sends info to sentence_parse
    
    action_type = action_dict[action]

    #update head and relation for s2
    sentence_parse[s2-1][6] = s1
    sentence_parse[s2-1][7] = action_type[2:]
    
    del stack[-1]
    return stack, sentence_parse


def Shift(stack, buff):
    #moves an item from the buff to the stack
    #input: stack, buff
    #output: new stack and buff
    stack.append(buff[0])
    del buff[0]
    return stack, buff

def convert_to_conll(sentences, file_name):
    content = "\n\n".join(["\n".join(["\t".join([str(var) 
                                                 for var in word]) 
                                      for word in sentence]) 
                           for sentence in sentences]) + "\n"
    with open(file_name+".conll", "w") as text_file:
        text_file.write(content)

In [8]:
##############################
# TF functions
##############################

def mlp(_X, _weights, _biases):
    """
    function that defines a multilayer perceptron in the graph
    input shape: parse_steps (=?) x filtered_words (=3) x lstm_output_length (=400)
    output shape: parse_steps (=?) x num_classes
    """
    # ReLU hidden layer (output shape: parse_steps x n_hidden)
    layer_1 = tf.nn.relu(tf.add(tf.matmul(_X, _weights['h']), _biases['b'])) 
    # return output layer (output shape: parse_steps x n_classes)
    return tf.add(tf.matmul(layer_1, _weights['out']), _biases['out'])


def create_sentence_embeddings(sentences):
    """
    for each sentence, get embedded representation
    """
    embedded_train_sentences = []
    for sentence in sentences:
        embed = model[sentence]
        embedded_train_sentences.append(embed)
    return embedded_train_sentences


In [9]:
##############################
# load word2vec model & input data
##############################

model_name = "dep_parser_word2vec_total"
model = word2vec.Word2Vec.load(model_name)

# embeddings for all sentences
sentence_embeddings = create_sentence_embeddings(train_sentences)

In [10]:
##############################
# TensorFlow model
##############################

graph = tf.Graph()
with graph.as_default():
        
    # hyperparameters (from Cross & Huang, 2016)
    word2vec_length = model['a'].size
    n_input = word2vec_length # for nolstm-model
    n_hidden = 200
    n_classes = 99 # there are 99 possible actions to take
    num_epochs = 8
    dropout = 0.5
    L2_penalty = 0.
    rho = 0.99
    epsilon = 1e-07
    learning_rate = 0.02 # default is 0.001, Cross & Huang do not specify learning rate

    # Store layers weight & bias
    weights = {
        'h': tf.Variable(tf.random_normal([3*n_input, n_hidden], dtype=tf.float64), name='weights_h'),
        'out': tf.Variable(tf.random_normal([n_hidden, n_classes], dtype=tf.float64), name='weights_out')
    }
    biases = {
        'b': tf.Variable(tf.random_normal([n_hidden], dtype=tf.float64), name='biases_b'),
        'out': tf.Variable(tf.random_normal([n_classes], dtype=tf.float64), name='biases_out')
    }
    
    # placeholders
    sentence_length = tf.placeholder(tf.int32)
    lstm_x = tf.placeholder(tf.float64, [1, None, word2vec_length])
    parse_indices = tf.placeholder(tf.int64, [1, None, 3])
    labels = tf.placeholder(tf.int64, [None])

    # directly use word2vec as output per word (so lstm_input = lstm_output)
    output_lstm = lstm_x
    
    # zero-padding of LSTM-output (sentence gets a "dummy word" in front of it)
    zero_padding = tf.zeros([1, 1, n_input], tf.float64)
    output_lstm = tf.concat(1, [zero_padding, output_lstm])
   
    # mlp_x: make a matrix with all corresponding word vector 3-tuples, CONCATENATED (up to the no of parse steps)
    mlp_x = tf.nn.embedding_lookup(output_lstm[0,:,:], parse_indices[0,:,:])
    dims = tf.shape(mlp_x)
    mlp_x = tf.reshape(mlp_x, [dims[0], dims[1]*dims[2]])

    output_mlp = mlp(mlp_x, weights, biases)

    cost = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(output_mlp, labels))
    cost_indication = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(output_mlp, labels))

    train_op = tf.train.AdadeltaOptimizer(rho=rho, epsilon=epsilon, learning_rate=learning_rate).minimize(cost)
    
    # saver:
    saver = tf.train.Saver()
    
    with tf.Session() as session:
        init = tf.initialize_all_variables()
        session.run(init)
        
        for epoch in range(0,num_epochs): 
            for i in range(0,len(sentence_embeddings)): 
                
                # get sentence embedding
                sentence = sentence_embeddings[i]
                
                # get parse data for sentence
                parse_data = action_data[i]
                
                indices = parse_data[:,:3]
                actions = parse_data[:,3]
                
                # important variables
                seq_length = len(sentence)
                                
                feed_dict_batch = {sentence_length: [seq_length], lstm_x: [sentence],  parse_indices: [indices], labels: actions}
                
                result = session.run([train_op, cost_indication], feed_dict_batch)
                
                if i%1000 == 0:
                    print("epoch ", epoch, ", iteration ", i, "\n", "current average loss: ", result[1])
                
                if i%5000 == 0:
                    print("evaluating..")
                    sentences_to_conll(train_sentences[:10], arc_dict, "dev_pred")
                    val_output = subprocess.check_output(["perl", "eval.pl", "-g", "dev_true.conll", "-s", "dev_pred.conll", "-q"])
                    print(val_output.decode("utf-8"))
                
                if i%10000 == 0:
                    name = "tf_models_nolstm/model-" + str(epoch) + "-" + str(i)
                    print("saving model with name " + name)
                    saver.save(session, name)
                
                if i == len(sentence_embeddings)-1:
                    name = "tf_models_nolstm/model-" + str(epoch)
                    print("saving model at end of epoch..")
                    saver.save(session, name)
                    
        print("DONE")


epoch  0 , iteration  0 
 current average loss:  285.287768747
evaluating..
  Labeled   attachment score: 1 / 167 * 100 = 0.60 %
  Unlabeled attachment score: 20 / 167 * 100 = 11.98 %
  Label accuracy score:       2 / 167 * 100 = 1.20 %

saving model with name tf_models_nolstm/model-0-0
epoch  0 , iteration  1000 
 current average loss:  130.067720395
epoch  0 , iteration  2000 
 current average loss:  109.833931728
epoch  0 , iteration  3000 
 current average loss:  89.2713291921
epoch  0 , iteration  4000 
 current average loss:  83.0232905261
epoch  0 , iteration  5000 
 current average loss:  56.3369658198
evaluating..
  Labeled   attachment score: 11 / 167 * 100 = 6.59 %
  Unlabeled attachment score: 23 / 167 * 100 = 13.77 %
  Label accuracy score:       22 / 167 * 100 = 13.17 %

epoch  0 , iteration  6000 
 current average loss:  80.5913741313
epoch  0 , iteration  7000 
 current average loss:  109.479593096
epoch  0 , iteration  8000 
 current average loss:  55.7923794984
epoch 

In [54]:
##############################
# TensorFlow model
##############################

tf.reset_default_graph()
graph = tf.Graph()
with graph.as_default():
        
    # hyperparameters (from Cross & Huang, 2016)
    word2vec_length = model['a'].size
    n_input = word2vec_length # for nolstm-model
    n_hidden = 200
    n_classes = 99 # there are 99 possible actions to take
    num_epochs = 8
    dropout = 0.5
    L2_penalty = 0.
    rho = 0.99
    epsilon = 1e-07
    learning_rate = 0.02 # default is 0.001, Cross & Huang do not specify learning rate

    # Store layers weight & bias
    weights = {
        'h': tf.Variable(tf.random_normal([3*n_input, n_hidden], dtype=tf.float64), name='weights_h'),
        'out': tf.Variable(tf.random_normal([n_hidden, n_classes], dtype=tf.float64), name='weights_out')
    }
    biases = {
        'b': tf.Variable(tf.random_normal([n_hidden], dtype=tf.float64), name='biases_b'),
        'out': tf.Variable(tf.random_normal([n_classes], dtype=tf.float64), name='biases_out')
    }
    
    # placeholders
    sentence_length = tf.placeholder(tf.int32)
    lstm_x = tf.placeholder(tf.float64, [1, None, word2vec_length])
    parse_indices = tf.placeholder(tf.int64, [1, None, 3])
    labels = tf.placeholder(tf.int64, [None])

    # directly use word2vec as output per word (so lstm_input = lstm_output)
    output_lstm = lstm_x
    
    # zero-padding of LSTM-output (sentence gets a "dummy word" in front of it)
    zero_padding = tf.zeros([1, 1, n_input], tf.float64)
    output_lstm = tf.concat(1, [zero_padding, output_lstm])
   
    # mlp_x: make a matrix with all corresponding word vector 3-tuples, CONCATENATED (up to the no of parse steps)
    mlp_x = tf.nn.embedding_lookup(output_lstm[0,:,:], parse_indices[0,:,:])
    dims = tf.shape(mlp_x)
    mlp_x = tf.reshape(mlp_x, [dims[0], dims[1]*dims[2]])

    output_mlp = mlp(mlp_x, weights, biases)

    cost = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(output_mlp, labels))
    cost_indication = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(output_mlp, labels))

    train_op = tf.train.AdadeltaOptimizer(rho=rho, epsilon=epsilon, learning_rate=learning_rate).minimize(cost)
    
    with tf.Session() as session:
        init = tf.initialize_all_variables()
        session.run(init)
        saver = tf.train.import_meta_graph('tf_models_nolstm/model-7.meta')
        saver.restore(session, 'tf_models_nolstm/model-7')
        all_vars = tf.trainable_variables()
        for v in all_vars:
            print(v.name)
        session.run(all_vars)
#         print(session.run(tf.all_variables()))

        print("evaluating..")
        sentences_to_conll(train_sentences[:10], arc_dict, "dev_pred")
        val_output = subprocess.check_output(["perl", "eval.pl", "-g", "dev_true.conll", "-s", "dev_pred.conll", "-q"])
        print(val_output.decode("utf-8"))


    print("DONE")
    # saver:
#     session = tf.Session()
#     init = tf.initialize_all_variables()
#     session.run(init)
#     new_saver = tf.train.import_meta_graph('tf_models_nolstm/model-7.meta')
# #     init = tf.initialize_all_variables()
# #     session.run(init)
#     new_saver.restore(session, tf.train.latest_checkpoint('tf_models_nolstm/'))
#     all_vars = tf.trainable_variables()
#     session.run(all_vars)
#     for v in all_vars:
#         print(v.name)
    
#     with tf.Session() as session:

        
#         init = tf.initialize_all_variables()
#         session.run(init)
        
#         for epoch in range(0,num_epochs): 
#             for i in range(0,len(sentence_embeddings)): 
                
#                 # get sentence embedding
#                 sentence = sentence_embeddings[i]
                
#                 # get parse data for sentence
#                 parse_data = action_data[i]
                
#                 indices = parse_data[:,:3]
#                 actions = parse_data[:,3]
                
#                 # important variables
#                 seq_length = len(sentence)
                                
#                 feed_dict_batch = {sentence_length: [seq_length], lstm_x: [sentence],  parse_indices: [indices], labels: actions}
                
#                 result = session.run([train_op, cost_indication], feed_dict_batch)
                
                
#             if i%5000 == 0:





# tf.reset_default_graph()
# sess = tf.Session()
# # get the model that was trained furthest. model-1-5000.meta means epoch 1, iter 5000
# new_saver = tf.train.import_meta_graph('tf_models_nolstm/model-7.meta')
# new_saver.restore(sess, tf.train.latest_checkpoint('tf_models_nolstm/'))
# all_vars = tf.trainable_variables()
# for v in all_vars:
#     print(v.name)

weights_h:0
weights_out:0
biases_b:0
biases_out:0
weights_h:0
weights_out:0
biases_b:0
biases_out:0
evaluating..
  Labeled   attachment score: 0 / 167 * 100 = 0.00 %
  Unlabeled attachment score: 13 / 167 * 100 = 7.78 %
  Label accuracy score:       4 / 167 * 100 = 2.40 %

DONE


In [68]:
with tf.Session() as session:
    tf.reset_default_graph()
    print("graph reset")
    # placeholders
    sentence_length = tf.placeholder(tf.int32)
    lstm_x = tf.placeholder(tf.float64, [1, None, word2vec_length])
    parse_indices = tf.placeholder(tf.int64, [1, None, 3])
    labels = tf.placeholder(tf.int64, [None])
    print("made new placeholders")

    # directly use word2vec as output per word (so lstm_input = lstm_output)
    output_lstm = lstm_x
    
    # zero-padding of LSTM-output (sentence gets a "dummy word" in front of it)
    zero_padding = tf.zeros([1, 1, n_input], tf.float64)
    output_lstm = tf.concat(1, [zero_padding, output_lstm])
   
    # mlp_x: make a matrix with all corresponding word vector 3-tuples, CONCATENATED (up to the no of parse steps)
    mlp_x = tf.nn.embedding_lookup(output_lstm[0,:,:], parse_indices[0,:,:])
    dims = tf.shape(mlp_x)
    mlp_x = tf.reshape(mlp_x, [dims[0], dims[1]*dims[2]])

#     output_mlp = mlp(mlp_x, weights, biases)

    print("now initializing variables")
    
#     init = tf.initialize_all_variables()
    
#     print("running session with init...")
#     session.run(init)
    
    print("importing graph")
    saver = tf.train.import_meta_graph('tf_models_nolstm/model-7.meta')
    saver.restore(session, 'tf_models_nolstm/model-7')
    print("session restored")
    all_vars = tf.trainable_variables()
    
    for v in all_vars:
        print(v.name)
    
    weights = {
        'h': all_vars[0],
        'out': all_vars[1]
    }
    biases = {
        'b': all_vars[2],
        'out': all_vars[3]
    }
    output_mlp = mlp(mlp_x, weights, biases)
    
    cost = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(output_mlp, labels))
    cost_indication = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(output_mlp, labels))

    train_op = tf.train.AdadeltaOptimizer(rho=rho, epsilon=epsilon, learning_rate=learning_rate).minimize(cost)
    
#     session.run(all_vars)
#         print(session.run(tf.all_variables()))

    print("evaluating...")
    sentences_to_conll(train_sentences[:10], arc_dict, "dev_pred")
    print("predictions made, checking...")
    val_output = subprocess.check_output(["perl", "eval.pl", "-g", "dev_true.conll", "-s", "dev_pred.conll", "-q"])
    print(val_output.decode("utf-8"))

graph reset
made new placeholders
now initializing variables
importing graph
session restored
weights_h:0
weights_out:0
biases_b:0
biases_out:0
evaluating...


IndexError: list index out of range