In [295]:
import tensorflow as tf 
import numpy as np
import helpers
import operator
from sklearn.model_selection import train_test_split 
from random import shuffle

# Global constants.
# Reserved token codes: padding, end-of-sequence and beginning-of-sequence.
PAD, EOS, BOS = 0, 1, 2
# Added to the encoder input length to obtain the number of decoder steps
# (and the padded length of the decoder targets).
character_changing_num = 10
# Training progress is printed for every batch whose number is a multiple of this.
batches_in_epoch = 100

# x: encoder inputs, one [sequence, line_index] pair per data line
# (morphological tags followed by the source word).
source_data = []
# y: expected decoder outputs, one target-word sequence per data line.
target_data = []

# Maps every character / morphological tag seen so far to its integer code.
alphabet_and_morph_tags = {}

# create (source morphological tags + target morphological tags + source/target word) sequence
def create_sequence(data_line_, word_index):
    """Build one BOS ... EOS sequence from an already encoded data line.

    data_line_ is a list of encoded fields (each a list of ints). A line
    with four fields is treated as task 2 (tags at positions 0 and 2,
    target word at position 3); any other line is treated as task 1/3
    (tags at position 1, target word at position 2).

    word_index selects the word to place in the sequence. When it points
    at the target word the tags are left out, so the result is
    BOS + target word + EOS; otherwise the result is
    BOS + tags + selected word + EOS.
    """
    if len(data_line_) == 4:
        # task 2
        target_word_index = 3
        tag_fields = (0, 2)
    else:
        # task 1, 3
        target_word_index = 2
        tag_fields = (1,)

    sequence = [BOS]

    # morphological tags are prepended to the encoder input only
    if word_index != target_word_index:
        for field in tag_fields:
            sequence.extend(data_line_[field])

    sequence.extend(data_line_[word_index])
    sequence.append(EOS)

    return sequence


# encoding input data
def encoding(data, coded_word, alphabet_and_morph_tags):
    """Append the integer code of every symbol in data to coded_word.

    data is iterated symbol by symbol (the characters of a string, or the
    items of a list of tags). A symbol that is not yet a key of
    alphabet_and_morph_tags is registered with the next free code, which is
    the current dictionary size plus 3. Both coded_word and the dictionary
    are modified in place; coded_word is also returned.
    """
    for symbol in data:
        if symbol not in alphabet_and_morph_tags:
            # first occurrence: hand out the next unused code
            alphabet_and_morph_tags[symbol] = len(alphabet_and_morph_tags) + 3
        coded_word.append(alphabet_and_morph_tags[symbol])

    return coded_word


# read, split and encode input data
# The data contains accented characters, so the encoding is given explicitly
# instead of relying on the platform default.
with open('task2.tsv', 'r', encoding='utf-8') as input_file:
    # index of the current sample; stored next to each encoder input so the
    # matching entry of target_data can still be found after shuffling
    idx = 0
    for line_number, line in enumerate(input_file, start=1):
        stripped_line = line.strip('\n')

        # a blank (e.g. trailing) line carries no sample
        if not stripped_line.strip():
            continue

        data_line_ = stripped_line.split('\t')

        # task 2 lines have 4 fields, task 1/3 lines have 3; anything else
        # would be indexed incorrectly below
        if len(data_line_) not in (3, 4):
            raise ValueError(
                'line {}: expected 3 or 4 tab-separated fields, got {}'.format(
                    line_number, len(data_line_)))

        # positions of the source and target word; every other field holds
        # comma-separated morphological tags
        word_positions = (1, 3) if len(data_line_) == 4 else (0, 2)

        # encode every field into a list of ints
        for item in range(len(data_line_)):
            if item in word_positions:
                # words are encoded character by character
                symbols = data_line_[item]
            else:
                # tags are encoded one whole tag at a time
                symbols = data_line_[item].split(',')

            data_line_[item] = encoding(symbols, [], alphabet_and_morph_tags)

        # defines source and target words' index
        source_idx = len(data_line_) - 3
        target_idx = len(data_line_) - 1

        # encoder input, task 2: (source tags + target tags + source word)
        # task 1,3: (tags + source word); idx links it to its target
        source_data.append([create_sequence(data_line_, source_idx), idx])

        # expected decoder output: (target word)
        target_data.append(create_sequence(data_line_, target_idx))

        idx += 1

# split data into train and test sets
source_data_train, source_data_test, target_data_train, target_data_test = train_test_split(source_data, target_data, test_size = 0.1)

A programhoz felhasznált forrásfájl a következőképpen néz ki:
    
    task1 esetén:  (forrás szóalak + cél morfológiai tagek + cél szóalak)
    agyondicsér	pos=V,mood=IND,def=DEF,tense=PST,per=1,num=PL	agyondicsértük
    ürít	pos=V,mood=POT	üríthet
    vitat	pos=V,polite=INFM,per=2,num=SG,finite=NFIN	vitatnod
     
    task 2 esetén: (forrás morfológiai tagek + forrás szóalak + cél morfológiai tagek + cél szóalak)
    pos=V,mood=IND,def=INDF,tense=PRS,per=1,num=PL	agyondicsérünk	pos=V,mood=IND,def=DEF,tense=PST,per=1,num=PL	agyondicsértük
    pos=V,tense=PRS	ürítő	pos=V,mood=POT	üríthet
    pos=V,finite=NFIN	vitatni	pos=V,polite=INFM,per=2,num=SG,finite=NFIN	vitatnod
    
    task 3 esetén:  (forrás szóalak + cél morfológiai tagek + cél szóalak)
    agyondicsérünk	pos=V,mood=IND,def=DEF,tense=PST,per=1,num=PL	agyondicsértük
    ürítő	pos=V,mood=POT	üríthet
    vitatni	pos=V,polite=INFM,per=2,num=SG,finite=NFIN	vitatnod
 
Minden sora egy-egy bemeneti adatot reprezentál.

Beolvassuk a fájlból soronként, elvégezzük a szükséges feldarabolási lépéseket, majd kódoljuk mind a morfológiai tageket, mind a szóalakokat is számok formájában. Ehhez a már megszokott +1-gyel növelt kódolást használja. A morfológiai tageket páronként kódolja szintén mindig +1-gyel növelt értéktől kezdve: (FONTOS! mivel EOS=1 és PAD=0 és BOS=2 ezért a kódolást a 3-as értéktől kezdi)
    
    pl.: POS=V -> 3-as érték
         MOOD=IND -> 4-es érték
         
A morfológiai tagek kódolt formájából szekvenciát épít, és mind a forrás, mind a cél tagek szekvenciáját a forrás szóalak szekvenciája elé fűzi (ha van); így állítja elő a source_data változóban az encoder bemenetét.
             (BOS + forrás morfológiai tagek szekvenciája + cél morfológiai tagek szekvenciája + forrás szóalak szekvenciája + EOS)
             
Az előbbihez hasonlóan készíti el a decoder elvárt kimenetét is, amit a target_data változóban tárol:
             (BOS + cél szóalak szekvenciája + EOS)


In [296]:
# Reset the global default graph, then open an interactive session.
tf.reset_default_graph()
sess = tf.InteractiveSession()

# largest integer code stored in the symbol table
max_alphabet_and_morph_tags = max(alphabet_and_morph_tags.values())

# codes start at 0, so the embedding table needs one row more than the largest code
vocab_size = 1 + max_alphabet_and_morph_tags
# width of one embedding vector
input_embedding_size = 300

A vocab_size-ot manuálisan kell kiszámolni, hogy pontosan megállapíthassuk, hogy hány különböző kódolt karakterünk van. (Ennek pontos értékére az embedding miatt van szükség)

In [297]:
# Number of GRU units in each direction of the encoder.
encoder_hidden_units = 100 
# The decoder is twice as wide as one encoder direction: its initial state is
# encoder_final_state, which concatenates the forward and the backward final
# states of the encoder, so the sizes have to agree.
decoder_hidden_units = encoder_hidden_units * 2

# Input placeholders. The graph is time-major (time_major=True below and the
# unstack order of tf.shape(encoder_inputs)), i.e. encoder_inputs is
# [max_time, batch_size].
encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='encoder_inputs')
# One length per sequence in the batch; passed to the encoder as
# sequence_length and used to derive decoder_lengths.
encoder_inputs_length = tf.placeholder(shape=(None,), dtype=tf.int32, name='encoder_inputs_length')
# Expected decoder output token ids, fed as labels to the loss.
decoder_targets = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_targets')

# Embedding table shared by the encoder inputs and the decoder inputs:
# one uniformly initialised row in [-1, 1) per token code.
embeddings = tf.Variable(tf.random_uniform([vocab_size, input_embedding_size], -1.0, 1.0), dtype=tf.float32)

# Replace every token id of the encoder input by its embedding vector.
encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)

# Encoder cell.
# NOTE(review): the same GRUCell instance is passed below as both cell_fw and
# cell_bw - confirm that the installed TensorFlow version accepts this and
# gives each direction its own weights.
encoder_cell = tf.contrib.rnn.GRUCell(encoder_hidden_units)

# Bidirectional encoder: runs the input forwards and backwards and returns
# the per-step outputs and the final state of each direction.
((encoder_fw_outputs,
  encoder_bw_outputs),
 (encoder_fw_final_state,
  encoder_bw_final_state)) = (
    tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_cell,
                                    cell_bw=encoder_cell,
                                    inputs=encoder_inputs_embedded,
                                    sequence_length=encoder_inputs_length,
                                    dtype=tf.float32, time_major=True)
    )

# Join forward and backward outputs along the feature axis (axis 2).
encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), 2)


# With GRU cells the state is a single tensor (not an LSTM-style (c, h)
# tuple), so the two final states can be concatenated directly along the
# feature axis (axis 1).
encoder_final_state = tf.concat(
    (encoder_fw_final_state, encoder_bw_final_state), 1)

# The string literal below is inert leftover code from a tuple-state variant;
# it is evaluated as an expression and discarded.
'''
encoder_final_state_h = tf.concat(
    (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)

#TF Tuple used by LSTM Cells for state_size, zero_state, and output state.
encoder_final_state = tf.contrib.rnn.GRUStateTuple(
    c=encoder_final_state_c,
    h=encoder_final_state_h
)
'''

decoder_cell = tf.contrib.rnn.GRUCell(decoder_hidden_units)

# Dynamic dimensions of the fed batch, in (time, batch) order.
encoder_max_time, batch_size = tf.unstack(tf.shape(encoder_inputs))

# Number of decoding steps per sequence: the encoder input length plus
# character_changing_num extra steps.
decoder_lengths = encoder_inputs_length + character_changing_num

# Output projection from the decoder state to vocabulary logits; declared by
# hand because the decoder loop function applies it at every step.
# weights
W = tf.Variable(tf.random_uniform([decoder_hidden_units, vocab_size], -1, 1), dtype=tf.float32)
# bias
b = tf.Variable(tf.zeros([vocab_size]), dtype=tf.float32)

# Guard: the graph code that follows builds token slices from literal
# ones/zeros, which is only valid for these exact token codes.
assert EOS == 1 and PAD == 0 and BOS == 2

# One token id per batch element for each special symbol. The values are
# taken from the PAD/EOS/BOS constants so the slices always agree with them
# (a slice of ones for BOS would actually hold the EOS code, since BOS == 2).
bos_time_slice = tf.fill([batch_size], BOS, name='BOS')
eos_time_slice = tf.fill([batch_size], EOS, name='EOS')
pad_time_slice = tf.fill([batch_size], PAD, name='PAD')

# Embedding vectors of the special symbols, shape [batch_size, input_embedding_size].
bos_step_embedded = tf.nn.embedding_lookup(embeddings, bos_time_slice)
eos_step_embedded = tf.nn.embedding_lookup(embeddings, eos_time_slice)
pad_step_embedded = tf.nn.embedding_lookup(embeddings, pad_time_slice)

#manually specifying loop function through time - to get initial cell state and input to RNN
#normally we'd just use dynamic_rnn, but lets get detailed here with raw_rnn

#we define and return these values, no operations occur here
def loop_fn_initial():
    """Return the raw_rnn loop tuple for the very first decoder step.

    The tuple is (elements_finished, next_input, cell_state, emit_output,
    loop_state): the finished mask compares 0 against decoder_lengths, the
    first input is the embedded EOS slice, the cell starts from the encoder's
    final state, and no output or extra loop state is provided.
    """
    return (
        0 >= decoder_lengths,   # finished mask at step 0
        eos_step_embedded,      # first decoder input
        encoder_final_state,    # initial cell state
        None,                   # no cell output yet
        None,                   # no additional loop state is carried
    )


# Transition step of the decoder loop: the token predicted from the previous
# output is embedded and fed back in as the next input.
def loop_fn_transition(time, previous_output, previous_state, previous_loop_state):
    """Return the raw_rnn loop tuple for every decoder step after the first.

    time, previous_output and previous_state are supplied by tf.nn.raw_rnn;
    previous_loop_state is accepted but not used. The cell state and output
    are passed through unchanged and no loop state is carried.
    """

    def embed_greedy_prediction():
        # project the previous output onto the vocabulary (unnormalised scores)
        step_logits = tf.add(tf.matmul(previous_output, W), b)
        # highest scoring token for every batch element
        best_token = tf.argmax(step_logits, axis=1)
        # its embedding becomes the next decoder input
        return tf.nn.embedding_lookup(embeddings, best_token)

    # boolean tensor of [batch_size]: True where the sequence has used up its steps
    elements_finished = (time >= decoder_lengths)

    # scalar: True once every sequence of the batch is finished
    all_done = tf.reduce_all(elements_finished)
    # feed padding when everything is finished, the embedded prediction otherwise
    next_input = tf.cond(all_done, lambda: pad_step_embedded, embed_greedy_prediction)

    return (elements_finished,
            next_input,
            previous_state,
            previous_output,
            None)

def loop_fn(time, previous_output, previous_state, previous_loop_state):
    """Loop function handed to tf.nn.raw_rnn.

    A previous_state of None marks the first call (time == 0) and is
    answered by loop_fn_initial(); every other call is forwarded to
    loop_fn_transition() with the same arguments.
    """
    if previous_state is not None:
        return loop_fn_transition(time, previous_output, previous_state, previous_loop_state)

    # time == 0: there is neither an output nor a state yet
    assert previous_output is None and previous_state is None
    return loop_fn_initial()

# Unroll the decoder with tf.nn.raw_rnn, which calls loop_fn at every step to
# obtain the finished mask, the next input and the cell state.
# decoder_outputs_ta is a TensorArray ("ta") of per-step outputs.
decoder_outputs_ta, decoder_final_state, _ = tf.nn.raw_rnn(decoder_cell, loop_fn)
# Why the decoder width cannot be halved: loop_fn starts the decoder from
# encoder_final_state, the concatenation of the forward and backward encoder
# states, so the decoder state must be 2 * encoder_hidden_units wide.

# Stack the per-step outputs into one tensor.
decoder_outputs = decoder_outputs_ta.stack()

# Bare expression with no effect here (it is not the last line of the cell,
# so nothing is displayed).
decoder_outputs

# Turn the decoder outputs into per-step vocabulary logits.

# Dynamic dimensions of the stacked outputs: (steps, batch, features).
decoder_max_steps, decoder_batch_size, decoder_dim = tf.unstack(tf.shape(decoder_outputs))
# Flatten steps and batch into one axis so a single matmul can be applied.
decoder_outputs_flat = tf.reshape(decoder_outputs, (-1, decoder_dim))
# Apply the output projection (W, b).
decoder_logits_flat = tf.add(tf.matmul(decoder_outputs_flat, W), b)
# Restore the (steps, batch, vocab) layout.
decoder_logits = tf.reshape(decoder_logits_flat, (decoder_max_steps, decoder_batch_size, vocab_size))

# Predicted token id per step and batch element.
decoder_prediction = tf.argmax(decoder_logits, 2)

# Cross entropy between the logits and the one-hot encoded targets,
# one value per step and batch element.
stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    labels=tf.one_hot(decoder_targets, depth=vocab_size, dtype=tf.float32),
    logits=decoder_logits,
)

# Loss: plain mean over all positions; no mask is applied, so positions whose
# target is PAD contribute as well.
loss = tf.reduce_mean(stepwise_cross_entropy)
# Optimiser (an Adam variant is kept below for reference).
#train_op = tf.train.AdamOptimizer().minimize(loss)
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss) # set learning_rate = 0.1

sess.run(tf.global_variables_initializer())

# Number of sequences per training batch.
# NOTE(review): this rebinds the name batch_size, which earlier in this cell
# referred to the dynamic batch-size tensor; the graph ops built from that
# tensor already exist at this point.
batch_size = 20

In [298]:
# create batches with size of batch_size
def create_batches(source_data, target_data, batch_size):
    """Split the samples into consecutive batches of at most batch_size items.

    Parameters:
        source_data: list of [sequence, line_index] pairs (encoder inputs).
        target_data: list of target sequences; line_index of a source entry
            is the position of its target in this list.
        batch_size: maximum number of samples per batch, must be positive.

    Returns:
        (source_batches, target_batches): two lists of equal length whose
        i-th elements hold the encoder sequences and the matching target
        sequences of batch i. Every sample appears exactly once; only the
        last batch may be shorter. Empty input yields two empty lists.

    Raises:
        ValueError: if batch_size is not positive.
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')

    # stores batches
    source_batches = []
    target_batches = []

    # Slicing in steps of batch_size keeps the first element of every batch
    # and the final element of the data, and never produces an empty batch.
    for batch_start in range(0, len(source_data), batch_size):
        batch_entries = source_data[batch_start:batch_start + batch_size]
        # encoder sequences of this batch
        source_batches.append([entry[0] for entry in batch_entries])
        # expected targets, looked up through the stored line index
        target_batches.append([target_data[entry[1]] for entry in batch_entries])

    return source_batches, target_batches

A bemeneti adatokból és az elvárt kimenetekből legyártja a batch_size-nak megfelelő méretű batcheket. 

Maga a batch szekvenciák kötegét jelenti, hogy egyszerre hány input sort adunk be a rendszerünknek. Ezért fontos, hogy mind az encoder bemenetén, mind a decoder kimenetén azonos méretű batchek legyenek. Emiatt hívjuk meg a source_data és target_data-ra is egyaránt. 

In [299]:
def next_feed(batch_num, source_batches, target_batches):
    """Build the feed dictionary for batch number batch_num.

    The encoder sequences of the batch go through helpers.batch, which is
    used here as returning the batched inputs and the per-sequence lengths
    (presumably padded and time-major - confirm in helpers.py). Every target
    sequence is right-padded with PAD up to the longest encoder input of the
    batch plus character_changing_num before being batched the same way.

    Returns a dict keyed by the encoder_inputs, encoder_inputs_length and
    decoder_targets placeholders.
    """
    encoder_inputs_, encoder_input_lengths_ = helpers.batch(source_batches[batch_num])

    # common length of all targets: longest input + allowed extra characters
    padded_length = max(encoder_input_lengths_) + character_changing_num

    padded_targets = []
    for sequence in target_batches[batch_num]:
        missing = padded_length - len(sequence)
        padded_targets.append(sequence + [PAD] * missing)

    decoder_targets_, _ = helpers.batch(padded_targets)

    feed = {}
    feed[encoder_inputs] = encoder_inputs_
    feed[encoder_inputs_length] = encoder_input_lengths_
    feed[decoder_targets] = decoder_targets_
    return feed

Mivel a forrás szóalak hossza nem feltétlenül egyezik meg a cél szóalak hosszával, fontos, hogy lehetővé tegyük a rendszer számára, hogy további karaktereket fűzhessen az eredetihez. Azt, hogy hány karakterrel lehet hosszabb a képzett szó (cél szó), a character_changing_num változó definiálja.

    Ha a character_changing_num = 10 ez azt jelenti, hogy 10 karakterben térhet el az eredeti szóalaktól.
    
Az ehhez szükséges padding karakterek számának kiszámolásához megkeressük a legnagyobb bemeneti szekvencia hosszát, amit a max_input_length változóban tárolunk el. Ezután a legnagyobb bemeneti szekvencia hosszához hozzáadjuk a (character_changing_num) értéket és kivonjuk belőle az aktuális szekvencia hosszát. Ezzel az ettől való eltéréseket 0-val töltjük fel.

In [300]:
# loss of every training step, in execution order
loss_track = []

try:
    # number of passes over the training data
    epoch = 100
    for j in range(0, epoch):
        # Reshuffle in every epoch so the batches differ between epochs.
        # shuffle() is what this notebook imports from `random`; the module
        # itself is not imported, so random.sample(...) fails with NameError
        # on a fresh kernel. A copy is shuffled to keep rebinding the name to
        # a new list, as before.
        source_data_train = list(source_data_train)
        shuffle(source_data_train)

        # Encoder inputs and decoder targets divided into batches; targets are
        # looked up in the full target_data through the stored line index.
        source_batches, target_batches = create_batches(source_data_train, target_data, batch_size)

        # train the model on every batch
        for batch_num in range(len(source_batches)):
            fd = next_feed(batch_num, source_batches, target_batches)

            _, l = sess.run([train_op, loss], fd)
            loss_track.append(l)

            # report on batch 0 and on every batches_in_epoch-th batch
            if batch_num % batches_in_epoch == 0:
                print('batch {}'.format(batch_num))
                # loss (after the update above) and predictions in one run
                minibatch_loss, predict_ = sess.run([loss, decoder_prediction], fd)
                print('  minibatch loss: {}'.format(minibatch_loss))
                # transposed so that each row is one sequence of the batch
                for i, (inp, pred) in enumerate(zip(fd[encoder_inputs].T, predict_.T)):
                    print('  sample {}:'.format(i + 1))
                    print('    input     > {}'.format(inp))
                    print('    predicted > {}'.format(pred))
                    # show at most three samples
                    if i >= 2:
                        break
                print()

except KeyboardInterrupt:
    print('training interrupted')

batch 0
  minibatch loss: 3.285431146621704
  sample 1:
    input     > [ 2  3  4 22 32 23 33  8  3  4  5 23  7  8 29 37 28 15 24 24 18 24 29 21  1
  0  0  0  0  0  0]
    predicted > [ 3  0  0  0  0  0  0  0  0 40  0  0  0  0  0  0  0 40  0  0  0  0  0  0  0
 40  0  0  0  0  0  0  0 40  0  0  0  0  0  0  0]
  sample 2:
    input     > [ 2  3  4  5  6  7  8  3  4  5 32  6 33  8 31 29 17 40 24 29 10 29 24 20 13
 21  1  0  0  0  0]
    predicted > [ 3  0  0  0  0  0  0  0  0 40  0  0  0  0  0  0  0 40  0  0  0  0  0  0  0
 40  0  0  0  0  0  0  0 40  0  0  0  0  0  0  0]
  sample 3:
    input     > [ 2  3 23  3 35  5 32  6 33 34 24  9 21  9 19 46 40 12 24 24  1  0  0  0  0
  0  0  0  0  0  0]
    predicted > [ 3  0  0  0  0  0  0  0  0 40  0  0  0  0  0  0  0 40  0  0  0  0  0  0  0
 40  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]



InvalidArgumentError: indices[19,5] = 76 is not in [0, 76)
	 [[Node: embedding_lookup = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, _class=["loc:@Variable"], validate_indices=true, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable/read, _recv_encoder_inputs_0)]]

Caused by op 'embedding_lookup', defined at:
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-297-8a12ed11dfe1>", line 22, in <module>
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/embedding_ops.py", line 111, in embedding_lookup
    validate_indices=validate_indices)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1359, in gather
    validate_indices=validate_indices, name=name)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
    op_def=op_def)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2395, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/dorottyahuszti/anaconda/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1264, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): indices[19,5] = 76 is not in [0, 76)
	 [[Node: embedding_lookup = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, _class=["loc:@Variable"], validate_indices=true, _device="/job:localhost/replica:0/task:0/cpu:0"](Variable/read, _recv_encoder_inputs_0)]]
