# **Attention Ensemble  - Survival(SoftVoting)**
---

### **Import Libraries**

In [1137]:
import tensorflow as tf
from keras.models import Model
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split 

import unicodedata
import re
import numpy as np
import os
import io
import time
import random
import openpyxl

### **데이터 로드**

In [1138]:
path_to_file_esb = '/Users/ahjeong_park/Study/Attention-Ensemble-Translation/spa-eng/spa_for_esb.txt'

### **데이터(문장) 전처리**

In [1139]:
# 유니코드 파일을 아스키 코드 파일로 변환합니다.
def unicode_to_ascii(s):
  return ''.join(c for c in unicodedata.normalize('NFD', s)
      if unicodedata.category(c) != 'Mn')


def preprocess_sentence(w):
  w = unicode_to_ascii(w.lower().strip())

  # 단어와 단어 뒤에 오는 구두점(.)사이에 공백을 생성합니다.
  # 예시: "he is a boy." => "he is a boy ."
  # 참고:- https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation
  w = re.sub(r"([?.!,¿])", r" \1 ", w)
  w = re.sub(r'[" "]+', " ", w)

  # (a-z, A-Z, ".", "?", "!", ",")을 제외한 모든 것을 공백으로 대체합니다.
  w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)

  w = w.strip()

  # 모델이 예측을 시작하거나 중단할 때를 알게 하기 위해서
  # 문장에 start와 end 토큰을 추가합니다.
  w = '<start> ' + w + ' <end>'
  return w

### **Dataset 생성**
1. 문장에 있는 억양을 제거합니다.
2. 불필요한 문자를 제거하여 문장을 정리합니다.
3. 다음과 같은 형식으로 문장의 쌍을 반환합니다: [영어, 스페인어]

In [1140]:
def create_dataset(path, num_examples):
  lines = io.open(path, encoding='UTF-8').read().strip().split('\n')

  word_pairs = [[preprocess_sentence(w) for w in l.split('\t')]  for l in lines[:num_examples]]

  return zip(*word_pairs)

### **Language 가 들어오면 공백 단위로 토큰화**
- fit_on_texts(): 문자 데이터를 입력받아서 리스트의 형태로 변환
- texts_to_sequences: 텍스트 안의 단어들을 숫자 시퀀스로 출력
- pad_sequcences(tensor, padding='post') : 서로 다른 개수의 단어로 이루어진 문장을 같은 길이로 만들어주기 위해 패딩을 사용
  - padding = 'post' : [[ 0  0  0  5  3  2  4], [ 0  0  0  5  3  2  7],...,]
  - padding = 'pre' : 뒤 부터 패딩이 채워짐
  - 가장 긴 sequence 의 길이 만큼
  

In [1141]:
def tokenize(lang):
  lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(
      filters='')
  lang_tokenizer.fit_on_texts(lang)

  tensor = lang_tokenizer.texts_to_sequences(lang)

  tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor,
                                                         padding='post')

  return tensor, lang_tokenizer

### **전처리된 타겟 문장과 입력 문장 쌍을 생성**
- input_tensor : input 문장의 패딩 처리된 숫자 시퀀스
- inp_lang_tokenizer : input 문장을 공백 단위로 토큰화, 문자 -> 리스트 변환
- target_tensor, targ_lang_tokenizer : 위와 비슷


In [1142]:
def load_dataset(path, num_examples=None):
  
  targ_lang, inp_lang = create_dataset(path, num_examples)

  input_tensor, inp_lang_tokenizer = tokenize(inp_lang)
  target_tensor, targ_lang_tokenizer = tokenize(targ_lang)

  return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer

### **언어 데이터셋 크기 제한**
- 언어 데이터셋을 아래의 크기로 제한하여 훈련과 검증을 수행
- inp_lang, targ_lang : 인풋,타겟 문장의 문자 -> 리스트 변환 결과
- max_length_targ, max_length_inp : 인풋, 타겟 문장의 '패딩된' 숫자 시퀀스 길이 -> 타겟 텐서와 입력 텐서의 최대 길이

In [1143]:
# num_examples = 30000
num_examples = 60000
input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(path_to_file_esb, num_examples)

max_length_targ, max_length_inp = target_tensor.shape[1], input_tensor.shape[1]

### **데이터셋 (테스트 & 검증) 분리**

In [1144]:
# 훈련 집합과 검증 집합을 80대 20으로 분리합니다.
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)

# 훈련 집합과 검증 집합의 데이터 크기를 출력합니다.
print(len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val))

48000 48000 12000 12000


### 인덱스 -> 해당 word 로

```
Input Language; index to word mapping
1 ----> <start>
93 ----> tomas
27 ----> le
1063 ----> escribio
7 ----> a
120 ----> maria
3 ----> .
2 ----> <end>
```


```
Target Language; index to word mapping
1 ----> <start>
8 ----> tom
695 ----> wrote
6 ----> to
31 ----> mary
3 ----> .
2 ----> <end>
```



In [1145]:
def convert(lang, tensor):
  for t in tensor:
    if t!=0:
      print ("%d ----> %s" % (t, lang.index_word[t]))

### **Buffer, Batch, epoch, embedding dimension, units 설정**
- Tokenizer 의 word_index 속성 : 속성은 단어와 숫자의 키-값 쌍을 포함하는 딕셔너리를 반환
- 따라서 vocab_inp_size, vocab_inp_size : 인풋, 타겟의 단어-숫자 딕셔너리 최대 길이 + 1 (?)
- dataset.batch(BATCH_SIZE, drop_remainder = True) : 배치사이즈 만큼 분할 후 남은 데이터를 drop 할 것인지 여부
- shuffle : 데이터셋 적절히 섞어준다.

In [1146]:
BUFFER_SIZE = len(input_tensor_train)
BATCH_SIZE = 64
steps_per_epoch = len(input_tensor_train)//BATCH_SIZE
embedding_dim = 256
units = 1024
vocab_inp_size = len(inp_lang.word_index)+1
vocab_tar_size = len(targ_lang.word_index)+1

# 훈련 집합에서만 shuffle, batch
dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

In [1147]:
example_input_batch, example_target_batch = next(iter(dataset))
example_input_batch.shape, example_target_batch.shape

(TensorShape([64, 53]), TensorShape([64, 51]))

### **Encoder**


1.   초기화 : vocab_size(단어의 크기), embedding_dim(임베딩 차원 수), enc_units(인코더의 히든 사이즈), batch_sz(배치 사이즈)
  - embedding_dim : 단어 -> 임베딩 벡터로 하기 위한 차원 수
2.  call : gru 에 들어가 output, state 출력
3.  initialize_hidden_state : 맨 처음 gru에 들어가기 위한 더미 입력 값




In [1148]:
class Encoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
    super(Encoder, self).__init__()
    self.batch_sz = batch_sz
    self.enc_units = enc_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.enc_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')

  def call(self, x, hidden):
    x = self.embedding(x)
    output, state = self.gru(x, initial_state = hidden)
    return output, state

  def initialize_hidden_state(self):
    return tf.zeros((self.batch_sz, self.enc_units))

### **Encoder 객체 생성**

In [1149]:
# encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)

### **Attention**


In [1150]:
class BahdanauAttention(tf.keras.layers.Layer):
  def __init__(self, units):
    super(BahdanauAttention, self).__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, values):
    # 쿼리 은닉 상태(query hidden state)는 (batch_size, hidden size)쌍으로 이루어져 있습니다.
    # query_with_time_axis은 (batch_size, 1, hidden size)쌍으로 이루어져 있습니다.
    # values는 (batch_size, max_len, hidden size)쌍으로 이루어져 있습니다.
    # 스코어(score)계산을 위해 덧셈을 수행하고자 시간 축을 확장하여 아래의 과정을 수행합니다.
    query_with_time_axis = tf.expand_dims(query, 1)

    # score는 (batch_size, max_length, 1)쌍으로 이루어져 있습니다.
    # score를 self.V에 적용하기 때문에 마지막 축에 1을 얻습니다.
    # self.V에 적용하기 전에 텐서는 (batch_size, max_length, units)쌍으로 이루어져 있습니다.
    score = self.V(tf.nn.tanh(
        self.W1(query_with_time_axis) + self.W2(values)))

    # attention_weights는 (batch_size, max_length, 1)쌍으로 이루어져 있습니다. 
    attention_weights = tf.nn.softmax(score, axis=1)

    # 덧셈이후 컨텍스트 벡터(context_vector)는 (batch_size, hidden_size)쌍으로 이루어져 있습니다.
    context_vector = attention_weights * values
    context_vector = tf.reduce_sum(context_vector, axis=1)

    return context_vector, attention_weights

### **Decoder**


1.   초기화 : vocab_size(단어의 크기), embedding_dim(임베딩 차원 수), enc_units(인코더의 히든 사이즈), batch_sz(배치 사이즈)
2.   encoder 와의 차이점 : 마지막 fully_connected_layer(tf.keras.layers.Dense) 추가



In [1151]:
class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
    super(Decoder, self).__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    self.fc = tf.keras.layers.Dense(vocab_size)

    # 어텐션을 사용합니다.
    self.attention = BahdanauAttention(self.dec_units)

  def call(self, x, hidden, enc_output):
    # enc_output는 (batch_size, max_length, hidden_size)쌍으로 이루어져 있습니다.
    context_vector, attention_weights = self.attention(hidden, enc_output)

    # 임베딩층을 통과한 후 x는 (batch_size, 1, embedding_dim)쌍으로 이루어져 있습니다.
    x = self.embedding(x)

    # 컨텍스트 벡터와 임베딩 결과를 결합한 이후 x의 형태는 (batch_size, 1, embedding_dim + hidden_size)쌍으로 이루어져 있습니다.
    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    # 위에서 결합된 벡터를 GRU에 전달합니다.
    output, state = self.gru(x)

    # output은 (batch_size * 1, hidden_size)쌍으로 이루어져 있습니다.
    output = tf.reshape(output, (-1, output.shape[2]))

    # output은 (batch_size, vocab)쌍으로 이루어져 있습니다.
    x = self.fc(output)

    # return x, state, attention_weights
    return x, state

### **Decoder 객체 생성**

In [1152]:
# decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)

### **NMT Model 생성**

In [1153]:
class NMT_Model():
  def __init__(self):
    super(NMT_Model, self).__init__()
    self.encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
    self.decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)

### **Ensemble Model 생성**

In [1154]:
models = []
num_models = 5
for m in range(num_models):
  m = NMT_Model()
  models.append(m)


### **Loss Function & Optimizer**

In [1155]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask

  return tf.reduce_mean(loss_)

### **Chekcpoint**
- 여기서 학습한 매개변수를 저장, optimizer/encoder/decoder

In [1156]:
checkpoint_dir = '/Users/ahjeong_park/Study/Attention-Ensemble-Translation/training_checkpoints_esb'
checkpoint_dir_test = '/Users/ahjeong_park/Study/Attention-Ensemble-Translation/training_checkpoints_esb2'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoints = []

for m in range(num_models):
  checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=models[m].encoder,
                                 decoder=models[m].decoder)
  checkpoints.append(checkpoint)


In [1157]:
print(checkpoints)

[<tensorflow.python.training.tracking.util.Checkpoint object at 0x7fe1f5c13d30>, <tensorflow.python.training.tracking.util.Checkpoint object at 0x7fe057b44d30>, <tensorflow.python.training.tracking.util.Checkpoint object at 0x7fe10a1af518>, <tensorflow.python.training.tracking.util.Checkpoint object at 0x7fe10a1af5f8>, <tensorflow.python.training.tracking.util.Checkpoint object at 0x7fe10a1af470>]


### **Train_step**

In [1158]:
# @tf.function
def train_step(model, inp, targ, enc_hidden):
  loss = 0

  with tf.GradientTape() as tape:
    enc_output, enc_hidden = model.encoder(inp, enc_hidden)

    dec_hidden = enc_hidden

    dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)

    # 교사 강요(teacher forcing) - 다음 입력으로 타겟을 피딩(feeding)합니다.
    for t in range(1, targ.shape[1]):
      # enc_output를 디코더에 전달합니다.
      predictions, dec_hidden = model.decoder(dec_input, dec_hidden, enc_output)
      # print('predictions', predictions.shape)

      loss += loss_function(targ[:, t], predictions)

      # 교사 강요(teacher forcing)를 사용합니다. -> 훈련에서는 실제 값을 이용
      dec_input = tf.expand_dims(targ[:, t], 1)

  batch_loss = (loss / int(targ.shape[1]))
  variables = model.encoder.trainable_variables + model.decoder.trainable_variables
  gradients = tape.gradient(loss, variables)
  optimizer.apply_gradients(zip(gradients, variables))
  return batch_loss

### **학습**
- 학습 코드 X

### **문장 번역(스페인 -> 영어)** 

*   tf.keras.preprocessing.sequence.pad_sequences([inputs], maxlen='', padding='post') : 일정한 길이(maxlen)로 맞춰준다. (패딩은 뒤에서)
*   

  ```
  inp_lang.word_index :  {'<start>': 1, '<end>': 2, '.': 3, 'tom': 4, '?': 5...}
  ```

* tf.expand_dims: 차원을 늘려준다.




In [1159]:
def evaluate(sentence):


  sentence = preprocess_sentence(sentence)

  # 문장, input 딕셔너리 출력 
  print ('sentence:', sentence)

  no_word = 'no word'

  input_list = []
   
  for j in range(5):
    inp = []
    for i in sentence.split(' '):
        if i in inp_lang.word_index:
            inp.append(inp_lang.word_index[i])
        else:
            print('no words!')
            return no_word, sentence
    input_list.append(inp)
    

  for input in range(len(input_list)):
    input_list[input] = tf.keras.preprocessing.sequence.pad_sequences([input_list[input]],
                                                         maxlen=max_length_inp,
                                                         padding='post')
    input_list[input] = tf.convert_to_tensor(input_list[input])
    
  voting_result = ''
  
  hiddens = []
  for i in range(5):
    hiddens.append([tf.zeros((1, units))])

  # enc_out, dec_hidden, dec_input 리스트 생성
  enc_outs = []
  enc_hiddens = []
  dec_hiddens = [] 
  dec_inputs = []

  # Encoder 의 hidden 을 Decoder 의 hidden 으로 받는다.
  for i in range(5):
    eo, eh = models[i].encoder(input_list[i], hiddens[i])
    enc_outs.append(eo)
    enc_hiddens.append(eh)
    

  for i in range(5):
    dec_hiddens.append(enc_hiddens[i])


  # Decoder 의 시작인 '<start>' 
  for i in range(5):
    dec_inputs.append(tf.expand_dims([targ_lang.word_index['<start>']], 0))


  # Model Prediction List
  pred_list = []   # 모델 1~5 의 predictions 가 저장되어 있는 리스트
  pred_ids = []    # 모델 1~5의 predictions 에서 가장 큰 값을 가지는 인덱스가 저장되어 있는 리스트
  pred_num = 5     
  max_list = []    # 모델 1~5의 predictions 에서 가장 큰 값을 나타내는 리스트
  suvi_models = models.copy()

  # Target 의 최대 길이 만큼 출력
  for t in range(max_length_inp):
    if pred_num != 1:
        for pred in range(pred_num):
            predictions, dec_hidden = suvi_models[pred].decoder(dec_inputs[pred],
                                                             dec_hiddens[pred],
                                                             enc_outs[pred])
            pred_list.append(predictions)
            dec_hiddens[pred] = dec_hidden

            max_idx = tf.argmax(predictions[0]).numpy()
            pred_ids.append(max_idx)

            # 각 모델의 time step 에서 가장 큰 값 확인 (제일 작은 모델을 제외할 예정)
            # 각 모델의 예측에서 가장 큰 확률 값만 모아놓은 리스트
            max_list.append(predictions[0][max_idx])




        # 서바이벌 - 제일 나쁜 모델 탈락
        # 제일 낮은 확률을 가진 모델을 나타내는 인덱스 --> worst
        worst = max_list.index(min(max_list))

        # worst 모델을 목록에서 삭제
        del pred_list[worst]
        del suvi_models[worst]
        del dec_hiddens[worst]
        del enc_outs[worst]
#         print(worst, '번째 모델 탈락')


        # Soft Voting
        predictions_sum = 0
        for pred in pred_list:
            predictions_sum = tf.add(predictions_sum, pred[0])
        mean = tf.divide(predictions_sum, pred_num)
        voting_id = tf.argmax(mean).numpy()
#         voting_id = pred_ids[max_list.index(max(max_list))]

        pred_ids = []
        max_list = []
        pred_num -= 1

        voting_result += targ_lang.index_word[voting_id] + ' '

        if targ_lang.index_word[voting_id] == '<end>':
          return voting_result, sentence

        del dec_inputs[worst]
        # 예측된 ID를 모델에 다시 피드합니다. (voting_id)
        for i in range(pred_num):
            dec_inputs[i] = tf.expand_dims([voting_id], 0)
    else:

        predictions, dec_hiddens[0] = suvi_models[0].decoder(dec_inputs[0],
                                                             dec_hiddens[0],
                                                             enc_outs[0])
        predicted_id = tf.argmax(predictions[0]).numpy() 
        voting_result += targ_lang.index_word[predicted_id] + ' '
        
        if targ_lang.index_word[predicted_id] == '<end>':
          return voting_result, sentence
    
        # 예측된 ID를 모델에 다시 피드합니다.
        dec_inputs[0] = tf.expand_dims([predicted_id], 0)
        

  return voting_result, sentence

In [1160]:
def translate(sentence):
    voting_result, sentence = evaluate(sentence)
    
    
    print('Survival Voting_result: ', voting_result)
    
    return voting_result

#     # 엑셀 파일 불러오기
#     wb = openpyxl.load_workbook('BLEU_Score.xlsx')

#     # 엑셀 파일의 시트 활성화
#     sheet1 = wb['HardVoting(2)']
#     sheet2 = wb['HardVoting(2)']
#     sheet3 = wb['HardVoting(2)']
#     sheet4 = wb['HardVoting(2)']
#     sheet5 = wb['HardVoting(2)']

#     sheet1.cell(row = start_row, column = ).value = result1
#     sheet2.cell(row = start_row, column = ).value = result2
#     sheet3.cell(row = start_row, column = ).value = result3
#     sheet4.cell(row = start_row, column = ).value = result4
#     sheet5.cell(row = start_row, column = ).value = result5
        

#     wb.save('BLEU_Score.xlsx')


### **Checkpoint 복원**

In [1161]:
ckp_dir_m1 = '/Users/ahjeong_park/Study/Attention-Ensemble-Translation/5 Models Checkpoints_60000/training_checkpoints_esb'
ckp_dir_m2 = '/Users/ahjeong_park/Study/Attention-Ensemble-Translation/5 Models Checkpoints_60000/training_checkpoints_esb 2'
ckp_dir_m3 = '/Users/ahjeong_park/Study/Attention-Ensemble-Translation/5 Models Checkpoints_60000/training_checkpoints_esb 3'
ckp_dir_m4 = '/Users/ahjeong_park/Study/Attention-Ensemble-Translation/5 Models Checkpoints_60000/training_checkpoints_esb 4'
ckp_dir_m5 = '/Users/ahjeong_park/Study/Attention-Ensemble-Translation/5 Models Checkpoints_60000/training_checkpoints_esb 5'

In [1162]:
# checkpoint_dir내에 있는 최근 체크포인트(checkpoint)를 복원
checkpoints[0].restore(tf.train.latest_checkpoint(ckp_dir_m1))
checkpoints[1].restore(tf.train.latest_checkpoint(ckp_dir_m2))
checkpoints[2].restore(tf.train.latest_checkpoint(ckp_dir_m3))
checkpoints[3].restore(tf.train.latest_checkpoint(ckp_dir_m4))
checkpoints[4].restore(tf.train.latest_checkpoint(ckp_dir_m5))

### 이 코드로 했을 때 학습 바로 돌렸을 때와 같은 결과가 나왔음.

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fdc01c54f98>

### **번역 시작**

In [1163]:
# test dataset 파일 불러오기
testdata = '/Users/ahjeong_park/Study/Attention-Ensemble-Translation/BLEU/test_data.txt'
start_row = 2

# 엑셀 파일 불러오기
wb = openpyxl.load_workbook('Survival_Translate.xlsx')

# 엑셀 파일의 시트 활성화
sheet1 = wb['Survival(Soft)']

f = open(testdata, 'r')
lines = f.readlines()

for sentence in lines:
    result = translate(sentence)
    
    sheet1.cell(row = start_row, column = 4).value = result

    start_row += 1
    
f.close()
wb.save('Survival_Translate.xlsx')

sentence: <start> te dijeron lo que paso , ¿ no ? <end>
Survival Voting_result:  you were told told you , what time ? <end> 
sentence: <start> ellos no estaban alla . <end>
Survival Voting_result:  they were not not there . <end> 
sentence: <start> no me gusta ninguno de los chicos . <end>
Survival Voting_result:  i don don t like any boys . <end> 
sentence: <start> jamas trabaje con el . <end>
Survival Voting_result:  i never never work with him . <end> 
sentence: <start> mi padre cultiva arroz . <end>
Survival Voting_result:  my father father sometimes drinks . <end> 
sentence: <start> dime que hiciste en shounan . <end>
Survival Voting_result:  tell me me you did . <end> 
sentence: <start> ¿ cuantas palabras deberias escribir ? <end>
Survival Voting_result:  how many many many should you eat ? <end> 
sentence: <start> ¿ como te introdujiste en mi casa ? <end>
Survival Voting_result:  how how did did you take you my house ? <end> 
sentence: <start> mi madre hornea pan todas las manan

Survival Voting_result:  i would d like you to kick you a look . <end> 
sentence: <start> durante la guerra , la gente paso muchos apuros . <end>
Survival Voting_result:  for the war war , many people died . <end> 
sentence: <start> ¿ con quien desea hablar ? <end>
Survival Voting_result:  who do do you want to speak to ? <end> 
sentence: <start> ¿ son americanos ? <end>
Survival Voting_result:  are they they american ? <end> 
sentence: <start> tengo un hijo y una hija . mi hijo esta en nueva york y mi hija en londres . <end>
Survival Voting_result:  i have have a son and a daughter . <end> 
sentence: <start> llevo marcapasos . <end>
no words!
Survival Voting_result:  no word
sentence: <start> este trabajo es lo suficientemente simple para que lo haga un nino . <end>
Survival Voting_result:  this work is is difficult for a little girl is to make a child . <end> 
sentence: <start> nunca he ido al extranjero . <end>
Survival Voting_result:  i have never never been to the weather . <end> 

Survival Voting_result:  you are a a gossip . <end> 
sentence: <start> no creo que tom y mary sean hermanos . <end>
Survival Voting_result:  i don don t think tom and mary are brothers . <end> 
sentence: <start> ¿ conoces a un buen dentista ? <end>
Survival Voting_result:  do you you you a good doctor ? <end> 
sentence: <start> hice un trato con tom . <end>
Survival Voting_result:  i made made a deal with tom . <end> 
sentence: <start> tiene el nombre de tom en el . <end>
Survival Voting_result:  tom tom s s on in . <end> 
sentence: <start> yo no tengo ganas de hablar . <end>
Survival Voting_result:  i don don t feel like talking . <end> 
sentence: <start> estoy cansado de todo este fastidio . <end>
Survival Voting_result:  i am tired tired of all over this headache . <end> 
sentence: <start> tus manos estan frias . <end>
Survival Voting_result:  your hands are are right . <end> 
sentence: <start> prosigamos . <end>
Survival Voting_result:  let s go go . <end> 
sentence: <start> no fue

Survival Voting_result:  i will wait wait for four o clock . <end> 
sentence: <start> ¿ por que esta tan caliente ? <end>
Survival Voting_result:  why is is this hot ? <end> 
sentence: <start> ¿ cuando paso ? <end>
Survival Voting_result:  when did did did it end ? <end> 
sentence: <start> es dificil creer lo que dices . <end>
Survival Voting_result:  it is is difficult to say what you say . <end> 
sentence: <start> necesitas esto . <end>
Survival Voting_result:  you need this this . <end> 
sentence: <start> has bebido tres tazas de cafe . <end>
Survival Voting_result:  you have had had an apartment under coffee . <end> 
sentence: <start> ¿ somos amigas ? <end>
Survival Voting_result:  are we we we all friends ? <end> 
sentence: <start> ire aunque llueva . <end>
Survival Voting_result:  i will go go outside . <end> 
sentence: <start> tom esta jugando con sus juguetes . <end>
Survival Voting_result:  tom is is playing with his toys . <end> 
sentence: <start> los celos en una relacion a 

Survival Voting_result:  we don don t like our neighbors , and follow them . <end> 
sentence: <start> para de hacer el vago y encuentra algo que hacer . <end>
Survival Voting_result:  to make the make out and he is something to do . <end> 
sentence: <start> los pensamientos se expresan con palabras . <end>
no words!
Survival Voting_result:  no word
sentence: <start> tom llamo a mary para decirle que podria necesitar su ayuda despues esa tarde . <end>
Survival Voting_result:  tom called mary mary to tell her help her friends for that afternoon . <end> 
sentence: <start> un extranjero me pregunto en donde queda la estacion . <end>
Survival Voting_result:  a a little little asked me where to the station . <end> 
sentence: <start> las polillas son atraidas por la luz . <end>
no words!
Survival Voting_result:  no word
sentence: <start> deberiamos dejar descansar a los perros dormidos . <end>
Survival Voting_result:  we should should have the dogs began . <end> 
sentence: <start> no se si qu

Survival Voting_result:  i will will be back . <end> 
sentence: <start> nos perdimos en el bosque . <end>
Survival Voting_result:  we got lost got air in the forest . <end> 
sentence: <start> no tengo suficiente dinero para comprarlo . <end>
Survival Voting_result:  i do have no money to buy it . <end> 
sentence: <start> el cuarto mes se llama abril . <end>
Survival Voting_result:  the room next month . <end> 
sentence: <start> ella lo llama todas las noches y habla al menos una hora . <end>
Survival Voting_result:  she calls calls him every night and she every night . <end> 
sentence: <start> adondequiera que vayas , conoceras gente amable y generosa . <end>
Survival Voting_result:  wherever he he he ll come up , you guys make mistakes and it . <end> 
sentence: <start> dejad sitio ! <end>
Survival Voting_result:  quit quit ! ! <end> 
sentence: <start> ¿ por que me llamaste ? <end>
Survival Voting_result:  why did did you call me ? <end> 
sentence: <start> la mancha de tinta no se sale

Survival Voting_result:  i arrived at at the station at six . <end> 
sentence: <start> perdio a su padre cuando tenia tres anos . <end>
Survival Voting_result:  she lost lost his father when he was three . <end> 
sentence: <start> le gusto . <end>
Survival Voting_result:  i love love him . <end> 
sentence: <start> no podes escapar . <end>
Survival Voting_result:  you can can can escape . <end> 
sentence: <start> ellos originalmente eran granjeros . <end>
Survival Voting_result:  they they were were farmers . <end> 
sentence: <start> a pesar de las dificultades de idioma , todos nos hicimos amigos rapido . <end>
Survival Voting_result:  despite in the the most of the most of the most of the most of the other , we did together some times . <end> 
sentence: <start> me gustaria comprar una heladera . <end>
Survival Voting_result:  i would like like to buy a cookie . <end> 
sentence: <start> quiero cantar la cancion . <end>
Survival Voting_result:  i want to to the song . <end> 
sentence: <

Survival Voting_result:  tom has never never been in boston . <end> 
sentence: <start> te aseguro que es bastante innecesario . <end>
Survival Voting_result:  i assure you you are really irrelevant . <end> 
sentence: <start> esta muy orgulloso de su motocicleta personalizada . <end>
no words!
Survival Voting_result:  no word
sentence: <start> mi vista esta empeorando . <end>
Survival Voting_result:  my my grandfather grandfather at me . <end> 
sentence: <start> ¿ cuales son de tom ? <end>
Survival Voting_result:  what are tom s ? <end> 
sentence: <start> mi madre no puede venir . <end>
Survival Voting_result:  my mother mother can t come . <end> 
sentence: <start> canta , por favor . <end>
Survival Voting_result:  please sing , . <end> 
sentence: <start> perdi mis anteojos . <end>
Survival Voting_result:  i lost my my glasses . <end> 
sentence: <start> despues del almuerzo vimos television . <end>
Survival Voting_result:  after i lunch lunch . <end> 
sentence: <start> tom le pidio a ma

Survival Voting_result:  i dreamed i dreamed i was eating cake cake . <end> 
sentence: <start> manana voy a boston . <end>
Survival Voting_result:  tomorrow i i m going to boston tomorrow . <end> 
sentence: <start> ¿ conoce tom a mary ? <end>
Survival Voting_result:  did tom tom tom ? <end> 
sentence: <start> ¿ que libro necesita ? <end>
Survival Voting_result:  what what book do you need ? <end> 
sentence: <start> somos profesores . <end>
Survival Voting_result:  we are both both teachers . <end> 
sentence: <start> yo soy el que recibio la paliza . <end>
Survival Voting_result:  i am i i called on you . <end> 
sentence: <start> disculpeme . no pense que fuera su asiento . <end>
no words!
Survival Voting_result:  no word
sentence: <start> ella siempre luce palida . <end>
no words!
Survival Voting_result:  no word
sentence: <start> hay mucho que hacer . <end>
Survival Voting_result:  there is a a lot to do . <end> 
sentence: <start> ¿ estas enamorada de tom ? <end>
Survival Voting_resul

Survival Voting_result:  they saw saw a cat agency to england . <end> 
sentence: <start> ¿ has abierto esta puerta alguna vez ? <end>
Survival Voting_result:  have you open open the door before ? <end> 
sentence: <start> manana es feriado . <end>
Survival Voting_result:  tomorrow is is a holiday . <end> 
sentence: <start> ¿ me podrias llamar mas tarde por favor ? <end>
Survival Voting_result:  could you you you wake me later ? <end> 
sentence: <start> tienen vino . <end>
Survival Voting_result:  they have have wine . <end> 
sentence: <start> el resolvio cada problema . <end>
Survival Voting_result:  he solved the solved . <end> 
sentence: <start> a tomas se le da muy bien el ajedrez . <end>
Survival Voting_result:  tom is very very good at chess . <end> 
sentence: <start> no tengo nada para darte . <end>
Survival Voting_result:  i don do have nothing to give you . <end> 
sentence: <start> como ya era tarde , me fui a dormir . <end>
Survival Voting_result:  since it it it was late , i w

Survival Voting_result:  have you you ever sung in a train ? <end> 
sentence: <start> necesito estar aqui por otras cuatro horas . <end>
Survival Voting_result:  i need need to be here for another few hours . <end> 
sentence: <start> el nino lleva un murcielago bajo el brazo . <end>
Survival Voting_result:  the child child child under the arm . <end> 
sentence: <start> mas de un amigo mio no sabe nadar . <end>
Survival Voting_result:  a friend friend of no answer can . <end> 
sentence: <start> la anciana subio las escaleras con dificultad . <end>
Survival Voting_result:  the old old woman went to cover with her . <end> 
sentence: <start> ¿ devolviste el libro de tom ? <end>
Survival Voting_result:  did you tom tom s book ? <end> 
sentence: <start> no le digas a mi esposa , por favor . <end>
Survival Voting_result:  please don don t tell my wife . <end> 
sentence: <start> no puedo hacerlo sola . <end>
Survival Voting_result:  i can can do that alone . <end> 
sentence: <start> tom no cor

Survival Voting_result:  what do you do from australia ? <end> 
sentence: <start> las cobras estan siempre muy alerta . <end>
no words!
Survival Voting_result:  no word
sentence: <start> a el le gusta sentarse y jugar videojuegos todo el dia . <end>
Survival Voting_result:  he likes likes to sit down and then and then . <end> 
sentence: <start> suelo oirla tocar el piano . <end>
no words!
Survival Voting_result:  no word
sentence: <start> no recuerdo haberte visto desde hace dos anos . <end>
Survival Voting_result:  i don don t remember seeing since years ago . <end> 
sentence: <start> me gustaria que este carro sea reparado lo mas pronto posible . <end>
Survival Voting_result:  i would like like this car that it might be done it . <end> 
sentence: <start> sin la capa de ozono , estariamos en peligro . <end>
no words!
Survival Voting_result:  no word
sentence: <start> deberia haber ido a la reunion de ayer . <end>
Survival Voting_result:  i should ve ve gone to the meeting yesterday . 

Survival Voting_result:  it has already already done . <end> 
sentence: <start> quiero que entiendas lo que estoy tratando de decir . <end>
Survival Voting_result:  i want you want you to say what i m trying to say . <end> 
sentence: <start> la chica habla muy bien ingles . <end>
Survival Voting_result:  the girl girl girl very well . <end> 
sentence: <start> me gusta mas el invierno que el verano . <end>
Survival Voting_result:  i like like like summer or summer . <end> 
sentence: <start> ¿ donde esta la clase de tom ? <end>
Survival Voting_result:  where is tom s full ? <end> 
sentence: <start> tuvimos suerte . <end>
Survival Voting_result:  we had got lucky . <end> 
sentence: <start> tom tuvo problemas para resolver la situacion . <end>
Survival Voting_result:  tom had had trouble understanding situation . <end> 
sentence: <start> el auto de mi padre es hecho en italia . <end>
Survival Voting_result:  my father car car is done in italy . <end> 
sentence: <start> aunque a veces timid

Survival Voting_result:  some people were were lost their time . <end> 
sentence: <start> cristobal colon descubrio america . <end>
Survival Voting_result:  columbus columbus discovered came in north america . <end> 
sentence: <start> a finales de agosto , las fuerzas aliadas tomaron paris . <end>
no words!
Survival Voting_result:  no word
sentence: <start> vendre a verte el domingo a las tres de la tarde . <end>
Survival Voting_result:  i will come come to the sunday evening . <end> 
sentence: <start> quiero que seas mi socio . <end>
Survival Voting_result:  i want want you to be my advice . <end> 
sentence: <start> ella es amable . <end>
Survival Voting_result:  she is is is . <end> 
sentence: <start> no esta de acuerdo con su familia . <end>
Survival Voting_result:  he is doesn doesn t agree with his family . <end> 
sentence: <start> esta ocurriendo esta noche . <end>
Survival Voting_result:  this is is going tonight . <end> 
sentence: <start> deberias haber llamado a la policia . <

Survival Voting_result:  tomorrow , i will be tomorrow . <end> 
sentence: <start> el dinero no lo es todo , pero si no tienes dinero no puedes hacer nada . <end>
Survival Voting_result:  money isn isn t everything , but you don t have accomplished . <end> 
sentence: <start> ¿ que me aconsejas hacer ? <end>
Survival Voting_result:  what do do you advise me to do ? <end> 
sentence: <start> no deberias usar ropa ajustada si estas embarazada . <end>
Survival Voting_result:  you shouldn shouldn shouldn t wear clothes for these attention . <end> 
sentence: <start> los dos ninos empezaron a culparse el uno al otro . <end>
no words!
Survival Voting_result:  no word
sentence: <start> ella vivio alli durante muchos anos . <end>
Survival Voting_result:  she she lived lived for many years . <end> 
sentence: <start> el no es el tipo mas listo de la clase . <end>
Survival Voting_result:  he isn isn isn t the tallest one of the class . <end> 
sentence: <start> esta no sera la ultima vez . <end>
Survi

In [1164]:
# translate(u'esta es mi vida.')  # this is my life

sentence: <start> esta es mi vida . <end>
Survival Voting_result:  this is is my life . <end> 


'this is is my life . <end> '

In [1165]:
# translate(u'hace mucho frio aqui.')  # it s very cold here

sentence: <start> hace mucho frio aqui . <end>
Survival Voting_result:  it is very cold here . <end> 


'it is very cold here . <end> '

In [1166]:
# translate(u'¿todavia estan en casa?')  # Are you still at home?

sentence: <start> ¿ todavia estan en casa ? <end>
Survival Voting_result:  are you still still ? <end> 


'are you still still ? <end> '

In [1167]:
# translate(u'¿Con quién desea hablar?')  # Who do you want to talk to?

sentence: <start> ¿ con quien desea hablar ? <end>
Survival Voting_result:  who do do you want to speak to ? <end> 


'who do do you want to speak to ? <end> '