# Attention Ensemble - 단일 모델 결과의 진짜 다수결
---

### **Import Libraries**

In [178]:
import tensorflow as tf
from keras.models import Model
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split 

import unicodedata
import re
import numpy as np
import os
import io
import time
import random
import openpyxl

### **데이터 로드**

In [179]:
path_to_file_esb = '/Users/ahjeong_park/Study/Attention-Ensemble-Translation/spa-eng/spa_for_esb.txt'

### **데이터(문장) 전처리**

In [180]:
# 유니코드 파일을 아스키 코드 파일로 변환합니다.
def unicode_to_ascii(s):
  return ''.join(c for c in unicodedata.normalize('NFD', s)
      if unicodedata.category(c) != 'Mn')


def preprocess_sentence(w):
  w = unicode_to_ascii(w.lower().strip())

  # 단어와 단어 뒤에 오는 구두점(.)사이에 공백을 생성합니다.
  # 예시: "he is a boy." => "he is a boy ."
  # 참고:- https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation
  w = re.sub(r"([?.!,¿])", r" \1 ", w)
  w = re.sub(r'[" "]+', " ", w)

  # (a-z, A-Z, ".", "?", "!", ",")을 제외한 모든 것을 공백으로 대체합니다.
  w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)

  w = w.strip()

  # 모델이 예측을 시작하거나 중단할 때를 알게 하기 위해서
  # 문장에 start와 end 토큰을 추가합니다.
  w = '<start> ' + w + ' <end>'
  return w

### **Dataset 생성**
1. 문장에 있는 억양을 제거합니다.
2. 불필요한 문자를 제거하여 문장을 정리합니다.
3. 다음과 같은 형식으로 문장의 쌍을 반환합니다: [영어, 스페인어]

In [181]:
def create_dataset(path, num_examples):
  lines = io.open(path, encoding='UTF-8').read().strip().split('\n')

  word_pairs = [[preprocess_sentence(w) for w in l.split('\t')]  for l in lines[:num_examples]]

  return zip(*word_pairs)

### **Language 가 들어오면 공백 단위로 토큰화**
- fit_on_texts(): 문자 데이터를 입력받아서 리스트의 형태로 변환
- texts_to_sequences: 텍스트 안의 단어들을 숫자 시퀀스로 출력
- pad_sequcences(tensor, padding='post') : 서로 다른 개수의 단어로 이루어진 문장을 같은 길이로 만들어주기 위해 패딩을 사용
  - padding = 'post' : [[ 0  0  0  5  3  2  4], [ 0  0  0  5  3  2  7],...,]
  - padding = 'pre' : 뒤 부터 패딩이 채워짐
  - 가장 긴 sequence 의 길이 만큼
  

In [182]:
def tokenize(lang):
  lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(
      filters='')
  lang_tokenizer.fit_on_texts(lang)

  tensor = lang_tokenizer.texts_to_sequences(lang)

  tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor,
                                                         padding='post')

  return tensor, lang_tokenizer

### **전처리된 타겟 문장과 입력 문장 쌍을 생성**
- input_tensor : input 문장의 패딩 처리된 숫자 시퀀스
- inp_lang_tokenizer : input 문장을 공백 단위로 토큰화, 문자 -> 리스트 변환
- target_tensor, targ_lang_tokenizer : 위와 비슷


In [183]:
def load_dataset(path, num_examples=None):
  
  targ_lang, inp_lang = create_dataset(path, num_examples)

  input_tensor, inp_lang_tokenizer = tokenize(inp_lang)
  target_tensor, targ_lang_tokenizer = tokenize(targ_lang)

  return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer

### **언어 데이터셋 크기 제한**
- 언어 데이터셋을 아래의 크기로 제한하여 훈련과 검증을 수행
- inp_lang, targ_lang : 인풋,타겟 문장의 문자 -> 리스트 변환 결과
- max_length_targ, max_length_inp : 인풋, 타겟 문장의 '패딩된' 숫자 시퀀스 길이 -> 타겟 텐서와 입력 텐서의 최대 길이

In [184]:
# num_examples = 30000
num_examples = 60000
input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(path_to_file_esb, num_examples)

max_length_targ, max_length_inp = target_tensor.shape[1], input_tensor.shape[1]

### **데이터셋 (테스트 & 검증) 분리**

In [185]:
# 훈련 집합과 검증 집합을 80대 20으로 분리합니다.
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)

# 훈련 집합과 검증 집합의 데이터 크기를 출력합니다.
print(len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val))

48000 48000 12000 12000


### 인덱스 -> 해당 word 로

```
Input Language; index to word mapping
1 ----> <start>
93 ----> tomas
27 ----> le
1063 ----> escribio
7 ----> a
120 ----> maria
3 ----> .
2 ----> <end>
```


```
Target Language; index to word mapping
1 ----> <start>
8 ----> tom
695 ----> wrote
6 ----> to
31 ----> mary
3 ----> .
2 ----> <end>
```



In [186]:
def convert(lang, tensor):
  for t in tensor:
    if t!=0:
      print ("%d ----> %s" % (t, lang.index_word[t]))

### **Buffer, Batch, epoch, embedding dimension, units 설정**
- Tokenizer 의 word_index 속성 : 속성은 단어와 숫자의 키-값 쌍을 포함하는 딕셔너리를 반환
- 따라서 vocab_inp_size, vocab_inp_size : 인풋, 타겟의 단어-숫자 딕셔너리 최대 길이 + 1 (?)
- dataset.batch(BATCH_SIZE, drop_remainder = True) : 배치사이즈 만큼 분할 후 남은 데이터를 drop 할 것인지 여부
- shuffle : 데이터셋 적절히 섞어준다.

In [187]:
BUFFER_SIZE = len(input_tensor_train)
BATCH_SIZE = 64
steps_per_epoch = len(input_tensor_train)//BATCH_SIZE
embedding_dim = 256
units = 1024
vocab_inp_size = len(inp_lang.word_index)+1
vocab_tar_size = len(targ_lang.word_index)+1

# 훈련 집합에서만 shuffle, batch
dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

In [188]:
example_input_batch, example_target_batch = next(iter(dataset))
example_input_batch.shape, example_target_batch.shape

(TensorShape([64, 53]), TensorShape([64, 51]))

### **Encoder**


1.   초기화 : vocab_size(단어의 크기), embedding_dim(임베딩 차원 수), enc_units(인코더의 히든 사이즈), batch_sz(배치 사이즈)
  - embedding_dim : 단어 -> 임베딩 벡터로 하기 위한 차원 수
2.  call : gru 에 들어가 output, state 출력
3.  initialize_hidden_state : 맨 처음 gru에 들어가기 위한 더미 입력 값




In [189]:
class Encoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
    super(Encoder, self).__init__()
    self.batch_sz = batch_sz
    self.enc_units = enc_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.enc_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')

  def call(self, x, hidden):
    x = self.embedding(x)
    output, state = self.gru(x, initial_state = hidden)
    return output, state

  def initialize_hidden_state(self):
    return tf.zeros((self.batch_sz, self.enc_units))

### **Encoder 객체 생성**

In [190]:
# encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)

### **Attention**


In [191]:
class BahdanauAttention(tf.keras.layers.Layer):
  def __init__(self, units):
    super(BahdanauAttention, self).__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, values):
    # 쿼리 은닉 상태(query hidden state)는 (batch_size, hidden size)쌍으로 이루어져 있습니다.
    # query_with_time_axis은 (batch_size, 1, hidden size)쌍으로 이루어져 있습니다.
    # values는 (batch_size, max_len, hidden size)쌍으로 이루어져 있습니다.
    # 스코어(score)계산을 위해 덧셈을 수행하고자 시간 축을 확장하여 아래의 과정을 수행합니다.
    query_with_time_axis = tf.expand_dims(query, 1)

    # score는 (batch_size, max_length, 1)쌍으로 이루어져 있습니다.
    # score를 self.V에 적용하기 때문에 마지막 축에 1을 얻습니다.
    # self.V에 적용하기 전에 텐서는 (batch_size, max_length, units)쌍으로 이루어져 있습니다.
    score = self.V(tf.nn.tanh(
        self.W1(query_with_time_axis) + self.W2(values)))

    # attention_weights는 (batch_size, max_length, 1)쌍으로 이루어져 있습니다. 
    attention_weights = tf.nn.softmax(score, axis=1)

    # 덧셈이후 컨텍스트 벡터(context_vector)는 (batch_size, hidden_size)쌍으로 이루어져 있습니다.
    context_vector = attention_weights * values
    context_vector = tf.reduce_sum(context_vector, axis=1)

    return context_vector, attention_weights

### **Decoder**


1.   초기화 : vocab_size(단어의 크기), embedding_dim(임베딩 차원 수), enc_units(인코더의 히든 사이즈), batch_sz(배치 사이즈)
2.   encoder 와의 차이점 : 마지막 fully_connected_layer(tf.keras.layers.Dense) 추가



In [192]:
class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
    super(Decoder, self).__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    self.fc = tf.keras.layers.Dense(vocab_size)

    # 어텐션을 사용합니다.
    self.attention = BahdanauAttention(self.dec_units)

  def call(self, x, hidden, enc_output):
    # enc_output는 (batch_size, max_length, hidden_size)쌍으로 이루어져 있습니다.
    context_vector, attention_weights = self.attention(hidden, enc_output)

    # 임베딩층을 통과한 후 x는 (batch_size, 1, embedding_dim)쌍으로 이루어져 있습니다.
    x = self.embedding(x)

    # 컨텍스트 벡터와 임베딩 결과를 결합한 이후 x의 형태는 (batch_size, 1, embedding_dim + hidden_size)쌍으로 이루어져 있습니다.
    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    # 위에서 결합된 벡터를 GRU에 전달합니다.
    output, state = self.gru(x)

    # output은 (batch_size * 1, hidden_size)쌍으로 이루어져 있습니다.
    output = tf.reshape(output, (-1, output.shape[2]))

    # output은 (batch_size, vocab)쌍으로 이루어져 있습니다.
    x = self.fc(output)

    # return x, state, attention_weights
    return x, state

### **Decoder 객체 생성**

In [193]:
# decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)

### **NMT Model 생성**

In [194]:
class NMT_Model():
  def __init__(self):
    super(NMT_Model, self).__init__()
    self.encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
    self.decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)

### **Ensemble Model 생성**

In [195]:
models = []
# num_models = 5
num_models = 15
for m in range(num_models):
  m = NMT_Model()
  models.append(m)

### **Loss Function & Optimizer**

In [196]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask

  return tf.reduce_mean(loss_)

### **Chekcpoint**
- 여기서 학습한 매개변수를 저장, optimizer/encoder/decoder

In [197]:
checkpoint_dir = '/Users/ahjeong_park/Study/Attention-Ensemble-Translation/training_checkpoints_esb'
checkpoint_dir_test = '/Users/ahjeong_park/Study/Attention-Ensemble-Translation/training_checkpoints_esb2'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoints = []

for m in range(num_models):
  checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=models[m].encoder,
                                 decoder=models[m].decoder)
  checkpoints.append(checkpoint)


In [198]:
# print(checkpoints)

### **Train_step**

In [199]:
# @tf.function
def train_step(model, inp, targ, enc_hidden):
  loss = 0

  with tf.GradientTape() as tape:
    enc_output, enc_hidden = model.encoder(inp, enc_hidden)

    dec_hidden = enc_hidden

    dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)

    # 교사 강요(teacher forcing) - 다음 입력으로 타겟을 피딩(feeding)합니다.
    for t in range(1, targ.shape[1]):
      # enc_output를 디코더에 전달합니다.
      predictions, dec_hidden = model.decoder(dec_input, dec_hidden, enc_output)
      # print('predictions', predictions.shape)

      loss += loss_function(targ[:, t], predictions)

      # 교사 강요(teacher forcing)를 사용합니다. -> 훈련에서는 실제 값을 이용
      dec_input = tf.expand_dims(targ[:, t], 1)

  batch_loss = (loss / int(targ.shape[1]))
  variables = model.encoder.trainable_variables + model.decoder.trainable_variables
  gradients = tape.gradient(loss, variables)
  optimizer.apply_gradients(zip(gradients, variables))
  return batch_loss

### **문장 번역(스페인 -> 영어)** 

*   tf.keras.preprocessing.sequence.pad_sequences([inputs], maxlen='', padding='post') : 일정한 길이(maxlen)로 맞춰준다. (패딩은 뒤에서)
*   

  ```
  inp_lang.word_index :  {'<start>': 1, '<end>': 2, '.': 3, 'tom': 4, '?': 5...}
  ```

* tf.expand_dims: 차원을 늘려준다.




In [200]:
def evaluate(sentence):
    
    sentence = preprocess_sentence(sentence)
    
    # 문장, input 딕셔너리 출력 
    print ('sentence:', sentence)

    no_word = 'no word'

    input_list = []

    for j in range(num_models):
        inp = []
        for i in sentence.split(' '):
            if i in inp_lang.word_index:
                inp.append(inp_lang.word_index[i])
            else:
                print('no words!')
                return no_word, _, sentence
        input_list.append(inp)
    
    for input in range(len(input_list)):
        input_list[input] = tf.keras.preprocessing.sequence.pad_sequences([input_list[input]],
                                                             maxlen=max_length_inp,
                                                             padding='post')
        input_list[input] = tf.convert_to_tensor(input_list[input])
    
#     voting_result = ''
    result_list = [''] * 15
#     result_list = [''] * 5
    
    hiddens = []
    for i in range(num_models):
        hiddens.append([tf.zeros((1, units))])
        
    # enc_out, dec_hidden, dec_input 리스트 생성
    enc_outs = []
    enc_hiddens = []
    dec_hiddens = []
    dec_inputs = []
    
    # Encoder 의 hidden 을 Decoder 의 hidden으로 받는다.
    for i in range(num_models):
        eo, eh = models[i].encoder(input_list[i], hiddens[i])
        enc_outs.append(eo)
        enc_hiddens.append(eh)
        
    for i in range(num_models):
        dec_hiddens.append(enc_hiddens[i])
    
    # Decoder 의 시작인 '<start>' 
    for i in range(num_models):
        dec_inputs.append(tf.expand_dims([targ_lang.word_index['<start>']], 0))
        
    # Model Prediction List
    pred_num = 15
#     pred_num = 5
    suvi_models = models.copy()
    pred_softmax = []   # 모델 1~5 의 predictions 에서 가장 큰 소프트맥스 확률 값
    end_result = []   # 모델 1~5의 <end>가 나올때까지의 번역 리스트
    
    for i in range(num_models):
        pred_softmax.append([])
        
    # Target 의 최대 길이 만큼 출력
    for t in range(max_length_inp):
        pred_list = []   # 모델 1~5 의 predictions 가 저장되어 있는 리스트
        pred_ids = []    # 모델 1~5의 predictions 에서 가장 큰 값을 가지는 인덱스가 저장되어 있는 리스트
    
        for m in range(num_models):
            
            # 1. 모델(1~5)의 예측과 hidden decoder를 출력한다. --> predictions
            predictions, dec_hidden = suvi_models[m].decoder(dec_inputs[m],
                                                             dec_hiddens[m],
                                                             enc_outs[m])
            # 2. 모델(1~5)의 예측을 pred_list 리스트에 저장
            #    모델(1~5)의 hidden decoder을 dec_hiddens 리스트에 저장
            pred_list.append(predictions)
            dec_hiddens[m] = dec_hidden
            
            # 3. 모델(1~5)의 predictions 에서 가장 큰 softmax 값을 가진 인덱스 번호를 max_idx 에 저장한다.
            #    모델(1~5)에서 가장 높은 확률로 뽑힌 인덱스를 저장하는 리스트에 max_idx 를 추가한다.
            max_idx = tf.argmax(predictions[0]).numpy()
            pred_ids.append(max_idx)
            
            # 모델 (1~5)의 predictions 에서 가장 큰 softmax 확률 값(즉, 인덱스 max_idx에 해당하는 값)
            # 모델의 softmax 확률 값을 모아놓은 리스트에 저장한다.
            softmax = predictions[0][max_idx].numpy()
            pred_softmax[m].append(softmax)
        
        # 4. 각 모델의 번역 결과 word 저장
        for m in range(num_models):
            result_list[m] += targ_lang.index_word[pred_ids[m]] + ' '
        
        # 5. 번역 과정에서 <end> 가 나올 때 최종 번역 리스트에 추가
        for m in range(num_models):
            if (targ_lang.index_word[pred_ids[m]] == '<end>'):
                end_result.append(result_list[m])
        
        # 6. 모델 (1~5) 모두 <end> 가 나온다면 종료
        if len(end_result) == num_models:
            return end_result, pred_softmax, sentence
            
        # 7. 예측된 ID 를 모델에 다시 피드
        for m in range(num_models):
            dec_inputs[m] = tf.expand_dims([pred_ids[m]], 0)
            
    
    return end_result, pred_softmax, sentence

In [201]:
def translate(sentence):
    end_result, pred_softmax, sentence = evaluate(sentence)
    
    if end_result == 'no word':
        return end_result
    
    # 각 모델 번역의 softmax 평균 값
    pred_softmax = np.array(pred_softmax)
    softmax_avg = []
    
    for m in pred_softmax:
        softmax_avg.append(np.mean(m))

    # 각 모델 번역 결과 같은지/아닌지 그룹핑
    group = {}
    index = []
    for m in range(num_models):
        if end_result[m] in group:
            group[end_result[m]].append(m)
        else:
            index = [m]
            group[end_result[m]] = index
            
    # 그룹 별 소프트맥스 평균값의 합
    softmax_sum = {}

    for idx in group.values():
        sum = 0
        for m in idx:
            sum += softmax_avg[m]
        softmax_sum[tuple(idx)] = sum

#     print(group)
#     print(softmax_sum)

    # 가장 높은 소프트맥스합 값을 가진 번역 결과 출력
    real_maj = max(softmax_sum, key = softmax_sum.get)

    for key, value in group.items():
        if value == list(real_maj):
            print(key)
            return key

       


### **Checkpoint 복원**

In [202]:
ckp_dir_m1 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints'
ckp_dir_m2 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints2'
ckp_dir_m3 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints3'
ckp_dir_m4 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints4'
ckp_dir_m5 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints5' 
ckp_dir_m6 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints6'
ckp_dir_m7 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints7'
ckp_dir_m8 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints8'
ckp_dir_m9 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints9'
ckp_dir_m10 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints10'
ckp_dir_m11 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints11'
ckp_dir_m12 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints12'
ckp_dir_m13 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints13'
ckp_dir_m14 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints14'
ckp_dir_m15 = '/Users/ahjeong_park/Study/Survival-Ensemble/Checkpoint/training_checkpoints15' 

In [203]:
# checkpoint_dir내에 있는 최근 체크포인트(checkpoint)를 복원
checkpoints[0].restore(tf.train.latest_checkpoint(ckp_dir_m1))
checkpoints[1].restore(tf.train.latest_checkpoint(ckp_dir_m2))
checkpoints[2].restore(tf.train.latest_checkpoint(ckp_dir_m3))
checkpoints[3].restore(tf.train.latest_checkpoint(ckp_dir_m4))
checkpoints[4].restore(tf.train.latest_checkpoint(ckp_dir_m5))
checkpoints[5].restore(tf.train.latest_checkpoint(ckp_dir_m6))
checkpoints[6].restore(tf.train.latest_checkpoint(ckp_dir_m7))
checkpoints[7].restore(tf.train.latest_checkpoint(ckp_dir_m8))
checkpoints[8].restore(tf.train.latest_checkpoint(ckp_dir_m9))
checkpoints[9].restore(tf.train.latest_checkpoint(ckp_dir_m10))
checkpoints[10].restore(tf.train.latest_checkpoint(ckp_dir_m11))
checkpoints[11].restore(tf.train.latest_checkpoint(ckp_dir_m12))
checkpoints[12].restore(tf.train.latest_checkpoint(ckp_dir_m13))
checkpoints[13].restore(tf.train.latest_checkpoint(ckp_dir_m14))
checkpoints[14].restore(tf.train.latest_checkpoint(ckp_dir_m15))

### 이 코드로 했을 때 학습 바로 돌렸을 때와 같은 결과가 나왔음.

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fa623311c18>

### **번역 시작**

In [204]:
# translate(u'esta es mi vida.')  # this is my life

In [205]:
# translate(u'era casi imposible circular por esa calle.')

In [206]:
# translate('¿Tiene bebidas sin alcohol?') 

In [207]:
# test dataset 파일 불러오기
testdata = '/Users/ahjeong_park/Study/Survival-Ensemble/Dataset/test_data.txt'
start_row = 2

# 엑셀 파일 불러오기
wb = openpyxl.load_workbook('Survival_Translate.xlsx')

# 엑셀 파일의 시트 활성화
sheet1 = wb['real']

f = open(testdata, 'r')
lines = f.readlines()

for sentence in lines:
    result = translate(sentence)
    
    sheet1.cell(row = start_row, column = 4).value = result

    start_row += 1
    
f.close()
wb.save('Survival_Translate.xlsx')

sentence: <start> te dijeron lo que paso , ¿ no ? <end>
you said what happened here , isn t you ? <end> 
sentence: <start> ellos no estaban alla . <end>
they were not there . <end> 
sentence: <start> no me gusta ninguno de los chicos . <end>
i don t like any boys . <end> . <end> 
sentence: <start> jamas trabaje con el . <end>
i never worked with him . <end> 
sentence: <start> mi padre cultiva arroz . <end>
my father often rice . <end> 
sentence: <start> dime que hiciste en shounan . <end>
tell me what you did to shounan . <end> 
sentence: <start> ¿ cuantas palabras deberias escribir ? <end>
how many words should i like ? <end> 
sentence: <start> ¿ como te introdujiste en mi casa ? <end>
how did you get in my house ? <end> 
sentence: <start> mi madre hornea pan todas las mananas . <end>
my mother bakes bread every morning . <end> 
sentence: <start> yo he disfrutado el leer esta novela . <end>
i ve managed to read this novel . <end> 
sentence: <start> el estaba parado en la esquina . <en

i miss my friends . <end> 
sentence: <start> el me enseno a usar esta camara . <end>
he taught me how to use this camera . <end> 
sentence: <start> esta casa es demasiado chica para vivir . <end>
this house is too small to live . <end> 
sentence: <start> tom me dijo eso antes . <end>
tom told me that before . <end> 
sentence: <start> el salmon va rio arriba y pone sus huevos en la arena . <end>
the fence will crack by quickly . <end> 
sentence: <start> bebe mucha agua . <end>
drink a lot of water . <end> 
sentence: <start> no seas ridiculo ! <end>
don t be ridiculous ! <end> 
sentence: <start> estoy sin blanca . <end>
i m out of white . <end> 
sentence: <start> no nos importa lo que el haga . <end>
we don t care what he does . <end> 
sentence: <start> ¿ es hoy viernes ? <end>
is today friday ? <end> 
sentence: <start> dije que podia ir . <end>
i said i could go . <end> 
sentence: <start> ¿ a quien se lo diste ? <end>
who did you give it ? <end> 
sentence: <start> nunca se ha hecho ante

why is she so popular ? <end> 
sentence: <start> dame un pedazo de papel para escribir , por favor . <end>
give me a part of paper for me , please . <end> 
sentence: <start> para hawking , escribir este libro no fue facil . <end>
no words!
sentence: <start> no sabia que supieras conducir . <end>
i didn t know you to drive . <end> 
sentence: <start> casi todas las chicas son amables . <end>
almost all the girls are kind . <end> 
sentence: <start> llevare dos completos con mostaza y ketchup . <end>
no words!
sentence: <start> no creo que tom vaya a regresar . <end>
i don t think tom will come back . <end> 
sentence: <start> tom , ¿ que quieres de cenar ? <end>
tom , what do you want for dinner ? <end> 
sentence: <start> alrededor del ano a . c . , su gente se volvio contra roma . <end>
about the year . <end> 
sentence: <start> kenia solia ser una colonia inglesa . <end>
kenya used to be an english colony . <end> 
sentence: <start> ¿ que tal estuvo la fiesta de tom ? <end>
how was tom s p

is there space for another person ? <end> 
sentence: <start> quiero ayudarte con los deberes . <end>
i want to help you with my homework . <end> 
sentence: <start> no nos gustan nuestros vecinos , y a ellos tampoco les gustamos nosotros . <end>
we don t like our books and nobody likes us . <end> 
sentence: <start> para de hacer el vago y encuentra algo que hacer . <end>
stop the attack and make something to do . <end> 
sentence: <start> los pensamientos se expresan con palabras . <end>
no words!
sentence: <start> tom llamo a mary para decirle que podria necesitar su ayuda despues esa tarde . <end>
tom called mary to tell her she could need her help that afternoon . <end> 
sentence: <start> un extranjero me pregunto en donde queda la estacion . <end>
a foreigner asked me where the station . <end> . <end> 
sentence: <start> las polillas son atraidas por la luz . <end>
no words!
sentence: <start> deberiamos dejar descansar a los perros dormidos . <end>
we should keep the cats slept . <end

i didn t give you a present . <end> 
sentence: <start> ¿ como suena eso ? <end>
how important mean ? <end> ? <end> 
sentence: <start> esta noche vamos al cine . <end>
we re going to the movies . <end> 
sentence: <start> vimos una momia en el museo . <end>
no words!
sentence: <start> ¿ en que momento estudias ? <end>
what time do you study ? <end> 
sentence: <start> ellos me miraban en silencio . <end>
no words!
sentence: <start> este es un gran proyecto . <end>
this is a great plan . <end> 
sentence: <start> el sabe hablar japones . <end>
he can speak japanese . <end> 
sentence: <start> existe un grado de estres en todos los trabajos . <end>
draw a chatterbox of citizens . <end> . <end> 
sentence: <start> una catastrofe ha sido impedida . <end>
no words!
sentence: <start> no sabia que hacer entonces . <end>
i didn t know what to do then . <end> 
sentence: <start> me pregunto donde se estara escondiendo . <end>
i wonder where he is hiding . <end> 
sentence: <start> estoy demasiado borra

the train was an hour . <end> 
sentence: <start> tom nos engano . <end>
tom fooled us . <end> 
sentence: <start> el murio en el terremoto . <end>
he died in the earthquake . <end> 
sentence: <start> lamento mucho lo que dije . <end>
i m sorry for what i said . <end> 
sentence: <start> escribid con tinta . <end>
write it with ink . <end> 
sentence: <start> es muy relajante . <end>
no words!
sentence: <start> iremos . <end>
we ll go . <end> 
sentence: <start> no tenemos nada de que hablar . <end>
we have nothing to talk . <end> 
sentence: <start> mi hija quiere un gatito . <end>
my daughter wants a little . <end> 
sentence: <start> pense que habias dicho que no hacias ese tipo de cosas . <end>
i thought you said you didn t do that sort of thing . <end> 
sentence: <start> el tradujo el libro del frances al ingles . <end>
he translated the book from french . <end> 
sentence: <start> tenemos algo especial para usted , senor . <end>
we have something special for you , sir . <end> 
sentence: 

i am from shikoku . <end> 
sentence: <start> esto es lo mejor que se pone , chicos . <end>
this is the best thing ever pass . <end> 
sentence: <start> sabia que tom no era bueno . <end>
i knew tom wasn t good . <end> 
sentence: <start> manana voy a ir a verla . <end>
i m going to school tomorrow . <end> tomorrow . <end> 
sentence: <start> eres un doctor . <end>
you re a doctor . <end> 
sentence: <start> tomas me estuvo mensajeando . <end>
no words!
sentence: <start> sone que me estaba comiendo la tarta de bodas de mi nieta . <end>
i dreamed that i had my friend of my tennis . <end> 
sentence: <start> manana voy a boston . <end>
i m going to boston tomorrow . <end> 
sentence: <start> ¿ conoce tom a mary ? <end>
does tom know mary ? <end> 
sentence: <start> ¿ que libro necesita ? <end>
what book do you need ? <end> 
sentence: <start> somos profesores . <end>
we re teachers . <end> 
sentence: <start> yo soy el que recibio la paliza . <end>
i m the one who lay on . <end> . <end> 
sentence:

he is the least likely to come . <end> 
sentence: <start> pienso que es moralmente incorrecto comer personas . <end>
i think it is morally wrong to eat people . <end> 
sentence: <start> ellos viajaron juntos . <end>
no words!
sentence: <start> ellos atacaran . <end>
they ll attack . <end> 
sentence: <start> ella estaba avergonzada del comportamiento de sus ninos . <end>
she was ashamed of her children . <end> 
sentence: <start> ¿ que hay de nosotros ? <end>
what s on us ? <end> 
sentence: <start> el barco se hunde . <end>
the ship is sinking . <end> 
sentence: <start> esa caja es muy pesada para cargarla . <end>
no words!
sentence: <start> tom disparo primero . <end>
tom fired first . <end> 
sentence: <start> ahora da un paso atras . <end>
now a child . <end> 
sentence: <start> he estado estudiando ingles durante cinco anos . <end>
i ve been studying english for five years . <end> 
sentence: <start> ¿ donde te quitaste los guantes ? <end>
where did you take your gloves ? <end> 
sentenc

the train stopped at the station . <end> 
sentence: <start> puedo ser tu mejor amiga o tu peor enemiga . <end>
i can be your best friend or your matter . <end> . <end> 
sentence: <start> ellos quieren saber cual es tu plan . <end>
they want to know what your plan is . <end> 
sentence: <start> tom era muy atractivo cuando era joven . <end>
tom was very attractive when he was young . <end> 
sentence: <start> tom no tiene que pagar nada . <end>
tom doesn t have to pay anything . <end> 
sentence: <start> manten tus manos limpias . <end>
keep your hands clean . <end> 
sentence: <start> le pregunte sin andarme con rodeos . <end>
no words!
sentence: <start> no le hagas caso . <end>
don t make it . <end> 
sentence: <start> tom es un excelente jugador . <end>
tom is an excellent cricket . <end> 
sentence: <start> el tiene muchos sellos extranjeros , sin mencionar los japoneses . <end>
no words!
sentence: <start> no tengo idea de que significa esto . <end>
i have no idea what this means . <end> 

why can t you describe you ? <end> 
sentence: <start> me temo que no hay mucho que pueda hacer para ayudar . <end>
i m afraid there s not long to do to do . <end> 
sentence: <start> creo que estamos listos para ir alli . <end>
i think we re ready to go there . <end> 
sentence: <start> la tormenta no mostro senales de abatirse . <end>
no words!
sentence: <start> ya paso lo peor . <end>
it s already worse . <end> 
sentence: <start> quiero que entiendas lo que estoy tratando de decir . <end>
i want you to understand what i m trying to say . <end> 
sentence: <start> la chica habla muy bien ingles . <end>
the girl speaks very well . <end> 
sentence: <start> me gusta mas el invierno que el verano . <end>
i like the summer than summer . <end> 
sentence: <start> ¿ donde esta la clase de tom ? <end>
where s tom s class ? <end> 
sentence: <start> tuvimos suerte . <end>
we had lucky . <end> 
sentence: <start> tom tuvo problemas para resolver la situacion . <end>
tom had trouble accepting the situ

tom decided not to tell mary anything he had done . <end> 
sentence: <start> les di mil yenes a cada uno . <end>
i gave them a thousand yen to each other . <end> 
sentence: <start> pense que habias dicho que tom habia muerto . <end>
i thought you said tom had died . <end> 
sentence: <start> esta manana llegue tarde a la escuela . <end>
i was late for school this morning . <end> 
sentence: <start> el bebe esta llorando . <end>
the baby is crying . <end> 
sentence: <start> me quede atonito . <end>
i stayed up . <end> 
sentence: <start> me gustaria tomar al menos una botella mas de cerveza antes de ir a casa . <end>
i d like to take a few more than more than before . <end> 
sentence: <start> una batidora te permite mezclar comidas diferentes . <end>
no words!
sentence: <start> tom le regalo un diccionario de frances a mary . <end>
tom gave mary a french dictionary . <end> 
sentence: <start> caminaba a paso rapido . <end>
it was running quickly . <end> 
sentence: <start> realmente funciona

tom claimed that mary s son . <end> 
sentence: <start> los cables transmiten la electricidad . <end>
the cables sales . <end> 
sentence: <start> el rechazo su solicitud de un dia libre . <end>
he turned out his request of a day free . <end> 
sentence: <start> por favor , ¿ podria firmar este documento ? <end>
could you please sign this document ? <end> 
sentence: <start> he oido que eres una buena jugadora de tenis . <end>
no words!
sentence: <start> tomo vitaminas todos los dias . <end>
he gorged himself every day . <end> . <end> 
sentence: <start> salio de la habitacion . <end>
he left the room . <end> 
sentence: <start> ¿ tomas estaba aqui cuando paso ? <end>
was tom here when it happened ? <end> 
sentence: <start> pase toda la tarde tratando de encontrar mis llaves . <end>
i spent the whole afternoon trying to find my keys . <end> 
sentence: <start> solo queria decirte que me he encontrado con tom esta manana . <end>
i just wanted to say that i ve met tom this morning . <end> 
sent