In [None]:
!wget http://www.manythings.org/anki/cmn-eng.zip
!unzip -d ./cmn-eng cmn-eng.zip

--2023-10-28 14:13:39--  http://www.manythings.org/anki/cmn-eng.zip
Resolving www.manythings.org (www.manythings.org)... 173.254.30.110
Connecting to www.manythings.org (www.manythings.org)|173.254.30.110|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1278411 (1.2M) [application/zip]
Saving to: ‘cmn-eng.zip’


2023-10-28 14:13:39 (58.9 MB/s) - ‘cmn-eng.zip’ saved [1278411/1278411]

Archive:  cmn-eng.zip
  inflating: ./cmn-eng/cmn.txt       
  inflating: ./cmn-eng/_about.txt    


In [27]:
import torch
print(torch.__version__)

2.1.0+cu118


AttributeError: ignored

In [1]:
seed = 2020

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import time
import math
import random

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
# 'Hi.\t嗨。\tCC-BY 2.0 (France) Attribution: tatoeba.org #538123 (CM) & #891077 (Martha)'
with open('./cmn_tw.txt', 'r', encoding='utf-8') as f:
    data = f.read()
data = data.strip()
data = data.split('\n')
print('樣本數:\n', len(data))
print('\n樣本示例:')
data[0]

樣本數:
 29513

樣本示例:


'Hi.\t嗨'

In [5]:
# 分割英文數據和中文數據
en_data = [line.split('\t')[0] for line in data]
ch_data = [line.split('\t')[1] for line in data]
print('英文數據:\n', en_data[:10])
print('\n中文數據:\n', ch_data[:10])

英文數據:
 ['Hi.', 'Hi.', 'Run.', 'Stop', 'Wait', 'Wait', 'Begin.', 'Hello', 'I try.', 'I won']

中文數據:
 ['嗨', '你好', '跑步', '停止', '等待', '等一下', '開始', '你好', '我嘗試一下', '我贏了']


In [6]:
# 按字符级切割，並添加<eos>
en_token_list = [[char for char in line]+["<eos>"] for line in en_data]
ch_token_list = [[char for char in line]+["<eos>"] for line in ch_data]
print('英文數據:\n', en_token_list[:2])
print('\n中文數據:\n', ch_token_list[:2])

英文數據:
 [['H', 'i', '.', '<eos>'], ['H', 'i', '.', '<eos>']]

中文數據:
 [['嗨', '<eos>'], ['你', '好', '<eos>']]


In [7]:
# 基本字典
basic_dict = {'<pad>':0, '<unk>':1, '<bos>':2, '<eos>':3}
# 分別生成中英文字典
en_vocab = set(''.join(en_data))
en2id = {char:i+len(basic_dict) for i, char in enumerate(en_vocab)}
en2id.update(basic_dict)
id2en = {v:k for k,v in en2id.items()}

# 分別生成中英文字典
ch_vocab = set(''.join(ch_data))
ch2id = {char:i+len(basic_dict) for i, char in enumerate(ch_vocab)}
ch2id.update(basic_dict)
id2ch = {v:k for k,v in ch2id.items()}

In [8]:
# 利用字典，對應數據
en_num_data = [[en2id[en] for en in line ] for line in en_token_list]
ch_num_data = [[ch2id[ch] for ch in line] for line in ch_token_list]

print('char:', en_data[1])
print('index:', en_num_data[1])

char: Hi.
index: [63, 71, 57, 3]


## **表示為Dataset**

In [9]:
class TranslationDataset(Dataset):
    def __init__(self, src_data, trg_data):
        self.src_data = src_data
        self.trg_data = trg_data

        assert len(src_data) == len(trg_data), \
            "numbers of src_data  and trg_data must be equal!"

    def __len__(self):
        return len(self.src_data)

    def __getitem__(self, idx):
        src_sample =self.src_data[idx]
        src_len = len(self.src_data[idx])
        trg_sample = self.trg_data[idx]
        trg_len = len(self.trg_data[idx])
        return {"src": src_sample, "src_len": src_len, "trg": trg_sample, "trg_len": trg_len}

In [10]:
def padding_batch(batch):
    """
    input: -> list of dict
        [{'src': [1, 2, 3], 'trg': [1, 2, 3]}, {'src': [1, 2, 2, 3], 'trg': [1, 2, 2, 3]}]
    output: -> dict of tensor
        {
            "src": [[1, 2, 3, 0], [1, 2, 2, 3]].T
            "trg": [[1, 2, 3, 0], [1, 2, 2, 3]].T
        }
    """
    src_lens = [d["src_len"] for d in batch]
    trg_lens = [d["trg_len"] for d in batch]

    src_max = max([d["src_len"] for d in batch])
    trg_max = max([d["trg_len"] for d in batch])
    for d in batch:
        d["src"].extend([en2id["<pad>"]]*(src_max-d["src_len"]))
        d["trg"].extend([ch2id["<pad>"]]*(trg_max-d["trg_len"]))
    srcs = torch.tensor([pair["src"] for pair in batch], dtype=torch.long, device=device)
    trgs = torch.tensor([pair["trg"] for pair in batch], dtype=torch.long, device=device)

    batch = {"src":srcs.T, "src_len":src_lens, "trg":trgs.T, "trg_len":trg_lens}
    return batch

In [11]:
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout=0.5, bidirectional=True):
        super(Encoder, self).__init__()

        self.hid_dim = hid_dim
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.gru = nn.GRU(emb_dim, hid_dim, n_layers, dropout=dropout, bidirectional=bidirectional)

    def forward(self, input_seqs, input_lengths, hidden):
        # input_seqs = [seq_len, batch]
        embedded = self.embedding(input_seqs)
        # embedded = [seq_len, batch, embed_dim]
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths, enforce_sorted=False)

        outputs, hidden = self.gru(packed, hidden)
        outputs, output_lengths = torch.nn.utils.rnn.pad_packed_sequence(outputs)
        # outputs = [seq_len, batch, hid_dim * n directions]
        # output_lengths = [batch]
        return outputs, hidden

In [12]:
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout=0.5, bidirectional=True):
        super(Decoder, self).__init__()

        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers

        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.gru = nn.GRU(emb_dim, hid_dim, n_layers, dropout=dropout, bidirectional=bidirectional)

        if bidirectional:
            self.fc_out = nn.Linear(hid_dim*2, output_dim)
        else:
            self.fc_out = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, token_inputs, hidden):
        # token_inputs = [batch]
        batch_size = token_inputs.size(0)
        embedded = self.dropout(self.embedding(token_inputs).view(1, batch_size, -1))
        # embedded = [1, batch, emb_dim]

        output, hidden = self.gru(embedded, hidden)
        # output = [1, batch,  n_directions * hid_dim]
        # hidden = [n_layers * n_directions, batch, hid_dim]

        output = self.fc_out(output.squeeze(0))
        output = self.softmax(output)
        # output = [batch, output_dim]
        return output, hidden

In [13]:
class Seq2Seq(nn.Module):
    def __init__(self,
                 encoder,
                 decoder,
                 device,
                 predict=False,
                 basic_dict=None,
                 max_len=100
                 ):
        super(Seq2Seq, self).__init__()

        self.device = device

        self.encoder = encoder
        self.decoder = decoder

        self.predict = predict  # 訓練階段還是預測階段
        self.basic_dict = basic_dict  # decoder的字典，存放特殊token對應的id
        self.max_len = max_len  # 翻譯時最大輸出長度

        self.enc_n_layers = self.encoder.gru.num_layers
        self.enc_n_directions = 2 if self.encoder.gru.bidirectional else 1
        self.dec_n_directions = 2 if self.decoder.gru.bidirectional else 1

        assert encoder.hid_dim == decoder.hid_dim, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"
        assert self.enc_n_directions >= self.dec_n_directions, \
            "If decoder is bidirectional, encoder must be bidirectional either!"

    def forward(self, input_batches, input_lengths, target_batches=None, target_lengths=None, teacher_forcing_ratio=0.5):
        # input_batches = target_batches = [seq_len, batch]
        batch_size = input_batches.size(1)

        BOS_token = self.basic_dict["<bos>"]
        EOS_token = self.basic_dict["<eos>"]
        PAD_token = self.basic_dict["<pad>"]

        # 初始化
        encoder_hidden = torch.zeros(self.enc_n_layers*self.enc_n_directions, batch_size, self.encoder.hid_dim, device=self.device)

        # encoder_output = [seq_len, batch, hid_dim * n directions]
        # encoder_hidden = [n_layers*n_directions, batch, hid_dim]
        encoder_output, encoder_hidden = self.encoder(
            input_batches, input_lengths, encoder_hidden)

        # 初始化
        decoder_input = torch.tensor([BOS_token] * batch_size, dtype=torch.long, device=self.device)
        if self.enc_n_directions == self.dec_n_directions:
            decoder_hidden = encoder_hidden
        else:
            L = encoder_hidden.size(0)
            decoder_hidden = encoder_hidden[range(0, L, 2)] + encoder_hidden[range(1, L, 2)]

        if self.predict:
            # 預測階段使用
            # 一次只輸入一句話
            assert batch_size == 1, "batch_size of predict phase must be 1!"
            output_tokens = []

            while True:
                decoder_output, decoder_hidden = self.decoder(
                    decoder_input, decoder_hidden
                )
                # [1, 1]
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(1)  # 上一個預測作為下一個輸入
                output_token = topi.squeeze().detach().item()
                if output_token == EOS_token or len(output_tokens) == self.max_len:
                    break
                output_tokens.append(output_token)
            return output_tokens

        else:
            # 訓練階段
            max_target_length = max(target_lengths)
            all_decoder_outputs = torch.zeros((max_target_length, batch_size, self.decoder.output_dim), device=self.device)

            for t in range(max_target_length):
                use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
                if use_teacher_forcing:
                    # decoder_output = [batch, output_dim]
                    # decoder_hidden = [n_layers*n_directions, batch, hid_dim]
                    decoder_output, decoder_hidden = self.decoder(
                        decoder_input, decoder_hidden
                    )
                    all_decoder_outputs[t] = decoder_output
                    decoder_input = target_batches[t]  # 下一個輸入來自訓練數據
                else:
                    decoder_output, decoder_hidden = self.decoder(
                        decoder_input, decoder_hidden
                    )
                    # [batch, 1]
                    topv, topi = decoder_output.topk(1)
                    all_decoder_outputs[t] = decoder_output
                    decoder_input = topi.squeeze(1)  # 下一輸入來自模型預測

            loss_fn = nn.NLLLoss(ignore_index=PAD_token)
            loss = loss_fn(
                all_decoder_outputs.reshape(-1,self.decoder.output_dim ),  # [batch*seq_len, output_dim]
                target_batches.reshape(-1)                                                 # [batch*seq_len]
            )
            return loss

### **訓練和預測程式碼**

In [14]:
# 計算執行時間
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [15]:
def train(
    model,
    data_loader,
    optimizer,
    clip=1,
    teacher_forcing_ratio=0.5,
    print_every=None  # None不列印
    ):
    model.predict = False
    model.train()

    if print_every == 0:
        print_every = 1

    print_loss_total = 0  # 每次列印都重置
    start = time.time()
    epoch_loss = 0
    for i, batch in enumerate(data_loader):

        # shape = [seq_len, batch]
        input_batchs = batch["src"]
        target_batchs = batch["trg"]
        # list
        input_lens = batch["src_len"]
        target_lens = batch["trg_len"]

        optimizer.zero_grad()

        loss = model(input_batchs, input_lens, target_batchs, target_lens, teacher_forcing_ratio)
        print_loss_total += loss.item()
        epoch_loss += loss.item()
        loss.backward()

        # 梯度裁剪
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        if print_every and (i+1) % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('\tCurrent Loss: %.4f' % print_loss_avg)

    return epoch_loss / len(data_loader)

In [16]:
def evaluate(
    model,
    data_loader,
    print_every=None
    ):
    model.predict = False
    model.eval()
    if print_every == 0:
        print_every = 1

    print_loss_total = 0  # 每次打印都重置
    start = time.time()
    epoch_loss = 0
    with torch.no_grad():
        for i, batch in enumerate(data_loader):

            # shape = [seq_len, batch]
            input_batchs = batch["src"]
            target_batchs = batch["trg"]
            # list
            input_lens = batch["src_len"]
            target_lens = batch["trg_len"]

            loss = model(input_batchs, input_lens, target_batchs, target_lens, teacher_forcing_ratio=0)
            print_loss_total += loss.item()
            epoch_loss += loss.item()

            if print_every and (i+1) % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('\tCurrent Loss: %.4f' % print_loss_avg)

    return epoch_loss / len(data_loader)

In [17]:
def translate(
    model,
    sample,
    idx2token=None
    ):
    model.predict = True
    model.eval()

    # shape = [seq_len, 1]
    input_batch = sample["src"]
    # list
    input_len = sample["src_len"]

    output_tokens = model(input_batch, input_len)
    output_tokens = [idx2token[t] for t in output_tokens]

    return "".join(output_tokens)

In [18]:
INPUT_DIM = len(en2id)
OUTPUT_DIM = len(ch2id)

# 超參數
BATCH_SIZE = 32
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
LEARNING_RATE = 1e-4
N_EPOCHS = 100
CLIP = 1

bidirectional = True
enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT, bidirectional)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT, bidirectional)
model = Seq2Seq(enc, dec, device, basic_dict=basic_dict).to(device)

## encoder和encoder設置相同的學習策略
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# ## encoder和encoder設置不同的學習策略
# optimizer_grouped_parameters = [
#         {'params': [p for n, p in model.named_parameters() if 'encoder' in n], 'lr': LEARNING_RATE},
#         {'params': [p for n, p in model.named_parameters() if 'decoder' in n], 'lr': LEARNING_RATE*2}
# ]
# optimizer = optim.Adam(optimizer_grouped_parameters)

In [19]:
# 數據集
train_set = TranslationDataset(en_num_data, ch_num_data)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, collate_fn=padding_batch)

In [21]:
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    train_loss = train(model, train_loader, optimizer, CLIP)
    valid_loss = evaluate(model, train_loader)
    end_time = time.time()

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'en2ch-model.pt')

    if epoch %2 == 0:
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f}')

Epoch: 01 | Time: 1m 4s
	Train Loss: 5.184 | Val. Loss: 5.662
Epoch: 03 | Time: 1m 4s
	Train Loss: 4.229 | Val. Loss: 4.977
Epoch: 05 | Time: 1m 4s
	Train Loss: 3.654 | Val. Loss: 4.385
Epoch: 07 | Time: 1m 4s
	Train Loss: 3.210 | Val. Loss: 3.903
Epoch: 09 | Time: 1m 4s
	Train Loss: 2.868 | Val. Loss: 3.529
Epoch: 11 | Time: 1m 4s
	Train Loss: 2.576 | Val. Loss: 3.219
Epoch: 13 | Time: 1m 4s
	Train Loss: 2.346 | Val. Loss: 2.965
Epoch: 15 | Time: 1m 4s
	Train Loss: 2.143 | Val. Loss: 2.775
Epoch: 17 | Time: 1m 4s
	Train Loss: 1.976 | Val. Loss: 2.544
Epoch: 19 | Time: 1m 3s
	Train Loss: 1.802 | Val. Loss: 2.324
Epoch: 21 | Time: 1m 3s
	Train Loss: 1.645 | Val. Loss: 2.168
Epoch: 23 | Time: 1m 3s
	Train Loss: 1.524 | Val. Loss: 2.037
Epoch: 25 | Time: 1m 3s
	Train Loss: 1.396 | Val. Loss: 1.907
Epoch: 27 | Time: 1m 3s
	Train Loss: 1.292 | Val. Loss: 1.727
Epoch: 29 | Time: 1m 3s
	Train Loss: 1.190 | Val. Loss: 1.573
Epoch: 31 | Time: 1m 3s
	Train Loss: 1.104 | Val. Loss: 1.446
Epoch: 3

In [22]:
print("best valid loss：", best_valid_loss)
# 載入最優權重
model.load_state_dict(torch.load('/content/en2ch-model.pt'))

best valid loss： 0.1250642401937906


<All keys matched successfully>

In [23]:
random.seed(seed)
for i in random.sample(range(len(en_num_data)), 10):  # 隨機看10個
    en_tokens = list(filter(lambda x: x!=0, en_num_data[i]))  # 過濾零
    ch_tokens = list(filter(lambda x: x!=3 and x!=0, ch_num_data[i]))  # 和機器翻譯作對照
    sentence = [id2en[t] for t in en_tokens]
    print("【原文】")
    print("".join(sentence))
    translation = [id2ch[t] for t in ch_tokens]
    print("【原文】")
    print("".join(translation))
    test_sample = {}
    test_sample["src"] = torch.tensor(en_tokens, dtype=torch.long, device=device).reshape(-1, 1)
    test_sample["src_len"] = [len(en_tokens)]
    print("【機器翻譯】")
    print(translate(model, test_sample, id2ch), end="\n\n")

【原文】
My camera is different from yours.<eos>
【原文】
我的相機和你的不一樣
【機器翻譯】
我的相機和你的不一樣

【原文】
Monkeys can learn a lot of tricks.<eos>
【原文】
猴子可以學到很多技巧
【機器翻譯】
猴子可以學到很多技巧

【原文】
We want a new carpet.<eos>
【原文】
我們想要一塊新地毯
【機器翻譯】
我們想要一塊新地毯

【原文】
Do you mind if I turn off the light<eos>
【原文】
你介意我把燈關掉嗎
【機器翻譯】
你介意我把燈關掉嗎

【原文】
The family eats breakfast on the balcony.<eos>
【原文】
一家人在陽台上吃早餐
【機器翻譯】
一家人在陽台上吃早餐

【原文】
My mother likes tea very much.<eos>
【原文】
我媽媽非常喜歡喝茶
【機器翻譯】
我媽媽非常喜歡喝茶

【原文】
He is lacking in common sense.<eos>
【原文】
他缺乏常識
【機器翻譯】
他缺乏常識

【原文】
We immediately became friends.<eos>
【原文】
我們立即成為朋友
【機器翻譯】
我們立即成為朋友

【原文】
And what if someone sees us<eos>
【原文】
如果有人看到我們怎麼辦
【機器翻譯】
如果有人看到我們怎麼辦

【原文】
I've always kept my promises.<eos>
【原文】
我一直遵守諾言
【機器翻譯】
我一直遵守諾言



In [24]:
random.seed(seed*10)
for i in random.sample(range(len(en_num_data)), 10):  # 隨機看10個
    en_tokens = list(filter(lambda x: x!=0, en_num_data[i]))  # 過瀘零
    ch_tokens = list(filter(lambda x: x!=3 and x!=0, ch_num_data[i]))  # 和機器翻譯作對照
    sentence = [id2en[t] for t in en_tokens]
    print("【原文】")
    print("".join(sentence))
    translation = [id2ch[t] for t in ch_tokens]
    print("【原文】")
    print("".join(translation))
    test_sample = {}
    test_sample["src"] = torch.tensor(en_tokens, dtype=torch.long, device=device).reshape(-1, 1)
    test_sample["src_len"] = [len(en_tokens)]
    print("【機器翻譯】")
    print(translate(model, test_sample, id2ch), end="\n\n")

【原文】
Your CV has really impressed me.<eos>
【原文】
你的履歷給我留下了深刻的印象
【機器翻譯】
你的履歷給我留下了深刻的印象

【原文】
I'm exhausted.<eos>
【原文】
我很疲勞
【機器翻譯】
我很疲

【原文】
I wish I could figure out how to disable comments on my blog.<eos>
【原文】
我希望我能弄清楚如何停用我部落格上的評論
【機器翻譯】
我希望我能弄清楚如何停止我部落格的評論

【原文】
You're a good driver.<eos>
【原文】
你是個好司機
【機器翻譯】
你是個好司機

【原文】
I play volleyball a lot.<eos>
【原文】
我常打排球
【機器翻譯】
我常打排球

【原文】
Tom is old enough to go to school.<eos>
【原文】
湯姆已經到了可以上學的年紀了
【機器翻譯】
湯姆已經到了可以上學的年紀了

【原文】
Are you sure<eos>
【原文】
你確定
【機器翻譯】
你確定嗎

【原文】
I might have forgotten the key.<eos>
【原文】
我可能忘了鑰匙
【機器翻譯】
我可能忘了鑰匙

【原文】
I'd be crazy to do that.<eos>
【原文】
我會瘋掉那樣做的
【機器翻譯】
我會瘋掉那樣做的

【原文】
The bathtub was filled with hot water and flower petals.<eos>
【原文】
浴缸裡充滿了熱水和花瓣
【機器翻譯】
浴缸裡充滿了熱水和花瓣



In [25]:
random.seed(seed*20)
for i in random.sample(range(len(en_num_data)), 10):  # 隨機看10個
    en_tokens = list(filter(lambda x: x!=0, en_num_data[i]))  # 過瀘零
    ch_tokens = list(filter(lambda x: x!=3 and x!=0, ch_num_data[i]))  # 和機器翻譯作對照
    sentence = [id2en[t] for t in en_tokens]
    print("【原文】")
    print("".join(sentence))
    translation = [id2ch[t] for t in ch_tokens]
    print("【原文】")
    print("".join(translation))
    test_sample = {}
    test_sample["src"] = torch.tensor(en_tokens, dtype=torch.long, device=device).reshape(-1, 1)
    test_sample["src_len"] = [len(en_tokens)]
    print("【機器翻譯】")
    print(translate(model, test_sample, id2ch), end="\n\n")

【原文】
I'm lucky.<eos>
【原文】
我很幸運
【機器翻譯】
我很幸運

【原文】
You're just being ridiculous.<eos>
【原文】
你簡直是太可笑了
【機器翻譯】
你簡直是太可笑了

【原文】
You can tell us.<eos>
【原文】
你可以告訴我們
【機器翻譯】
你可以告訴我們

【原文】
A truck ran over our dog.<eos>
【原文】
一輛卡車輾過我們的狗
【機器翻譯】
一輛卡車輾過我們的狗

【原文】
I asked him to go there tomorrow.<eos>
【原文】
我請他明天去那裡
【機器翻譯】
我請他明天去那裡

【原文】
Let's eat sushi.<eos>
【原文】
我們吃壽司吧
【機器翻譯】
我們吃壽司吧

【原文】
In case the shipment is delayed, we have special delay insurance.<eos>
【原文】
如果發貨延誤，我們有特殊的延誤保險
【機器翻譯】
如果貨貨貨誤，我們有特殊的延誤保險

【原文】
His second son married and settled down.<eos>
【原文】
他的二兒子結婚並定居下來
【機器翻譯】
他的二兒子結婚並定居下來

【原文】
By the way, what's your address<eos>
【原文】
順便問一下，你的地址是
【機器翻譯】
順便問一下，你的地址是

【原文】
The board unanimously decided to appoint her as CEO.<eos>
【原文】
董事會一致決定任命她為執行長
【機器翻譯】
董事會一致決定任命為執執長長

