# Get P100 GPU

In [0]:
!nvidia-smi

Fri Apr  3 11:02:25 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   63C    P0    42W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

# Get the data

## Load all the required Libraries

In [0]:
import torchtext
from torchtext.data import Field, BucketIterator, Iterator
from torchtext.datasets import TranslationDataset
from collections import defaultdict
from allennlp.training.learning_rate_schedulers.noam import NoamLR

from datetime import datetime
import pytz
import os
import torch
import copy
import time
import math

import torch.nn as nn 
import torch.nn.functional as F
import dill as pickle
import numpy as np
from torch.autograd import Variable

import numpy as np

## Mount Drive

In [0]:
# link to google drive
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [0]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

## Read data into memory

In [0]:
dir_path = '/content/drive/My Drive/Spring-20/11-747/11-747 Project/de-en/data/fairseq_data'
src = Field(fix_length=100)
trg = Field(init_token = "<sos>", eos_token = "<eos>", fix_length=100)

In [0]:
def data_loader(mode):
    data = TranslationDataset(
        path=dir_path + f"/{mode}", exts=('.de', '.en'),
        fields=(src, trg))

    iterator = None
    if mode == "train" or mode == "dev":
        if mode == "train":
          src.build_vocab(data)
          trg.build_vocab(data)

        iterator = BucketIterator(dataset=data, batch_size=128,
            sort_key=lambda x: data.interleave_keys(len(x.src), len(x.trg)))
    
    else:
        iterator = Iterator(dataset=data, batch_size=64, train=False, 
                            shuffle=False, sort=False)
    
    return iterator

In [0]:
# Train dataloader
train_iter = data_loader("train")

# Validation dataloader
val_iter = data_loader("dev")

# Test dataloader
test_iter = data_loader("test")

## Model Parameters



In [0]:
class Configuration(object):
  def __init__(self, source, target):
    self.src_data = source
    self.trg_data = target
    self.src_lang = 'de'
    self.trg_lang = 'en'
    self.src_pad = src.vocab.stoi['<pad>']
    self.trg_pad = trg.vocab.stoi['<pad>']
    self.epochs = 50
    self.n_layers = 6
    self.heads = 8
    self.dropout = 0.1
    self.printevery = 100
    self.lr = 5e-4
    self.emb_dim = 512
    self.ff_hsize = 1024
    self.max_strlen = 100
    self.checkpoint = 0
    self.device = 0
    self.clip_norm = 0.0
    self.k = 5
    self.max_len = 100
    self.factor=2
    self.warmup=4000
    self.length_penalty = 0.7


opt = Configuration(src, trg)

# My Transformer Model

## Embedder

In [0]:
class Embedder(nn.Module):
    def __init__(self, vocab_size, d_model):
        super().__init__()
        self.d_model = d_model
        self.embed = nn.Embedding(vocab_size, d_model)
    def forward(self, x):
        return self.embed(x)

In [0]:
class PositionalEncoder(nn.Module):
    def __init__(self, opt, max_seq_len=100):
        super().__init__()
        self.dropout = nn.Dropout(p=opt.dropout)
        self.dim = opt.emb_dim
        pe = torch.zeros(max_seq_len, self.dim)
        position = torch.arange(0, max_seq_len, dtype=torch.float).unsqueeze(1)

        # 1000 ^ (2i / dmodel) = e ^ (2i) * -log(1000)
        div_term = torch.exp(torch.arange(0, self.dim, 2).float() \
                            * (-math.log(10000.0) / self.dim))
        
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        
        pe = pe.unsqueeze(0)

        self.register_buffer('pe', pe)
 
    
    def forward(self, x):
        x = x * math.sqrt(self.dim)
        pe = Variable(self.pe[:,:x.size(1)], requires_grad=False)
        pe = pe.to(device)
        x = x + pe
        return self.dropout(x)

## Sublayers

In [0]:
class Norm(nn.Module):
    def __init__(self, d_model, eps=1e-5):
        super().__init__()
    
        self.size = d_model
        
        self.alpha = nn.Parameter(torch.ones(self.size))
        self.bias = nn.Parameter(torch.zeros(self.size))
        
        self.eps = eps
    
    def forward(self, x):
        norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) \
        / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias
        return norm

def attention(q, k, v, d_k, mask, dropout): 
    scores = torch.matmul(q, k.transpose(-2, -1)) /  math.sqrt(d_k)   
    scores = dropout(F.softmax(scores.masked_fill(mask == 0, -1e9), dim=-1))
        
    output = torch.matmul(scores, v)
    return output

In [0]:
class MultiHeadAttention(nn.Module):
    def __init__(self, opt):
        super().__init__()
        
        self.d_model = opt.emb_dim
        self.d_k = int(self.d_model/ opt.heads)
        self.h = opt.heads
        
        self.q_linear = nn.Linear(self.d_model, self.d_model)
        self.v_linear = nn.Linear(self.d_model, self.d_model)
        self.k_linear = nn.Linear(self.d_model, self.d_model)
        
        self.dropout = nn.Dropout(opt.dropout)
        self.out = nn.Linear(self.d_model, self.d_model)
    
    def forward(self, q, k, v, mask):
        k = self.k_linear(k).view(q.size(0), -1, self.h, self.d_k).transpose(1,2)
        q = self.q_linear(q).view(q.size(0), -1, self.h, self.d_k).transpose(1,2)
        v = self.v_linear(v).view(q.size(0), -1, self.h, self.d_k).transpose(1,2)

        mask = mask.unsqueeze(1)
        scores = attention(q, k, v, self.d_k, mask, self.dropout)
        concat = scores.transpose(1,2).contiguous() \
                .view(q.size(0), -1, self.d_model)
        output = self.out(concat)
    
        return output

In [0]:
class FeedForward(nn.Module):
    def __init__(self, opt):
        super().__init__() 

        linear_1 = nn.Linear(opt.emb_dim, opt.ff_hsize)
        dropout = nn.Dropout(opt.dropout)
        linear_2 = nn.Linear(opt.ff_hsize, opt.emb_dim)

        self.layers = nn.Sequential(linear_1, nn.ReLU(), dropout, linear_2)
    
    def forward(self, x):
        self.layers(x)
        return x

## Layers

In [0]:
class EncoderLayer(nn.Module):
    def __init__(self, opt):
        super().__init__()
        self.norm_1 = Norm(opt.emb_dim)
        self.norm_2 = Norm(opt.emb_dim)

        self.dropout_1 = nn.Dropout(opt.dropout)
        self.dropout_2 = nn.Dropout(opt.dropout)
        
        self.attn = MultiHeadAttention(opt)
        
        self.ff = FeedForward(opt)

        self.d = math.sqrt(opt.emb_dim // opt.heads)
        
        
    def forward(self, x, mask):
        '''
        This implementation follows the Tensor2Tensor implementation
        instead of the original paper "Attention is all you need"
        The Norm is applied to the input first, then self attention
        is applied to the sub-layer.
        '''

        x = self.norm_1(x)
        x1 = x + self.dropout_1(self.attn(x, x, x, mask))

        x1 = self.norm_2(x1)
        x2 = x1 + self.dropout_2(self.ff(x1))

        return x2

In [0]:
class DecoderLayer(nn.Module):
    def __init__(self, opt):
        super().__init__()
        self.norm_1 = Norm(opt.emb_dim)
        self.norm_2 = Norm(opt.emb_dim)
        self.norm_3 = Norm(opt.emb_dim)
        
        self.dropout_1 = nn.Dropout(opt.dropout)
        self.dropout_2 = nn.Dropout(opt.dropout)
        self.dropout_3 = nn.Dropout(opt.dropout)

        self.attn_1 = MultiHeadAttention(opt)
        
        self.attn_2 = MultiHeadAttention(opt)
        
        self.ff = FeedForward(opt)

        self.d = math.sqrt(opt.emb_dim // opt.heads)

    def forward(self, x, e_outputs, src_mask, trg_mask):
        '''
        This implementation follows the Tensor2Tensor implementation
        instead of the original paper "Attention is all you need"
        The Norm is applied to the input first, then self attention
        is applied to the sub-layer.
        '''
        x = self.norm_1(x)
        x1 = x + self.dropout_1(self.attn_1(x, x, x, trg_mask))

        x1 = self.norm_2(x1)
        x2 = x1 + self.dropout_2(self.attn_2(x1, 
                                             e_outputs, 
                                             e_outputs,
                                             src_mask))

        x2 = self.norm_3(x2)
        x3 = x2 + self.dropout_3(self.ff(x2))

        return x3

## Masks

There are two type of masks,

1.   To mask out the padded input sequence to prevent attention score being applied to them.
2.   To mask out the future time steps while decoding



In [0]:
def nopeak_mask(size, opt):
    np_mask = torch.triu(torch.ones((1, size, size)), diagonal=1)
    np_mask =  Variable(np_mask == 0)
    return np_mask

def create_masks(src, trg, opt):
    
    src_mask = (src != opt.src_pad).unsqueeze(-2)

    if trg is not None:
        trg_mask = (trg != opt.trg_pad).unsqueeze(-2)
        size = trg.size(1)
        np_mask = nopeak_mask(size, opt)
        np_mask = np_mask.to(device)
        trg_mask = trg_mask & np_mask
    else:
        trg_mask = None
        
    return src_mask, trg_mask

## Transformer Model

In [0]:
def get_clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])

class Encoder(nn.Module):
    def __init__(self, vocab_size, opt):
        super().__init__()
        self.N = opt.n_layers
        self.embed = Embedder(vocab_size, opt.emb_dim)
        self.pe = PositionalEncoder(opt)
        self.layers = get_clones(EncoderLayer(opt), self.N)
        self.norm = Norm(opt.emb_dim)

    def forward(self, src, mask):
        x = self.embed(src)
        x = self.pe(x)

        for i in range(self.N):
            x = self.layers[i](x, mask)

        return self.norm(x)
    
class Decoder(nn.Module):
    def __init__(self, vocab_size, opt):
        super().__init__()
        self.N = opt.n_layers
        self.embed = Embedder(vocab_size, opt.emb_dim)
        self.pe = PositionalEncoder(opt)
        self.layers = get_clones(DecoderLayer(opt), self.N)
        self.norm = Norm(opt.emb_dim)

    def forward(self, trg, e_outputs, src_mask, trg_mask):
        x = self.embed(trg)
        x = self.pe(x)

        for i in range(self.N):
            x = self.layers[i](x, e_outputs, src_mask, trg_mask)

        return self.norm(x)

class Transformer(nn.Module):
    def __init__(self, s_len, t_len, opt):
        super().__init__()
        assert opt.emb_dim % opt.heads == 0
        assert opt.dropout < 1

        self.opt = opt

        self.encoder = Encoder(s_len, self.opt)
        self.decoder = Decoder(t_len, self.opt)
        self.out = nn.Linear(opt.emb_dim, t_len)

    def forward(self, src_seq, trg_seq, src_mask, trg_mask):
        e_outputs = self.encoder(src_seq, src_mask)
        d_output = self.decoder(trg_seq, e_outputs, src_mask, trg_mask)
        output = self.out(d_output)
        return output

    def decode(self, decoder_input, encoder_output, src_mask, trg_mask):
        return self.out(model.decoder(decoder_input,
                                      encoder_output, 
                                      src_mask, 
                                      trg_mask))

def init_model_params(model):    
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model

# Optimizer

In [0]:
def get_std_opt(model, optimizer):
    return NoamLR(optimizer,
                  opt.emb_dim,
                  opt.warmup,
                  opt.factor)

## Decoder and Beam Search

In [0]:
class BeamSearch():

    def __init__(self, src, trg, model, opt):
        self.src = src
        self.trg = trg
        self.model = model
        self.opt = opt
        self.init_tok = trg.vocab.stoi['<sos>']


    def beam_search(self, encoded_seq, src_mask):
        assert encoded_seq.shape[0] == opt.k
        assert encoded_seq.shape[1] == opt.max_len

        eos_tok = self.trg.vocab.stoi['<eos>']
        outputs = torch.LongTensor([[self.init_tok]]).to(device)
        ind = None

        for i in range(1, self.opt.max_len):
            trg_mask = nopeak_mask(i, self.opt).to(device)

            if i == 1:
                out = self.model.decode(outputs, 
                                        encoded_seq[0], 
                                        src_mask, 
                                        trg_mask)
            else:
                out = self.model.decode(outputs[:, :i], 
                                        encoded_seq, 
                                        src_mask, 
                                        trg_mask)
            out = F.softmax(out, dim=-1)

            probs, ix = out[:, -1].data.topk(self.opt.k)

            if i == 1:
                log_scores = torch.Tensor([math.log(prob) \
                                           for prob in probs.data[0]]) \
                                           .unsqueeze(0)
                outputs = torch.zeros(opt.k, opt.max_len).long().to(device)

                outputs[:, 0] = self.init_tok
                outputs[:, 1] = ix[0]

                continue

            log_probs = torch.Tensor([math.log(p) \
                                        for p in probs.data.view(-1)]) \
                                    .view(opt.k, -1) \
                                    + log_scores.transpose(0,1)

            k_probs, k_ix = log_probs.view(-1).topk(opt.k)

            outputs[:, :i] = outputs[k_ix // opt.k, :i]
            outputs[:, i] = ix[k_ix // opt.k, k_ix % opt.k]

            log_scores = k_probs.unsqueeze(0)

            sentence_lengths = torch.zeros(len(outputs), 
                                            dtype=torch.long).to(device)

            for vec in (outputs==eos_tok).nonzero():
                i = vec[0]
                if sentence_lengths[i]==0:
                    sentence_lengths[i] = vec[1]

            num_finished_sentences = len([s for s in sentence_lengths \
                                            if s > 0])
            
            if num_finished_sentences == opt.k:
                alpha = opt.length_penalty
                div = 1/(sentence_lengths.type_as(log_scores)**alpha)
                _, ind = torch.max(log_scores * div, 1)
                ind = ind.data[0]
                break
        
        if ind is None:
            length = (outputs[0]==eos_tok).nonzero()   
            return [self.trg.vocab.itos[tok] for tok in outputs[0]]
        else:
            length = (outputs[ind]==eos_tok).nonzero()[0]
            return [self.trg.vocab.itos[tok] for tok in outputs[ind][1:length]]

    def encode_batch(self, test_batch):
        src_mask = (test_batch != self.src.vocab.stoi['<pad>']).unsqueeze(-2)
        encoded_batch = self.model.encoder(test_batch, src_mask)
        return encoded_batch, src_mask.to(device)

# Generate Translation and Evaluate Performance

In [0]:
def translate_sentence(test_batch, b_search, opt): 
    final_output = []
    encoded_batch, src_masks = b_search.encode_batch(test_batch)

    for i, (encoded_seq, src_mask) in enumerate(zip(encoded_batch, src_masks)):
        encoded_seqs =  torch.zeros(opt.k, 
                                    encoded_seq.size(-2), 
                                    encoded_seq.size(-1))
        encoded_seqs[:, :] = encoded_seq[0]
        encoded_seqs = encoded_seqs.to(device)

        sentence = b_search.beam_search(encoded_seqs, src_mask)
        final_output.append(sentence)

        del encoded_seqs
    
    del encoded_batch
    del src_masks

    return final_output

# Train the model

In [0]:
class Translator():
    def __init__(self, model, opt):
        self.opt = opt
        self.model = model

    def train(self, check_path):
        print("training model...")
        opt = self.opt
        model = self.model

        model.train()
        start = time.time()

        best_acc = []
        step = 0

        for epoch in range(opt.epochs):  
            total_loss = 0

            for i, batch in enumerate(train_iter):
                src_seq = batch.src.transpose(0,1)
                trg_seq = batch.trg.transpose(0,1)

                src_seq, trg_seq = src_seq.to(device), trg_seq.to(device)
                trg_input = trg_seq[:, :-1].to(device)
                
                src_mask, trg_mask = create_masks(src_seq, trg_input, opt)
                
                src_mask = src_mask.to(device)
                trg_mask = trg_mask.to(device)

                preds = model(src_seq, trg_input, src_mask, trg_mask)

                ys = trg_seq[:, 1:].contiguous().view(-1)
                opt.optimizer.zero_grad()

                loss = F.cross_entropy(preds.view(-1, preds.size(-1)), 
                                       ys, 
                                       ignore_index=opt.trg_pad)
                loss.backward()

                opt.optimizer.step()
                opt.scheduler.step_batch(step)

                total_loss += loss.item()
                step +=1
                
                if (i + 1) % opt.printevery == 0:
                    p = int(100 * (i + 1) / len(train_iter))
                    print(f"total loss: {total_loss}")
                    avg_loss = total_loss/opt.printevery
                    elapsed_time = int((time.time() - start)//60)
                    print(f"{elapsed_time}m | epoch {epoch} | ",
                          f"{p}% | loss = {avg_loss}")
                    total_loss = 0

                del src_seq
                del trg_seq
                del src_mask
                del trg_mask
    
            val_loss = self.eval(model)
            self.best_checkpoints(best_acc, val_loss, epoch, check_path)
            self.save_checkpoint(epoch, model, opt, check_path)

            print(f"{(time.time() - start)//60}m:  | epoch {epoch + 1}  100% \
                    loss = {avg_loss:.3f}\nepoch {epoch + 1} | complete, \
                    loss = {avg_loss:.3f}, val_loss = {val_loss:0.3f}")
            
    def eval(self, model):
        model = self.model
        model.eval()
        total_loss = 0

        for i, batch in enumerate(val_iter):
            with torch.no_grad():

                src = batch.src.transpose(0,1)
                trg = batch.trg.transpose(0,1)
                src, trg = src.to(device), trg.to(device)
                trg_input = trg[:, :-1]

                src_mask, trg_mask = create_masks(src, trg_input, opt)
                
                src_mask = src_mask.to(device)
                trg_mask = trg_mask.to(device)

                preds = model(src, trg_input, src_mask, trg_mask)
                ys = trg[:, 1:].contiguous().view(-1)

                loss = F.cross_entropy(preds.view(-1, preds.size(-1)), 
                                    ys, ignore_index=opt.trg_pad)
                
                total_loss += loss.item()

                del src
                del trg
                del src_mask
                del trg_mask

        val_loss = total_loss / len(val_iter)
        model.train()
        return val_loss

    def test(self):
        print("testing model...")
        self.model.eval() 
        b_search = BeamSearch(src, trg, self.model, self.opt)
                
        for i, batch in enumerate(test_iter):
            test_batch = batch.src.transpose(0,1).to(device)
            output = translate_sentence(test_batch, b_search, opt)
            append_to_list(output)

    def best_checkpoints(self, best_acc, val_loss, epoch, check_path):
        best_acc.append((epoch, val_loss))

        with open(os.path.join(check_path, "stats.txt"), "a+") as fl:
            size = 5 if len(best_acc) > 4 else len(best_acc)
            best = sorted(best_acc, key=s_key)[:size]
            indices = [str(a[0]) for a in best]
            pt = " ".join(indices)
            fl.write(f"epoch_num: {epoch}, val_loss: {val_loss}, \
                    top 5 checkpoints: {pt}\n")
            fl.write("====\n")
            fl.close()

        return

    def make_checkpoint_dir(self, path):
        d = datetime.now()
        EST = pytz.timezone('US/Eastern')
        d = d.astimezone(EST)
        fd = str(d.strftime("afternorm-%d-%H_%M_%S"))

        check_path = os.path.join(path, fd)

        try:
            os.mkdir(check_path)
        except OSError:
            print("Creation of the directory %s failed" % check_path)
        else:
            print("Successfully created the directory %s " % check_path)
        return check_path

    
    def save_checkpoint(self, epoch, model, opt, check_path):
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': opt.optimizer.state_dict(),
            }, 
            os.path.join(check_path, 
                         'transformer_'  + str(epoch) + '_model.pth'))
        
    def load_checkpoint(self, checkpoint_path, model):
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        opt.optimizer.load_state_dict(
            checkpoint['optimizer_state_dict'])
        model = model.to(self.opt.device)
        self.model = model
        
def s_key(lst):
    return lst[1]

## Declare the Training Model

In [0]:
mode = "train"
saved_checkpoint_path = "/content/drive/My Drive/Spring-20/11-747/11-747 Project/checkpoints/afternorm-01-02_25_12/transformer_5_model.pth"
check_dir = '/content/drive/My Drive/Spring-20/11-747/11-747 Project/checkpoints'

if __name__ == '__main__':

    transformer = Transformer(len(src.vocab), len(trg.vocab), opt)
    model = init_model_params(transformer)
    model = model.to(device)

    opt.optimizer = torch.optim.Adam(model.parameters(), 
                                     lr=opt.lr, 
                                     betas=(0.9, 0.98), 
                                     weight_decay=0.0001, 
                                     eps=1e-9)
    
    opt.scheduler = get_std_opt(model, opt.optimizer)

    translator = Translator(model, opt)

    if mode == "train":
        check_path = translator.make_checkpoint_dir(check_dir)
        translator.train(check_path)
    elif mode == "test":
        translator.load_checkpoint(saved_checkpoint_path, model)
        translator.test()


    

Successfully created the directory /content/drive/My Drive/Spring-20/11-747/11-747 Project/checkpoints/afternorm-03-07_02_59 
training model...
total loss: 784.813802242279
0m | epoch 0 |  7% | loss = 7.84813802242279
total loss: 673.3436665534973
1m | epoch 0 |  15% | loss = 6.733436665534973
total loss: 623.1265358924866
2m | epoch 0 |  23% | loss = 6.231265358924865
total loss: 607.5639386177063
3m | epoch 0 |  31% | loss = 6.075639386177063
total loss: 576.9185843467712
4m | epoch 0 |  39% | loss = 5.769185843467713
total loss: 545.6001148223877
5m | epoch 0 |  47% | loss = 5.456001148223877
total loss: 523.3435544967651
6m | epoch 0 |  55% | loss = 5.2334355449676515
total loss: 504.4624056816101
7m | epoch 0 |  63% | loss = 5.044624056816101
total loss: 492.979220867157
8m | epoch 0 |  71% | loss = 4.92979220867157
total loss: 480.15998554229736
9m | epoch 0 |  79% | loss = 4.801599855422974
total loss: 469.4180254936218
10m | epoch 0 |  87% | loss = 4.694180254936218
total loss:

In [0]:
import nltk
from nltk.translate.bleu_score import SmoothingFunction
chencherry = SmoothingFunction()

def calculate_bleu_scores(output, gold_output):
    for out, gout in zip(output, gold_output):
        total.append(nltk.translate.bleu_score.sentence_bleu([gout], out, smoothing_function=chencherry.method0))
    print(np.mean(total))


def append_to_list(output):
    print(output)
    for out in output:
        line = ' '.join(out)\
                .replace('@@ ', '')\
                .replace('<sos>', '')\
                .replace('<eos>', '')\
                .replace('<unk>', '')
        hypothesis.append(line)


In [0]:
hypothesis = []
test_model(model, opt)

In [0]:
hypothesis[:5]

NameError: ignored

In [0]:
pkl_path = '/content/drive/My Drive/Spring-20/11-747/11-747 Project/checkpoints/28-02_23_43-fairseq'

with open(pkl_path + '/hypothesis_fq.txt', 'w') as hypothesis_writer:
    for x in hypothesis:
        hypothesis_writer.write('%s\n' % x)

FileNotFoundError: ignored

In [0]:
goutput = []
with open(pkl_path + '/test.en.txt', 'r') as real_output:
    for x in real_output:
        x = x.strip('\n')\
                .replace('@@ ', '')\
                .replace('<sos>', '')\
                .replace('<eos>', '')\
                .replace('<unk>', '')
        goutput.append(x)

In [0]:
goutput[:5]

['you know , one of the intense pleasures of travel and one of the delights of ethnographic research is the opportunity to live amongst those who have not forgotten the old ways , who still feel their past in the wind , touch it in stones polished by rain , taste it in the bitter leaves of plants .',
 'just to know that jaguar shamans still journey beyond the milky way , or the myths of the inuit elders still resonate with meaning , or that in the himalaya , the buddhists still pursue the breath of the dharma , is to really remember the central revelation of anthropology , and that is the idea that the world in which we live does not exist in some absolute sense , but is just one model of reality , the consequence of one particular set of adaptive choices that our lineage made , albeit successfully , many generations ago .',
 'and of course , we all share the same adaptive imperatives .',
 'we &apos;re all born . we all bring our children into the world .',
 'we go through initiation r

In [0]:
nltk.translate.bleu_score.corpus_bleu(goutput, hypothesis, smoothing_function=chencherry.method0)

Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


0.6716043329073803

In [0]:
import nltk
from nltk.translate.bleu_score import corpus_bleu

In [0]:
hypothesis = open("hypothesis_unsorted.txt").readlines()
hypothesis = [x.strip('\n').split(' ') for x in hypothesis]

reference = open("test.txt").readlines()
reference = [[x.strip('\n').split(' ')] for x in reference]