In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker

import os
import urllib
import re
import random
import json
from typing import List, Dict, Optional, Any, Tuple
import glob

import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
import pickle
from torch.nn import functional as F

from collections import OrderedDict, Counter

In [2]:
import tokenizers
#from tokenizers import Tokenizer
from tokenizers.pre_tokenizers import Whitespace
from tokenizers.pre_tokenizers import WhitespaceSplit
from tokenizers.pre_tokenizers import Punctuation

from tokenizers import normalizers
from tokenizers.normalizers import Lowercase, Strip, Replace, Sequence
from tokenizers.trainers import UnigramTrainer

In [3]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(1)

In [4]:
HEMINGWAY_DATA = './data/hemingway'
!mkdir -p $HEMINGWAY_DATA


In [5]:
#!pip install sacrebleu
!pip show sacrebleu

Name: sacrebleu
Version: 1.5.1
Summary: Hassle-free computation of shareable, comparable, and reproducible BLEU, chrF, and TER scores
Home-page: https://github.com/mjpost/sacrebleu
Author: Matt Post
Author-email: post@cs.jhu.edu
License: Apache License 2.0
Location: /home/guntis/anaconda3/envs/tw131/lib/python3.8/site-packages
Requires: portalocker
Required-by: 


In [6]:
# from google.colab import drive
# drive.mount('/content/drive')

#Text preprocessing

In [7]:
num_bpe_merges = 10000
vocab_size = 5500
joint_vocab_size = 2*vocab_size

!echo BPE_ops=$num_bpe_merges vocab_size=$vocab_size joint_vocab_size=$joint_vocab_size

BPE_ops=10000 vocab_size=5500 joint_vocab_size=11000


In [8]:
#!pip install subword-nmt
!pip show subword-nmt

Name: subword-nmt
Version: 0.3.7
Summary: Unsupervised Word Segmentation for Neural Machine Translation and Text Generation
Home-page: https://github.com/rsennrich/subword-nmt
Author: Rico Sennrich
Author-email: None
License: MIT
Location: /home/guntis/anaconda3/envs/tw131/lib/python3.8/site-packages
Requires: 
Required-by: 


In [9]:
# Read Hemingway texts from URL. There are Hemingway's "A Farewell to arms"
text_en = urllib.request.urlopen('http://www.ltn.lv/~guntis/translation_dataset/dataset_en_small.txt').read().decode("utf-8", "ignore")
text_lv = urllib.request.urlopen('http://www.ltn.lv/~guntis/translation_dataset/dataset_lv_small.txt').read().decode("utf-8-sig", "ignore")

HEMINGWAY_SRC_EN = f'{HEMINGWAY_DATA}/hemingway.en.txt'
HEMINGWAY_SRC_LV = f'{HEMINGWAY_DATA}/hemingway.lv.txt'

with open(HEMINGWAY_SRC_EN, 'w') as f:
    f.write(text_en)

with open(HEMINGWAY_SRC_LV, 'w') as f:
    f.write(text_lv)

In [10]:
# !git clone https://github.com/moses-smt/mosesdecoder.git

In [11]:
# Normalize and tokenize texts

!cat $HEMINGWAY_SRC_EN | mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l en \
  | mosesdecoder/scripts/tokenizer/tokenizer.perl -a -l en > $HEMINGWAY_DATA/hemingway.en.tok.txt

!cat $HEMINGWAY_SRC_LV | mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l lv \
  | mosesdecoder/scripts/tokenizer/tokenizer.perl -a -l lv > $HEMINGWAY_DATA/hemingway.lv.tok.txt

Tokenizer Version 1.1
Language: en
Number of threads: 1
Tokenizer Version 1.1
Language: lv
Number of threads: 1


In [12]:
# # Normalize and tokenize texts

# #!cat hemingway.en.txt | mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l en \
# !cat hemingway.en.txt \
#   | mosesdecoder/scripts/tokenizer/tokenizer.perl -a -l en > hemingway.en.tok.txt

# # !cat hemingway.lv.txt | mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l lv \
# !cat hemingway.lv.txt \
#   | mosesdecoder/scripts/tokenizer/tokenizer.perl -a -l lv > hemingway.lv.tok.txt

In [13]:
!mosesdecoder/scripts/recaser/train-truecaser.perl -corpus $HEMINGWAY_DATA/hemingway.en.tok.txt -model $HEMINGWAY_DATA/tc_model.en
!mosesdecoder/scripts/recaser/train-truecaser.perl -corpus $HEMINGWAY_DATA/hemingway.lv.tok.txt -model $HEMINGWAY_DATA/tc_model.lv

!mosesdecoder/scripts/recaser/truecase.perl -model $HEMINGWAY_DATA/tc_model.en < $HEMINGWAY_DATA/hemingway.en.tok.txt > $HEMINGWAY_DATA/hemingway.en.tc.txt
!mosesdecoder/scripts/recaser/truecase.perl -model $HEMINGWAY_DATA/tc_model.lv < $HEMINGWAY_DATA/hemingway.lv.tok.txt > $HEMINGWAY_DATA/hemingway.lv.tc.txt

In [14]:
# !subword-nmt learn-joint-bpe-and-vocab --input en.tc.txt lv.tc.txt -s 10000 -o tokens.txt --write-vocabulary token_freq.en.txt token_freq.lv.txt
!mkdir -p $HEMINGWAY_DATA/bpe 
!subword-nmt learn-joint-bpe-and-vocab --input $HEMINGWAY_DATA/hemingway.en.tc.txt -s $num_bpe_merges -o $HEMINGWAY_DATA/bpe/tokens.en --write-vocabulary $HEMINGWAY_DATA/bpe/token_freq.en
!subword-nmt learn-joint-bpe-and-vocab --input $HEMINGWAY_DATA/hemingway.lv.tc.txt -s $num_bpe_merges -o $HEMINGWAY_DATA/bpe/tokens.lv --write-vocabulary $HEMINGWAY_DATA/bpe/token_freq.lv

no pair has frequency >= 2. Stopping
no pair has frequency >= 2. Stopping


In [15]:
def build_vocab(freq_file, vocab_size):
    vocab = ['<unk>', '<pad>', '<eos>']
    with open(freq_file, 'r') as f:
        for line in f.readlines():
            token, _ = line.split()
            vocab.append(token)

    return vocab[:vocab_size]

en_vocab = build_vocab(f'{HEMINGWAY_DATA}/bpe/token_freq.en', vocab_size)
lv_vocab = build_vocab(f'{HEMINGWAY_DATA}/bpe/token_freq.lv', vocab_size)

with open(f'{HEMINGWAY_DATA}/bpe/vocab.en', 'w') as f:
    for i, token in enumerate(en_vocab):
        f.write(f"{token} {i + 1} \n")

with open(f'{HEMINGWAY_DATA}/bpe/vocab.lv', 'w') as f:
    for i, token in enumerate(lv_vocab):
        f.write(f"{token} {i + 1} \n")

In [16]:
!subword-nmt apply-bpe -c $HEMINGWAY_DATA/bpe/tokens.en --vocabulary $HEMINGWAY_DATA/bpe/vocab.en --vocabulary-threshold 1 < $HEMINGWAY_DATA/hemingway.en.tc.txt > $HEMINGWAY_DATA/hemingway.en.BPE.txt
!subword-nmt apply-bpe -c $HEMINGWAY_DATA/bpe/tokens.lv --vocabulary $HEMINGWAY_DATA/bpe/vocab.lv --vocabulary-threshold 1 < $HEMINGWAY_DATA/hemingway.lv.tc.txt > $HEMINGWAY_DATA/hemingway.lv.BPE.txt

# !subword-nmt apply-bpe -c $HEMINGWAY_DATA/bpe/tokens.en --vocabulary $HEMINGWAY_DATA/bpe/token_freq.en --vocabulary-threshold 1 < $HEMINGWAY_DATA/hemingway.en.tc.txt > $HEMINGWAY_DATA/hemingway.en.BPE.txt
# !subword-nmt apply-bpe -c $HEMINGWAY_DATA/bpe/tokens.lv --vocabulary $HEMINGWAY_DATA/bpe/token_freq.lv --vocabulary-threshold 1 < $HEMINGWAY_DATA/hemingway.lv.tc.txt > $HEMINGWAY_DATA/hemingway.lv.BPE.txt

In [17]:
special_tokens = ['<unk>', '<pad>', '<eos>', '<sep>'] #, '<S>', '</S>', '<bos>', '<eos>', '<sep>', '<NONE>', '<|>']
                  
    #  '---Exits---']  #, COMMAND_TOKEN]

                 # '+open', '+closed', '+roasted', '+baked', '+fried', '+raw',
                 # '+sliced', '+diced', '+chopped', '++Carrying:', ]
normalizer = normalizers.Sequence([Strip(), Lowercase()])
pre_tokenizer = Whitespace()

model = tokenizers.models.WordLevel(unk_token='<unk>')
# model = tokenizers.models.WordPiece()
tokenizer = tokenizers.Tokenizer(model=model)


tokenizer.add_special_tokens(special_tokens)
tokenizer.normalizer = normalizer
tokenizer.pre_tokenizer = pre_tokenizer

# filelist = glob.glob(PTHRU_DIR+"valid/*.pthru")
# filelist.extend( glob.glob(PTHRU_DIR+"test/*.pthru"))
# filelist.extend( glob.glob(PTHRU_DIR+"train/*.pthru"))


# token_strs = [tok for (tok, span) in pre_tokenizer.pre_tokenize_str(str1)]
# print(token_strs)

# filelist = glob.glob(PTHRU_DIR+"valid/*.pthru")

filelist = glob.glob(f"{HEMINGWAY_DATA}/hemingway.*.BPE.txt")

filelist = sorted(filelist)
print(len(filelist), filelist[:10])


# unigram_trainer = tokenizers.trainers.UnigramTrainer()
# trainer = tokenizers.trainers.WordPieceTrainer(vocab_size=vocab_size)
trainer = tokenizers.trainers.WordLevelTrainer(vocab_size=joint_vocab_size, special_tokens=special_tokens)

tokenizer.train(files=filelist, trainer=trainer)

2 ['./data/hemingway/hemingway.en.BPE.txt', './data/hemingway/hemingway.lv.BPE.txt']


In [18]:
vocab_dict = tokenizer.get_vocab(with_added_tokens=False)
print("ACTUAL VOCAB SIZE =", len(vocab_dict))
print(vocab_dict)

ACTUAL VOCAB SIZE = 8730
{'stairs': 1244, 'neskatījās': 5888, 'tch': 8447, 'november': 4321, 'racing': 2823, 'durvīs': 5298, 'smoke': 3219, 'ians': 5037, 'translated': 6558, 'brother': 4166, 'smie': 6479, 'grabbed': 8232, 'majoram': 7174, 'nākamgad': 6081, 'ūsām': 5057, 'sniedza': 2569, 'stāsti': 3921, 'stāvot': 4658, 'employment': 8183, 'ways': 8279, 'ņu': 564, 'fire': 1524, 'either': 2721, 'varot': 4310, 'kasta': 6468, 'ative': 6535, 'no': 24, 'umu': 701, 'akmeņiem': 3966, 'kties': 2862, 'nošāva': 4073, 'spinner': 6699, 'ying': 6457, 'jūdzi': 4705, 'neatradu': 8292, 'cap': 985, 'dīti': 5841, 'deviņdesmit': 5539, 'sanitārs': 2638, 'small': 600, 'dzeršu': 4285, 'latin': 5597, 'ņas': 2075, 'iestrē': 6393, 'erno': 7923, 'heavy': 2428, 'ratos': 2872, 'pār': 157, 'via': 3165, 'skaitī': 4082, 'ik': 5117, 'wan': 8721, 'paša': 2600, 'salīdzinājumā': 8160, 'scr': 8656, 'otā': 4854, 'is': 58, 'ināju': 5361, 'avīzi': 3630, 'crowded': 3855, 'home': 740, 'large': 3575, 'iekārta': 5330, 'cepures': 

In [19]:
with open(f'{HEMINGWAY_DATA}/hemingway.lv.BPE.txt', 'r') as f:
    text_input = f.read()

with open(f'{HEMINGWAY_DATA}/hemingway.en.BPE.txt', 'r') as f:
    text_output = f.read()

#MinGPT

In [20]:
import random
import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

def top_k_logits(logits, k):
    v, ix = torch.topk(logits, k)
    out = logits.clone()
    out[out < v[:, [-1]]] = -float('Inf')
    return out

def calculate_attention_token(attention, top_k, model):
    logits = model.head(attention)
    logits = logits[:, -1, :]
    logits = top_k_logits(logits, top_k)

    probs = F.softmax(logits)

    _, ix = torch.topk(probs, k=1, dim=-1)
    ix = torch.multinomial(probs, num_samples=top_k)

    return ix[0]


@torch.no_grad()
def sample(model, x, steps, temperature=1.0, sample=False, top_k=None, output_attention=False):
    """
    take a conditioning sequence of indices in x (of shape (b,t)) and predict the next token in
    the sequence, feeding the predictions back into the model each time. Clearly the sampling
    has quadratic complexity unlike an RNN that is only linear, and has a finite context window
    of block_size, unlike an RNN that has an infinite context window.
    """
    block_size = model.get_block_size()
    model.eval()
    attention_state = [[] for _ in model.blocks]

    for k in range(steps):
        x_cond = x if x.size(1) <= block_size else x[:, -block_size:] # crop context if needed
        logits, _ = model(x_cond)
        # pluck the logits at the final step and scale by temperature
        logits = logits[:, -1, :] / temperature
        # optionally crop probabilities to only the top k options
        if top_k is not None:
            logits = top_k_logits(logits, top_k)
        # apply softmax to convert to probabilities
        probs = F.softmax(logits, dim=-1)
        # sample from the distribution or take the most likely
        if sample:
            ix = torch.multinomial(probs, num_samples=1)
        else:
            _, ix = torch.topk(probs, k=1, dim=-1)

        if output_attention:
            b, t = x.size()

            for block_id in range(len(model.blocks)):
                att = model.blocks[block_id].attn.att
                attention_state[block_id].append(att)

        # append to the sequence and continue
        x = torch.cat((x, ix), dim=1)

    if output_attention:
        return x, attention_state

    return x


In [21]:
"""
GPT model:
- the initial stem consists of a combination of token encoding and a positional encoding
- the meat of it is a uniform sequence of Transformer blocks
    - each Transformer is a sequential combination of a 1-hidden-layer MLP block and a self-attention block
    - all blocks feed into a central residual pathway similar to resnets
- the final decoder is a linear projection into a vanilla Softmax classifier
"""

import math
import logging

import torch
import torch.nn as nn
from torch.nn import functional as F

logger = logging.getLogger(__name__)

class GPTConfig:
    """ base GPT config, params common to all GPT versions """
    embd_pdrop = 0.1
    resid_pdrop = 0.1
    attn_pdrop = 0.1

    def __init__(self, vocab_size, block_size, **kwargs):
        self.vocab_size = vocab_size
        self.block_size = block_size
        for k,v in kwargs.items():
            setattr(self, k, v)

class GPT1Config(GPTConfig):
    """ GPT-1 like network roughly 125M params """
    n_layer = 12
    n_head = 12
    n_embd = 768

class CausalSelfAttention(nn.Module):
    """
    A vanilla multi-head masked self-attention layer with a projection at the end.
    It is possible to use torch.nn.MultiheadAttention here but I am including an
    explicit implementation here to show that there is nothing too scary here.
    """

    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        # key, query, value projections for all heads
        self.key = nn.Linear(config.n_embd, config.n_embd)
        self.query = nn.Linear(config.n_embd, config.n_embd)
        self.value = nn.Linear(config.n_embd, config.n_embd)
        # regularization
        self.attn_drop = nn.Dropout(config.attn_pdrop)
        self.resid_drop = nn.Dropout(config.resid_pdrop)
        # output projection
        self.proj = nn.Linear(config.n_embd, config.n_embd)
        # causal mask to ensure that attention is only applied to the left in the input sequence
        self.register_buffer("mask", torch.tril(torch.ones(config.block_size, config.block_size))
                                     .view(1, 1, config.block_size, config.block_size))
        self.n_head = config.n_head
        self.att = None

    def forward(self, x, layer_past=None):
        B, T, C = x.size()

        # calculate query, key, values for all heads in batch and move head forward to be the batch dim
        k = self.key(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)
        q = self.query(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)
        v = self.value(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)

        # causal self-attention; Self-attend: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T)
        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
        att = att.masked_fill(self.mask[:,:,:T,:T] == 0, float('-inf'))
        att = F.softmax(att, dim=-1)
        att = self.attn_drop(att)
        y = att @ v # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs)
        y = y.transpose(1, 2).contiguous().view(B, T, C) # re-assemble all head outputs side by side

        # output projection
        y = self.resid_drop(self.proj(y))

        self.att = att

        return y

class Block(nn.Module):
    """ an unassuming Transformer block """

    def __init__(self, config):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        self.ln2 = nn.LayerNorm(config.n_embd)
        self.attn = CausalSelfAttention(config)
        self.mlp = nn.Sequential(
            nn.Linear(config.n_embd, 4 * config.n_embd),
            nn.GELU(),
            nn.Linear(4 * config.n_embd, config.n_embd),
            nn.Dropout(config.resid_pdrop),
        )

    def forward(self, x):
        x = x + self.attn(self.ln1(x))
        x = x + self.mlp(self.ln2(x))
        return x

class GPT(nn.Module):
    """  the full GPT language model, with a context size of block_size """

    def __init__(self, config):
        super().__init__()

        # input embedding stem
        self.tok_emb = nn.Embedding(config.vocab_size, config.n_embd)
        self.pos_emb = nn.Parameter(torch.zeros(1, config.block_size, config.n_embd))
        self.drop = nn.Dropout(config.embd_pdrop)
        # transformer
        self.blocks = nn.Sequential(*[Block(config) for _ in range(config.n_layer)])
        # decoder head
        self.ln_f = nn.LayerNorm(config.n_embd)
        self.head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        self.block_size = config.block_size
        self.apply(self._init_weights)

        logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))

    def get_block_size(self):
        return self.block_size

    def _init_weights(self, module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            module.weight.data.normal_(mean=0.0, std=0.02)
            if isinstance(module, nn.Linear) and module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def configure_optimizers(self, train_config):
        """
        This long function is unfortunately doing something very simple and is being very defensive:
        We are separating out all parameters of the model into two buckets: those that will experience
        weight decay for regularization and those that won't (biases, and layernorm/embedding weights).
        We are then returning the PyTorch optimizer object.
        """

        # separate out all parameters to those that will and won't experience regularizing weight decay
        decay = set()
        no_decay = set()
        whitelist_weight_modules = (torch.nn.Linear, )
        blacklist_weight_modules = (torch.nn.LayerNorm, torch.nn.Embedding)
        for mn, m in self.named_modules():
            for pn, p in m.named_parameters():
                fpn = '%s.%s' % (mn, pn) if mn else pn # full param name

                if pn.endswith('bias'):
                    # all biases will not be decayed
                    no_decay.add(fpn)
                elif pn.endswith('weight') and isinstance(m, whitelist_weight_modules):
                    # weights of whitelist modules will be weight decayed
                    decay.add(fpn)
                elif pn.endswith('weight') and isinstance(m, blacklist_weight_modules):
                    # weights of blacklist modules will NOT be weight decayed
                    no_decay.add(fpn)

        # special case the position embedding parameter in the root GPT module as not decayed
        no_decay.add('pos_emb')

        # validate that we considered every parameter
        param_dict = {pn: p for pn, p in self.named_parameters()}
        inter_params = decay & no_decay
        union_params = decay | no_decay
        assert len(inter_params) == 0, "parameters %s made it into both decay/no_decay sets!" % (str(inter_params), )
        assert len(param_dict.keys() - union_params) == 0, "parameters %s were not separated into either decay/no_decay set!" \
                                                    % (str(param_dict.keys() - union_params), )

        # create the pytorch optimizer object
        optim_groups = [
            {"params": [param_dict[pn] for pn in sorted(list(decay))], "weight_decay": train_config.weight_decay},
            {"params": [param_dict[pn] for pn in sorted(list(no_decay))], "weight_decay": 0.0},
        ]
        optimizer = torch.optim.AdamW(optim_groups, lr=train_config.learning_rate, betas=train_config.betas)
        return optimizer

    def forward(self, idx, targets=None):
        b, t = idx.size()
        assert t <= self.block_size, "Cannot forward, model block size is exhausted."

        # forward the GPT model
        token_embeddings = self.tok_emb(idx) # each index maps to a (learnable) vector
        position_embeddings = self.pos_emb[:, :t, :] # each position maps to a (learnable) vector
        x = self.drop(token_embeddings + position_embeddings)
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.head(x)

        # if we are given some desired targets also calculate the loss
        loss = None
        if targets is not None:
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))

        return logits, loss


In [22]:
"""
Simple training loop; Boilerplate that could apply to any arbitrary neural network,
so nothing in this file really has anything to do with GPT specifically.
"""

import sacrebleu
import math
import logging
from random import choice

from tqdm import tqdm
import numpy as np

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data.dataloader import DataLoader

logger = logging.getLogger(__name__)

def clean_tokens(sentence):
    return sentence.replace('@@ ', '').replace(' @', '').replace('@ ', '')

class TrainerConfig:
    # optimization parameters
    max_epochs = 10
    batch_size = 64
    learning_rate = 3e-4
    betas = (0.9, 0.95)
    grad_norm_clip = 1.0
    weight_decay = 0.1 # only applied on matmul weights
    # learning rate decay params: linear warmup followed by cosine decay to 10% of original
    lr_decay = False
    warmup_tokens = 375e6 # these two numbers come from the GPT-3 paper, but may not be good defaults elsewhere
    final_tokens = 260e9 # (at what point we reach 10% of original LR)
    # checkpoint settings
    ckpt_path = None
    num_workers = 0 # for DataLoader

    def __init__(self, **kwargs):
        for k,v in kwargs.items():
            setattr(self, k, v)

class Trainer:

    def __init__(self, model, train_dataset, test_dataset, valid_dataset, config):
        self.model = model
        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
        self.valid_dataset = valid_dataset
        self.config = config

        # take over whatever gpus are on the system
        self.device = 'cpu'
        if torch.cuda.is_available():
            self.device = torch.cuda.current_device()
            self.model = torch.nn.DataParallel(self.model).to(self.device)

    def save_checkpoint(self, postfix=''):
        # DataParallel wrappers keep raw model object in .module attribute
        raw_model = self.model.module if hasattr(self.model, "module") else self.model
        checkpoint_path = self.config.ckpt_path + postfix + '.pt'
        logger.info("saving %s", checkpoint_path)
        torch.save(raw_model.state_dict(), checkpoint_path)

    def train(self):
        model, config = self.model, self.config
        raw_model = model.module if hasattr(self.model, "module") else model
        optimizer = raw_model.configure_optimizers(config)

        def run_epoch(split):
            is_train = split == 'train'
            model.train(is_train)
            data = self.train_dataset
            if split == 'test':
                data = self.test_dataset
            elif split == 'valid':
                data = self.valid_dataset
                model.eval()
            loader = DataLoader(data, shuffle=True, pin_memory=True,
                                batch_size=config.batch_size,
                                num_workers=config.num_workers)

            losses = []
            pbar = tqdm(enumerate(loader), total=len(loader)) if is_train else enumerate(loader)
            logits_total = None
            x_total = None
            y_total = None
            for it, (x, y) in pbar:

                # place data on the correct device
                x = x.to(self.device)
                y = y.to(self.device)

                # forward the model
                with torch.set_grad_enabled(is_train):
                    logits, loss = model(x, y)
                    loss = loss.mean() # collapse all losses if they are scattered on multiple gpus
                    losses.append(loss.item())
                    if split == 'valid':
                        if logits_total is None:
                            logits_total = logits
                            x_total = x
                            y_total = y
                        else:
                            logits_total = torch.cat((logits_total, logits), dim=0)
                            x_total = torch.cat((x_total, x), dim=0)
                            y_total = torch.cat((y_total, y), dim=0)
                        

                if is_train:
                    # backprop and update the parameters
                    model.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_norm_clip)
                    optimizer.step()

                    # decay the learning rate based on our progress
                    if config.lr_decay:
                        self.tokens += (y >= 0).sum() # number of tokens processed this step (i.e. label is not -100)
                        if self.tokens < config.warmup_tokens:
                            # linear warmup
                            lr_mult = float(self.tokens) / float(max(1, config.warmup_tokens))
                        else:
                            # cosine learning rate decay
                            progress = float(self.tokens - config.warmup_tokens) / float(max(1, config.final_tokens - config.warmup_tokens))
                            lr_mult = max(0.1, 0.5 * (1.0 + math.cos(math.pi * progress)))
                        lr = config.learning_rate * lr_mult
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr
                    else:
                        lr = config.learning_rate

                    # report progress
                    pbar.set_description(f"epoch {epoch+1} iter {it}: train loss {loss.item():.5f}. mean loss: {float(np.mean(losses)):.5f}. lr {lr:e}")

            if split == 'train':
                train_loss = float(np.mean(losses))
                print(f"train loss: {train_loss}")
                return train_loss

            if split == 'test':
                test_loss = float(np.mean(losses))
                print(f"test loss: {test_loss}")
                return test_loss

            if split == 'valid':
                test_loss = float(np.mean(losses))
                print(f"valid loss: {test_loss}")

                eval_results = []
                translation_results = []
                context_list = []

                for idx in range(len(logits_total)):
                    intent = (x_total[idx] == valid_dataset.tokenizer_input.encode(['<eos>'])[0]).nonzero(as_tuple=True)[0][0]

                    probs = F.softmax(logits_total[idx], dim=-1)
                    # sample from the distribution or take the most likely
                    _, predicted = torch.topk(probs, k=1, dim=-1)
                    context = clean_tokens(data.tokenizer_input.decode(x_total[idx][:intent - 1], True))
                    completion = clean_tokens(data.tokenizer_output.decode(predicted[intent:], True))
                    real = clean_tokens(data.tokenizer_output.decode(y_total[idx][intent:], True))

                    context_list.append(context)
                    translation_results.append(completion)
                    eval_results.append(real)
                
                with open('valid.txt', 'w') as f:
                    f.write("\n".join(translation_results))

                with open('eval.txt', 'w') as f:
                    f.write("\n".join(eval_results))

                with open('context.txt', 'w') as f:
                    f.write("\n".join(context_list))


                !cat valid.txt | mosesdecoder/scripts/tokenizer/detokenizer.perl -l lv > valid.detok.txt
                !cat eval.txt | mosesdecoder/scripts/tokenizer/detokenizer.perl -l lv > eval.detok.txt
                !cat context.txt | mosesdecoder/scripts/tokenizer/detokenizer.perl -l lv > context.detok.txt

                with open('eval.detok.txt', 'r') as f:
                    eval_results = [l.strip() for l in f.readlines()]
                with open('valid.detok.txt', 'r') as f:
                    translation_results = [l.strip() for l in f.readlines()]
                with open('context.detok.txt', 'r') as f:
                    context_list = [l.strip() for l in f.readlines()]

#                 idx = choice(range(len(context_list)))
                valid_sentences = ['the driver wore a cap and his face was thin and very tanned.',
                                   'outside it was getting dark.',
                                   'the two girls were asleep.',
                                   'I would like to have had the uniform off although I did not care much about the outward forms.',
                                   'I watched the flashes on San Gabriele.',
                                   'I asked.',
                                   '"no.']

                idx_list = [i for i, sentence in enumerate(eval_results) if sentence in valid_sentences]
                
                for idx in idx_list:
                    print(f'Input:            {context_list[idx]}')
                    print(f'Predicted output: {translation_results[idx]}')
                    print(f'Real output:      {eval_results[idx]}')
                    print('--------------------------------------------------')

                refs = [eval_results]
                sys = translation_results
                bleu = sacrebleu.corpus_bleu(sys, refs)
                print(f'BLEU: {bleu.score}')
                print('##############################################################')

                return test_loss, bleu.score

        train_loss_list = []
        test_loss_list = []
        valid_loss_list = []
        valid_bleu_list = []
        best_loss = float('inf')
        self.tokens = 0 # counter used for learning rate decay
        for epoch in range(config.max_epochs):

            train_loss = run_epoch('train')
            train_loss_list.append(train_loss)
            if self.test_dataset is not None:
                test_loss = run_epoch('test')
                test_loss_list.append(test_loss)

            if self.valid_dataset is not None:
                valid_loss, bleu_score = run_epoch('valid')
                valid_loss_list.append(valid_loss)
                valid_bleu_list.append(bleu_score)

            # supports early stopping based on the test loss, or just save always if no test set is provided
            good_model = self.test_dataset is None or test_loss < best_loss
            if self.config.ckpt_path is not None and good_model:
                best_loss = test_loss
                self.save_checkpoint("_best")

            if epoch % 10 == 0:
                self.save_checkpoint(f"_{epoch}")

            self.save_checkpoint("_last")

        return train_loss_list, test_loss_list, valid_loss_list, valid_bleu_list


#Training

In [23]:

class Tokenizer:
    def __init__(self, data, vocab_size, vocab):
        self.vocab_size = vocab_size
        self.vocab = vocab

        self.stoi = {ch: i for i, ch in enumerate(self.vocab)}
        self.itos = {i: ch for i, ch in enumerate(self.vocab)}
    
    def tokenize(self, data, block_size):
        tokenized_text = data.split()
        # Filter empty strings
        tokenized_text = [x for x in tokenized_text if x]
        result = []
        for tokenized in tokenized_text:
            # In case other single # found, replace them with <unk> special token, marking the element as unknown
            if tokenized in self.vocab:
                result.append(tokenized)
            else:
                result.append('<unk>')

        # in case the sentence is longer, than block_size, we trim the sentence
        return result[:block_size]
    
    def encode(self, data):
        return [self.stoi[s] for s in data]
    
    def decode(self, data, clean_paddings=False):
        text = ' '.join([self.itos[int(i)] for i in data])

        if not clean_paddings:
            return text
        return text.replace('<pad>', '').replace('  ', '')

In [24]:
# vocab_size = 10000

# vocab_input = None
# if os.path.exists('vocab_input.pkl'):
#     with open('vocab_input.pkl', 'rb') as f:
#         vocab_input = pickle.load(f)
        
# vocab_output = None
# if os.path.exists('vocab_output.pkl'):
#     with open('vocab_output.pkl', 'rb') as f:
#         vocab_output = pickle.load(f)

# building vocabluary can take some time. ~5 minutes for 10_000 tokens for each tokenizer. 
tokenizer_input = Tokenizer(text_input, vocab_size, lv_vocab)
tokenizer_output = Tokenizer(text_output, vocab_size, en_vocab)

In [25]:
# with open('vocab_input.pkl', 'wb') as f:
#     pickle.dump(tokenizer_input.vocab, f)

# with open('vocab_output.pkl', 'wb') as f:
#     pickle.dump(tokenizer_output.vocab, f)

In [26]:
# Shuffle texts by lines
texts = list(zip(text_output.splitlines(), text_input.splitlines()))
random.shuffle(texts)

output_texts, input_texts = zip(*texts)

In [27]:
# Split texts into train, test and validation datasets
train_dataset_size = round(0.75 * len(output_texts))
test_dataset_size = round(0.15 * len(output_texts))
valid_dataset_size = round(0.1 * len(output_texts))

train_input = input_texts[:train_dataset_size]
test_input = input_texts[train_dataset_size:train_dataset_size + test_dataset_size]
valid_input = input_texts[-valid_dataset_size:]

train_output = output_texts[:train_dataset_size]
test_output = output_texts[train_dataset_size:train_dataset_size + test_dataset_size]
valid_output = output_texts[-valid_dataset_size:]


In [28]:

with open('data/hemingway/train.lv', 'w') as f:
    f.write("\n".join(train_input))

with open('data/hemingway/test.lv', 'w') as f:
    f.write("\n".join(test_input))

with open('data/hemingway/valid.lv', 'w') as f:
    f.write("\n".join(valid_input))


with open('data/hemingway/train.en', 'w') as f:
    f.write("\n".join(train_output))

with open('data/hemingway/test.en', 'w') as f:
    f.write("\n".join(test_output))

with open('data/hemingway/valid.en', 'w') as f:
    f.write("\n".join(valid_output))


In [29]:
from torch.utils.data import Dataset

class WordDataset(Dataset):

    def __init__(self, output_text, input_text, tokenizer_output, tokenizer_input, block_size):
        self.tokenizer_output = tokenizer_output
        self.tokenizer_input = tokenizer_input

        self.block_size = block_size * 2 + 1
        self.output_text = [tokenizer_output.tokenize(t, block_size) for t in output_text]
        self.input_text = [tokenizer_input.tokenize(t, block_size) for t in input_text]

    def __len__(self):
        return len(self.output_text)

    def __getitem__(self, idx):
        """
        The idea is to get the input sentence
        and translate it to output sentence (sentences could be on any language).

        In the init method we already split a sentence into tokens and filled with spaces,
        to have an equal sentence size. In this method we just encode the tokens to
        ids (a list of numbers), and we're trying to map ids sequences
        """

        tokenized_input_text = self.tokenizer_input.encode(self.input_text[idx])
        tokenized_output_text = self.tokenizer_output.encode(self.output_text[idx])

        dix = tokenized_input_text + self.tokenizer_output.encode(['<eos>']) + tokenized_output_text
        if len(dix) < self.block_size:
            dix += self.tokenizer_output.encode(['<pad>']) * (self.block_size - len(dix))

        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)
        y[:len(tokenized_input_text) - 1] = -100

        return x, y

In [30]:
block_size = 100  # the estimate how long lines the text could be (token count)

train_dataset = WordDataset(train_output, train_input, tokenizer_output, tokenizer_input, block_size)
test_dataset = WordDataset(test_output, test_input, tokenizer_output, tokenizer_input, block_size)
valid_dataset = WordDataset(valid_output, valid_input, tokenizer_output, tokenizer_input, block_size)

In [31]:
number_of_heads = 8
number_of_layers = 6

# from mingpt.model import GPT, GPTConfig
embd_pdrop = 0.1
resid_pdrop = 0.1
attn_pdrop = 0.2

mconf = GPTConfig(tokenizer_output.vocab_size, train_dataset.block_size,
                  n_layer=number_of_layers, n_head=number_of_heads, n_embd=512,
                  embd_pdrop=embd_pdrop, resid_pdrop=resid_pdrop, attn_pdrop=attn_pdrop)

model = GPT(mconf)

In [32]:
# from mingpt.trainer import Trainer, TrainerConfig

tokens_per_epoch = len(train_dataset) * block_size
train_epochs = 100

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=train_epochs, batch_size=128, learning_rate=3e-4,
                      lr_decay=True, warmup_tokens=tokens_per_epoch, final_tokens=train_epochs*tokens_per_epoch,
                      ckpt_path='minGPT-LV-EN-translator_model.pt',
                      num_workers=1, weight_decay=0.0001, betas=(0.9, 0.98))
trainer = Trainer(model, train_dataset, test_dataset, valid_dataset, tconf)

In [33]:
param_count = sum([param.nelement() for param in model.parameters()])

print(f'Parameters count: {param_count}')

Parameters count: 24650240


In [34]:
train_loss_list, test_loss_list, valid_loss_list, valid_bleu_list = trainer.train()

epoch 1 iter 51: train loss 0.33015. mean loss: 0.76397. lr 2.999394e-04: 100%|██████████| 52/52 [00:22<00:00,  2.31it/s]

train loss: 0.7639719133193676





test loss: 0.32935057986866345
valid loss: 0.322789911712919
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: ..
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I the the the the the the <eos> the the the the the the the the the <eos> the the the the
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I..
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: I <eos> the. the.
Real output:      the two girls were asleep.
------------------------------------------

epoch 2 iter 51: train loss 0.27574. mean loss: 0.31464. lr 2.994116e-04: 100%|██████████| 52/52 [00:20<00:00,  2.54it/s]

train loss: 0.3146414521795053





test loss: 0.2850381379777735
valid loss: 0.28002246362822397
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: I the was the..
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the was the road. the..
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I said.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I said.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I said.
Real output:      I asked.
--------------------------------------------------
Input:    

epoch 3 iter 51: train loss 0.30600. mean loss: 0.27936. lr 2.983430e-04: 100%|██████████| 52/52 [00:20<00:00,  2.51it/s]

train loss: 0.27935900768408406





test loss: 0.26393654536117206
valid loss: 0.2604259124823979
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I had be a the a a war. the I said not be.. the bed....
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the road road was a the
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: the the was a..
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------

epoch 4 iter 51: train loss 0.21535. mean loss: 0.25680. lr 2.967376e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.2568013006104873





test loss: 0.24889149449088357
valid loss: 0.2486715657370431
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the road and a little and the big and a and the very and
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I 'm.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the was the other and the head a

epoch 5 iter 51: train loss 0.23289. mean loss: 0.23999. lr 2.946011e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.23999384905283266





test loss: 0.24483465741981159
valid loss: 0.24138805908816202
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the other one was a.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
---------------------------

epoch 6 iter 51: train loss 0.24384. mean loss: 0.22530. lr 2.919413e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.2252980378957895





test loss: 0.24125747789036145
valid loss: 0.2385485257421221
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: it rain was a long and the face was very and the very.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was very a.
Real output:      outside it was getti

epoch 7 iter 51: train loss 0.23429. mean loss: 0.21173. lr 2.887677e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.21173224225640297





test loss: 0.23436067862944168
valid loss: 0.23420116305351257
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: they have be a be to been war.. they can not be.. them war....
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was very dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            virs S

epoch 8 iter 51: train loss 0.20159. mean loss: 0.19759. lr 2.850919e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.1975925060418936





test loss: 0.23580766266042535
valid loss: 0.23391433485916682
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was very very.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Pr

epoch 9 iter 51: train loss 0.22338. mean loss: 0.18507. lr 2.809272e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.18506930195368254





test loss: 0.2348050976341421
valid loss: 0.2332158855029515
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it had be a be a a war.. they can. have.. the war.. ar.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output

epoch 10 iter 51: train loss 0.20980. mean loss: 0.17152. lr 2.762886e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.1715181154700426





test loss: 0.2344214455647902
valid loss: 0.23334658571652003
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would have the have the the war for them they would not have. them them war lire. ar.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: he was was was big was he face was a and he good and
Real output

epoch 11 iter 51: train loss 0.16197. mean loss: 0.15871. lr 2.711929e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.1587147910434466





test loss: 0.24040450155735016
valid loss: 0.2347762840134757
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were wet in
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it have have the have a a war.. they can. have.. the war.. ar.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the priest was his b

epoch 12 iter 51: train loss 0.16334. mean loss: 0.14600. lr 2.656587e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.14599510010045308





test loss: 0.23600911958651108
valid loss: 0.23562427929469518
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: they girls girls were very and
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was dark dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            virs Sangabriel

epoch 13 iter 51: train loss 0.13742. mean loss: 0.13331. lr 2.597058e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.13330758807177728





test loss: 0.23839289356361737
valid loss: 0.23891178837844304
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: it was was his big and his face was a and his good.
Real output:      the driver wore a cap and his 

epoch 14 iter 51: train loss 0.14181. mean loss: 0.12219. lr 2.533559e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.12219288787589623





test loss: 0.24605021693489768
valid loss: 0.2415503135749272
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were in now
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was his big and his face was very and his good.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            "nē
Predicted 

epoch 15 iter 51: train loss 0.09723. mean loss: 0.11037. lr 2.466319e-04: 100%|██████████| 52/52 [00:21<00:00,  2.41it/s]

train loss: 0.11037113159321822





test loss: 0.2463893321427432
valid loss: 0.2462101365838732
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: his major was his big and his face was very and his small.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were all in
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            metās jau tumsa
Predicted out

epoch 16 iter 51: train loss 0.12146. mean loss: 0.10023. lr 2.395582e-04: 100%|██████████| 52/52 [00:21<00:00,  2.47it/s]

train loss: 0.10023131393469296





test loss: 0.24901465665210376
valid loss: 0.24860454882894242
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: it major was his big, his face was quiet. his good.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were over now
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I would be that be a the war... can. be... other.. ar.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
----------------------------

epoch 17 iter 51: train loss 0.08614. mean loss: 0.08930. lr 2.321603e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.0892990015160579





test loss: 0.2513399611819874
valid loss: 0.2529940881899425
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------

epoch 18 iter 51: train loss 0.09197. mean loss: 0.08033. lr 2.244651e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.08032648551922578





test loss: 0.2626384903084148
valid loss: 0.2566872090101242
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: it girl was his big and he face was looking and he small.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real out

epoch 19 iter 51: train loss 0.06700. mean loss: 0.07113. lr 2.165004e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.07112763177316922





test loss: 0.26092400740493427
valid loss: 0.26032505503722597
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it 's getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: his major was his and and his face was a and his beautiful.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brī

epoch 20 iter 51: train loss 0.06029. mean loss: 0.06294. lr 2.082950e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.0629386598817431





test loss: 0.2681142308495261
valid loss: 0.2652893215417862
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be these be if any war... would. be.. them winter.. ar.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:   

epoch 21 iter 51: train loss 0.06893. mean loss: 0.05625. lr 1.998787e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.05625098886398169





test loss: 0.2697104527191682
valid loss: 0.2672083377838135
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
-------------------------------------

epoch 22 iter 51: train loss 0.04830. mean loss: 0.04923. lr 1.912818e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.04923309822781728





test loss: 0.2737366651946848
valid loss: 0.2724111420767648
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: it priest was his gray and his face was walking and his so.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would

epoch 23 iter 51: train loss 0.05555. mean loss: 0.04327. lr 1.825355e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.04326917259738995





test loss: 0.2737413306127895
valid loss: 0.2765532774584634
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it 's getting dark outside
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be this be married any same for. it would. talk... winter days. ar.
Real output:      

epoch 24 iter 51: train loss 0.04144. mean loss: 0.03825. lr 1.736714e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.03824807194849619





test loss: 0.2922292256897146
valid loss: 0.2806072916303362
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking;
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aiz

epoch 25 iter 51: train loss 0.03525. mean loss: 0.03326. lr 1.647217e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.033264896796586424





test loss: 0.28643545508384705
valid loss: 0.2818456930773599
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights down the Barkley.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking;
Real output:      the two girls were asleep.
-----------------------------------

epoch 26 iter 51: train loss 0.02710. mean loss: 0.02920. lr 1.557187e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.02919690103198473





test loss: 0.2897185344587673
valid loss: 0.28740558879716055
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights too the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were at.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
------------------------------------------

epoch 27 iter 51: train loss 0.02487. mean loss: 0.02582. lr 1.466950e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.025815418169188958





test loss: 0.29221713272008026
valid loss: 0.28819102474621366
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking at
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: it major was his gray and his face was walking and muddy so.
Real output:      the driver wore a cap and his fac

epoch 28 iter 51: train loss 0.02734. mean loss: 0.02320. lr 1.376832e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.02320325872502648





test loss: 0.29296363212845544
valid loss: 0.29042878321238924
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
-------------------------------------

epoch 29 iter 51: train loss 0.02437. mean loss: 0.02039. lr 1.287160e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.020388654791391812





test loss: 0.30405030196363275
valid loss: 0.29603142184870584
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted ou

epoch 30 iter 51: train loss 0.01926. mean loss: 0.01842. lr 1.198259e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.018424122862947676





test loss: 0.3009005216034976
valid loss: 0.2980659433773586
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it 's getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
I

epoch 31 iter 51: train loss 0.01389. mean loss: 0.01623. lr 1.110450e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.016228069336368486





test loss: 0.2989864674481479
valid loss: 0.3007418853896005
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be Our be any any war. you it would.... it mountains. and ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights too the Gabriele.
Real output:      I watched the flashes on San Gabriele

epoch 32 iter 51: train loss 0.01534. mean loss: 0.01492. lr 1.024050e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.014917625257602105





test loss: 0.3074174902655862
valid loss: 0.30290353298187256
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it 's getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
In

epoch 33 iter 51: train loss 0.01220. mean loss: 0.01345. lr 9.393735e-05: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.013451874829255618





test loss: 0.3036653616211631
valid loss: 0.30324144448552814
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be that be been any foreign for that it would... troops Cividale winter days. ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real 

epoch 34 iter 51: train loss 0.00990. mean loss: 0.01223. lr 8.567257e-05: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.012232946661802439





test loss: 0.3065811910412528
valid loss: 0.3070954254695347
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the stopped the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
-------------------------------------------------

epoch 35 iter 51: train loss 0.01275. mean loss: 0.01125. lr 7.764060e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.0112497266023778





test loss: 0.3098742799325423
valid loss: 0.3084545774119241
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray and his face was walking and muddy so.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
---------------

epoch 36 iter 51: train loss 0.01044. mean loss: 0.01030. lr 6.987052e-05: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.01029684727724928





test loss: 0.3079711713574149
valid loss: 0.31029514755521503
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be that be any any same that that it would. be.. it winter.. ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray, his face was walking and muddy so.


epoch 37 iter 51: train loss 0.00880. mean loss: 0.00951. lr 6.239044e-05: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.00951353508907442





test loss: 0.315126589753411
valid loss: 0.3108274127755846
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Pr

epoch 38 iter 51: train loss 0.00800. mean loss: 0.00874. lr 5.522744e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.008740398800000548





test loss: 0.3143137368288907
valid loss: 0.3127505587679999
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking at
Real out

epoch 39 iter 51: train loss 0.00720. mean loss: 0.00810. lr 4.840745e-05: 100%|██████████| 52/52 [00:21<00:00,  2.43it/s]

train loss: 0.00809917889105586





test loss: 0.3178108957680789
valid loss: 0.3145713380404881
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were in;
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the stopped the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dar

epoch 40 iter 51: train loss 0.00751. mean loss: 0.00765. lr 4.195514e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.007646113007257764





test loss: 0.31706189296462317
valid loss: 0.316731653043202
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray and his face was walking and muddy still.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemi

epoch 41 iter 51: train loss 0.00613. mean loss: 0.00723. lr 3.589387e-05: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.007231812100284374





test loss: 0.3185187144712968
valid loss: 0.3161603552954538
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
I

epoch 42 iter 51: train loss 0.00722. mean loss: 0.00688. lr 3.024558e-05: 100%|██████████| 52/52 [00:21<00:00,  2.42it/s]

train loss: 0.006878927502279671





test loss: 0.3243346783247861
valid loss: 0.3166649171284267
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking;
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
-----------

epoch 43 iter 51: train loss 0.00486. mean loss: 0.00642. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.006421974224324983





test loss: 0.3192281262441115
valid loss: 0.31861875312668936
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking at
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray and his face was dead and muddy so.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted outp

epoch 44 iter 51: train loss 0.00717. mean loss: 0.00620. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.0062023886085416265





test loss: 0.31745917959646747
valid loss: 0.31962597370147705
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would have Our be any any formalities for that it would... troops it winter. and ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray and his face was dead and muddy so.


epoch 45 iter 51: train loss 0.00694. mean loss: 0.00599. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.0059929796082612416





test loss: 0.31783686713738873
valid loss: 0.32031212534223286
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the stopped the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted out

epoch 46 iter 51: train loss 0.00769. mean loss: 0.00581. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.005808698317895715





test loss: 0.32860441370443866
valid loss: 0.3213482882295336
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it 's getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------

epoch 47 iter 51: train loss 0.00690. mean loss: 0.00577. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.005770085272021019





test loss: 0.3210302699695934
valid loss: 0.3207912359918867
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the stopped the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking it
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
----------------------------------------------

epoch 48 iter 51: train loss 0.00709. mean loss: 0.00559. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.005594718305823894





test loss: 0.32171204686164856
valid loss: 0.324047897543226
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking it
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the stopped the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
-------------------------------

epoch 49 iter 51: train loss 0.00552. mean loss: 0.00549. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.005491216093874895





test loss: 0.3258853256702423
valid loss: 0.32413817729268757
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it know have Our be been any foreign for that it would... troops it winter.. ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna u

epoch 50 iter 51: train loss 0.00479. mean loss: 0.00536. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.0053625717723312285





test loss: 0.32467768679965625
valid loss: 0.32403229815619333
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be Our be been any same for that it would... troops it winter.. ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking it
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the stopped the lights of

epoch 51 iter 51: train loss 0.00676. mean loss: 0.00528. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.005281112720974936





test loss: 0.32649769295345654
valid loss: 0.32512754627636503
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be Our be been any same for that it would... troops it winter.. ically.
Real output:      I would like to have had the uni

epoch 52 iter 51: train loss 0.00630. mean loss: 0.00512. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.47it/s]

train loss: 0.0051239126164895985





test loss: 0.3247980692169883
valid loss: 0.32598055260522024
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark outside
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking it
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes o

epoch 53 iter 51: train loss 0.00535. mean loss: 0.00512. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.005123223240773838





test loss: 0.3262351859699596
valid loss: 0.325923102242606
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be that be been any same for that it would. be. troops them winter.. ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            abas meitenes bija iemi

epoch 54 iter 51: train loss 0.00524. mean loss: 0.00488. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.004883580500833117





test loss: 0.32803487235849554
valid loss: 0.32712535347257343
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking it
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo un

epoch 55 iter 51: train loss 0.00602. mean loss: 0.00495. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.004952947089735132





test loss: 0.3279406563802199
valid loss: 0.32761315788541523
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
----------------------

epoch 56 iter 51: train loss 0.00417. mean loss: 0.00467. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.004665866247915591





test loss: 0.33235556429082697
valid loss: 0.32671391112463816
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking it
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be that be any any same for that it would. be. troops it winter.. ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
-----------------------------------

epoch 57 iter 51: train loss 0.00463. mean loss: 0.00455. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.004548090250374606





test loss: 0.32449031282554974
valid loss: 0.32764847363744465
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray and his face was walking and muddy so.
Real output:      the driver wore a cap and his face w

epoch 58 iter 51: train loss 0.00359. mean loss: 0.00455. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.004547057252448912





test loss: 0.34374364668672736
valid loss: 0.32892925824437824
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be Our be been any police for that it would... troops it winter.. ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
In

epoch 59 iter 51: train loss 0.00557. mean loss: 0.00443. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.004425814214090888





test loss: 0.3358735198324377
valid loss: 0.3290047901017325
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray and his face was walking and muddy still.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:   

epoch 60 iter 51: train loss 0.00467. mean loss: 0.00443. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.004433784329404051





test loss: 0.334337594834241
valid loss: 0.330562025308609
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:       

epoch 61 iter 51: train loss 0.00380. mean loss: 0.00422. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.004215977432277913





test loss: 0.3311226015741175
valid loss: 0.3307740475450243
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be Our be been any foreign for that it would.... it north.. ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kal

epoch 62 iter 51: train loss 0.00457. mean loss: 0.00415. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.004145408022360733





test loss: 0.3368324799971147
valid loss: 0.33299541899136137
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the stopped the lights of the Gabriele.
Real output:      I watched the flashes o

epoch 63 iter 51: train loss 0.00310. mean loss: 0.00405. lr 3.000000e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.00405273528303951





test loss: 0.3329136940565976
valid loss: 0.33161861981664387
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray, his face was walking and muddy so.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real outp

epoch 64 iter 51: train loss 0.00375. mean loss: 0.00404. lr 3.312817e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.0040428030635946645





test loss: 0.33254349502650177
valid loss: 0.33304491213389803
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray, his face was walking and muddy still.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no

epoch 65 iter 51: train loss 0.00310. mean loss: 0.00395. lr 3.899373e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.0039506372413598





test loss: 0.3326345519586043
valid loss: 0.3334627960409437
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the pulled the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output

epoch 66 iter 51: train loss 0.00410. mean loss: 0.00404. lr 4.526105e-05: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.004035699830497974





test loss: 0.33746987310322846
valid loss: 0.3362515228135245
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be Our be been any foreign for that it would.... it north.. ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray, his face was walking and his new.
Re

epoch 67 iter 51: train loss 0.00449. mean loss: 0.00417. lr 5.190745e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.004171209620276036





test loss: 0.33848190578547394
valid loss: 0.3350729133401598
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark outside
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:    

epoch 68 iter 51: train loss 0.00577. mean loss: 0.00440. lr 5.890886e-05: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.004404174629598856





test loss: 0.3358773209831931
valid loss: 0.3348199852875301
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking it
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray and his face was walking and muddy still.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            nē
Predicted output:

epoch 69 iter 51: train loss 0.00509. mean loss: 0.00440. lr 6.623995e-05: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.00440031925861079





test loss: 0.3357376618818803
valid loss: 0.33529374429157804
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the pulled the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark outside
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
------------------------------------------

epoch 70 iter 51: train loss 0.00496. mean loss: 0.00455. lr 7.387419e-05: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.004552038583474664





test loss: 0.34066920388828625
valid loss: 0.33633112055914743
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray and his face was walking and fat so.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:

epoch 71 iter 51: train loss 0.00533. mean loss: 0.00473. lr 8.178395e-05: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.004732321934380497





test loss: 0.3397413356737657
valid loss: 0.3367952959878104
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it 's getting dark outside
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be that be any any efficienough that it would. be. days the north. and ws.
Real output:      I would like to have had the

epoch 72 iter 51: train loss 0.00404. mean loss: 0.00489. lr 8.994060e-05: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.004886842977542143





test loss: 0.3432423038916154
valid loss: 0.33941719787461416
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it 's getting dark outside
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:     

epoch 73 iter 51: train loss 0.00660. mean loss: 0.00534. lr 9.831461e-05: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.005343703365025039





test loss: 0.34356933019377967
valid loss: 0.3371950898851667
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the pulled the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
-----------------

epoch 74 iter 51: train loss 0.00685. mean loss: 0.00541. lr 1.068757e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.005411243496032862





test loss: 0.3436916605992751
valid loss: 0.33887290954589844
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the pulled the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking in
Real output:      the two girls were asleep.
------------------------------

epoch 75 iter 51: train loss 0.00671. mean loss: 0.00570. lr 1.155928e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.005699240835383534





test loss: 0.3472304804758592
valid loss: 0.3376909153802054
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: on went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai 

epoch 76 iter 51: train loss 0.00646. mean loss: 0.00578. lr 1.244345e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.0057762697464428265





test loss: 0.3365254916928031
valid loss: 0.33619336571012226
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: along pulled the Cova of San Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it 's getting dark outside
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: it driver was his gray and his face was a and a still.
Real output:      the driver wore a cap and his face was thin and very tanned.
-----

epoch 77 iter 51: train loss 0.00625. mean loss: 0.00597. lr 1.333687e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.005972112797630521





test loss: 0.3362064551223408
valid loss: 0.33859396832329886
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: along pulled the roads of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking in
Real output:      the two girls were asleep.
----------------------------------

epoch 78 iter 51: train loss 0.00750. mean loss: 0.00644. lr 1.423631e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.00644072993264462





test loss: 0.3456070666963404
valid loss: 0.3395573454243796
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real outpu

epoch 79 iter 51: train loss 0.00850. mean loss: 0.00663. lr 1.513852e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.006631582688826781





test loss: 0.337907151742415
valid loss: 0.3383997551032475
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the door was the gray and his face was quiet and muddy still.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: all would be Our be seen any Brigade you. it would.... your river.. ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:

epoch 80 iter 51: train loss 0.00781. mean loss: 0.00672. lr 1.604022e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.006724635434623521





test loss: 0.3390349501913244
valid loss: 0.34386563301086426
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis

epoch 81 iter 51: train loss 0.01058. mean loss: 0.00705. lr 1.693816e-04: 100%|██████████| 52/52 [00:21<00:00,  2.43it/s]

train loss: 0.007053390225897042





test loss: 0.34542250633239746
valid loss: 0.3399582803249359
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the room was the small and his face was quiet and muddy still.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
------------------------------

epoch 82 iter 51: train loss 0.00754. mean loss: 0.00711. lr 1.782908e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.007108444455437935





test loss: 0.3479757146401839
valid loss: 0.33876000557626995
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no,
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Francisco.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be that be luge any efficiarchitecture valit would. touch

epoch 83 iter 51: train loss 0.00880. mean loss: 0.00723. lr 1.870976e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.007229959323572425





test loss: 0.34978726506233215
valid loss: 0.34569889307022095
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the went the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predic

epoch 84 iter 51: train loss 0.00669. mean loss: 0.00754. lr 1.957702e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.00754462020435872





test loss: 0.33904529295184394
valid loss: 0.34231655086789814
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray, he face was being and a soand
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking now
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Pr

epoch 85 iter 51: train loss 0.00739. mean loss: 0.00767. lr 2.042771e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.007671317085623741





test loss: 0.3439223061908375
valid loss: 0.3430256758417402
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it really be that be luge any efficifor six it would. be. simpler your police. and ically.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output

epoch 86 iter 51: train loss 0.00782. mean loss: 0.00801. lr 2.125876e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.008008188218809664





test loss: 0.3435763797976754
valid loss: 0.33962719781058176
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking at
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting 

epoch 87 iter 51: train loss 0.00692. mean loss: 0.00789. lr 2.206716e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.007890071442279104





test loss: 0.34208174727179785
valid loss: 0.34336318714278086
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be that be any any foreign when that it would. be. days your police. and ws.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major had his man and his face was probably and a kind.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:    

epoch 88 iter 51: train loss 0.01061. mean loss: 0.00803. lr 2.284998e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.008029430585268598





test loss: 0.3429250879721208
valid loss: 0.3451277017593384
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it have have Our be married any foreign. that it would. believe. days your police.. ts.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking now
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: along pulled the road of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input: 

epoch 89 iter 51: train loss 0.01029. mean loss: 0.00841. lr 2.360438e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.008413143832093248





test loss: 0.34621579538692127
valid loss: 0.3431247855935778
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it know have that be any any efficiwhen that it did. break. track your dressing. and ical.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: along pulled the lights of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:     

epoch 90 iter 51: train loss 0.00934. mean loss: 0.00833. lr 2.432765e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.008327216035328232





test loss: 0.3494733544913205
valid loss: 0.3421197363308498
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it am have that be married any foreign when six it would. believe. days under winter. and ke.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark outside
Real output:      outside it was getting dark.
------------------------------------

epoch 91 iter 51: train loss 0.00888. mean loss: 0.00838. lr 2.501716e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.008382695521084735





test loss: 0.3534233732656999
valid loss: 0.3461718899863107
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark outside
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it have be that be seen any flute when of it have. talk. track you bed. and ts.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------

epoch 92 iter 51: train loss 0.00796. mean loss: 0.00856. lr 2.567041e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.008564616311699726





test loss: 0.34901528737761756
valid loss: 0.3437821737357548
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: it would be that be a a police for that it would. be. days your winter.. ts.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the first was the man, his face was tanned and a kind.
Real output:      the dr

epoch 93 iter 51: train loss 0.00874. mean loss: 0.00851. lr 2.628505e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.008506556346009556





test loss: 0.3451349708166989
valid loss: 0.34653698972293306
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:    

epoch 94 iter 51: train loss 0.00812. mean loss: 0.00857. lr 2.685884e-04: 100%|██████████| 52/52 [00:21<00:00,  2.42it/s]

train loss: 0.008573245319824379





test loss: 0.34636045585979114
valid loss: 0.3467329442501068
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major had his gray and his hat was tanned and fat short.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: along went the wide of a Gabriele.
R

epoch 95 iter 51: train loss 0.00579. mean loss: 0.00827. lr 2.738971e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.008273944187049683





test loss: 0.34339291670105676
valid loss: 0.34794055989810396
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major had the gray, his face was a and the old and
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark outside
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Re

epoch 96 iter 51: train loss 0.00882. mean loss: 0.00836. lr 2.787574e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.008356964215636253





test loss: 0.3561239676042037
valid loss: 0.35049106393541607
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was his black and his hat was a and muddy so.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were looking.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: along pulled the road on San Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------

epoch 97 iter 51: train loss 0.00826. mean loss: 0.00835. lr 2.831517e-04: 100%|██████████| 52/52 [00:21<00:00,  2.45it/s]

train loss: 0.0083516692617335





test loss: 0.35227031057531183
valid loss: 0.3477161611829485
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark outside
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were taking.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the major was the gray, every face was very and beautiful beautiful.

epoch 98 iter 51: train loss 0.00767. mean loss: 0.00804. lr 2.870641e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.008035331283910917





test loss: 0.34864699840545654
valid loss: 0.34910532406398226
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark outside
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were taken now
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            vaicāju
Predicted output: I said.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predict

epoch 99 iter 51: train loss 0.00766. mean loss: 0.00786. lr 2.904804e-04: 100%|██████████| 52/52 [00:21<00:00,  2.46it/s]

train loss: 0.00785995395675015





test loss: 0.35196403481743554
valid loss: 0.35328841635159086
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it 's getting dark outside
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver had the man, his face was dead and the good and
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted outp

epoch 100 iter 51: train loss 0.00942. mean loss: 0.00824. lr 2.933883e-04: 100%|██████████| 52/52 [00:21<00:00,  2.44it/s]

train loss: 0.00824137154715852





test loss: 0.358064892617139
valid loss: 0.3522955690111433
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were taken in
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver had the man, his hat was a and my good and
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I as

In [35]:
epochs = range(len(test_loss_list))
# plt.subplots(nrows=number_of_layers, ncols=number_of_heads, figsize=(30, 20))
fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(20, 10))
axs[0].plot(epochs, train_loss_list)
axs[0].set_title('Train loss')
axs[0].set_xlabel('Epochs')
axs[0].set_ylabel('Loss')

axs[0].plot(epochs, test_loss_list)
axs[0].set_title('Test loss')
axs[0].set_xlabel('Epochs')
axs[0].set_ylabel('Loss')

axs[1].plot(epochs, valid_loss_list)
axs[1].set_title('Validation loss')
axs[1].set_xlabel('Epochs')
axs[1].set_ylabel('Loss')

axs[2].plot(epochs, valid_bleu_list)
axs[2].set_title('Validation BLEU')
axs[2].set_xlabel('Epochs')
axs[2].set_ylabel('BLEU')

plt.show()

  plt.show()


In [36]:
plt.savefig("hemingway_losses.png")


#Evaluate

In [37]:
checkpoint = torch.load('minGPT-LV-EN-translator_model.pt_best.pt')
model.load_state_dict(checkpoint)

<All keys matched successfully>

In [38]:
train_loss_list

[0.7639719133193676,
 0.3146414521795053,
 0.27935900768408406,
 0.2568013006104873,
 0.23999384905283266,
 0.2252980378957895,
 0.21173224225640297,
 0.1975925060418936,
 0.18506930195368254,
 0.1715181154700426,
 0.1587147910434466,
 0.14599510010045308,
 0.13330758807177728,
 0.12219288787589623,
 0.11037113159321822,
 0.10023131393469296,
 0.0892990015160579,
 0.08032648551922578,
 0.07112763177316922,
 0.0629386598817431,
 0.05625098886398169,
 0.04923309822781728,
 0.04326917259738995,
 0.03824807194849619,
 0.033264896796586424,
 0.02919690103198473,
 0.025815418169188958,
 0.02320325872502648,
 0.020388654791391812,
 0.018424122862947676,
 0.016228069336368486,
 0.014917625257602105,
 0.013451874829255618,
 0.012232946661802439,
 0.0112497266023778,
 0.01029684727724928,
 0.00951353508907442,
 0.008740398800000548,
 0.00809917889105586,
 0.007646113007257764,
 0.007231812100284374,
 0.006878927502279671,
 0.006421974224324983,
 0.0062023886085416265,
 0.0059929796082612416,
 0.

In [39]:
with open('hemingway_train_loss.txt', 'w') as f:
    f.write('\n'.join([str(s) for s in train_loss_list]))


In [40]:
with open('hemingway_test_loss.txt', 'w') as f:
    f.write('\n'.join([str(s) for s in test_loss_list]))

with open('hemingway_valid_loss.txt', 'w') as f:
    f.write('\n'.join([str(s) for s in valid_loss_list]))

with open('hemingway_valid_blue.txt', 'w') as f:
    f.write('\n'.join([str(s) for s in valid_bleu_list]))

In [41]:
from random import choice

for _ in range(5):
    idx = choice(range(len(valid_output)))

    context = valid_input[idx]
    encoded_input = tokenizer_input.encode(tokenizer_input.tokenize(context, block_size))
    x = torch.tensor(encoded_input, dtype=torch.long)[None,...].to(trainer.device)
    y = sample(model, x, block_size, temperature=1.0, sample=False, top_k=10)[0]

    intent = len(encoded_input) + 1

    predicted = y[intent:]
    completion = tokenizer_output.decode(predicted, True)
    print(f'Input:            {context}')
    print(f'Predicted output: {completion}')
    print(f'Real output:      {valid_output[idx]}')
    print('--------------------------------------------------')

Input:            mēs varam apmesties augšā kalnos &quot; .
Predicted output: we were a little time . &quot;
Real output:      we can find some place up in the mountains . &quot;
--------------------------------------------------
Input:            &quot; esmu noguris no ie@@ šanas &quot; .
Predicted output: &quot; I &apos;m a little place . &quot; 
Real output:      &quot; I &apos;m tired of this walking / &apos; &quot; W@@ ell , all we have to do is walk now .
--------------------------------------------------
Input:            es attaisīju un iz@@ kr@@ at@@ ī@@ ju a@@ mu@@ le@@ tu
Predicted output: <eos> I &apos;ll go on my head and then I said . 
Real output:      I opened the capsule and spilled him out into my hand .
--------------------------------------------------
Input:            varbūt , tēvs .
Predicted output: maybe I said . 
Real output:      perhaps , father .
--------------------------------------------------
Input:            &quot; man ir daži liet@@ oti zo@@ be@@ ni 

In [42]:
idx = choice(range(len(valid_output)))

context = valid_input[idx]
encoded_input = tokenizer_input.encode(tokenizer_input.tokenize(context, block_size))
x = torch.tensor(encoded_input, dtype=torch.long)[None,...].to(trainer.device)
y, attention_state = sample(model, x, block_size, temperature=1.0, sample=False, top_k=10, output_attention=True)

intent = len(encoded_input) + 1

predicted = y[0][intent:]
completion = tokenizer_output.decode(predicted,)
print(f'Input:            {context}')
print(f'Predicted output: {completion}')
print(f'Real output:      {valid_output[idx]}')
print('--------------------------------------------------')


Input:            &quot; noteikti &quot; .
Predicted output: &quot; yes . &quot; <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
Real output:      &quot; sure . &quot;
--------------------------------------------------


In [43]:
fig, plots = plt.subplots(nrows=number_of_layers, ncols=number_of_heads, figsize=(30, 20))

axis_text = tokenizer_input.decode(encoded_input, True).split()

axis_text.append('<eos>')

axis_text += tokenizer_input.decode(predicted, True).split()

limit = len(axis_text)
for bi in range(number_of_layers):
    for hi in range(number_of_heads):
        attetion_plot = torch.zeros(limit, limit)
        for di in range(limit):
            attetion_plot[:di, :di] = attention_state[bi][di][0,hi,:di,:di].data

        ax = plots[bi][hi]
        ax.matshow(attetion_plot.numpy(), cmap='bone')

        # Set up axes
        ax.set_xticklabels([''] + axis_text, rotation=90)
        ax.set_yticklabels([''] + axis_text)

        # Show label at every tick
        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

        # Set up a title
        ax.set_title(f'Block {bi + 1} Head {hi + 1}', size=25, pad=30)
        
plt.show()

  ax.set_xticklabels([''] + axis_text, rotation=90)
  ax.set_yticklabels([''] + axis_text)
  plt.show()


In [44]:
# In case the previous cell is not plotting anything, uncomment the code below and execute. After that, the plotting should be fine.
# %matplotlib inline
# import numpy as np
# x = np.linspace(0, 10, 100)

# fig = plt.figure()
# plt.plot(x, np.sin(x), '-')
# plt.plot(x, np.cos(x), '--');

#Calculate BLEU

In [45]:
def clean_tokens(sentence):
    return sentence.replace('@@ ', '').replace(' @', '').replace('@ ', '')

In [46]:
# from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# smooth = SmoothingFunction().method7

translation_results = []
eval_text = []
bleu_results = []
for idx, context in enumerate(valid_input):
    encoded_input = tokenizer_input.encode(tokenizer_input.tokenize(context, block_size))
    x = torch.tensor(encoded_input, dtype=torch.long)[None,...].to(trainer.device)
    y = sample(model, x, block_size, temperature=1.0, sample=False, top_k=10)[0]

    intent = len(encoded_input) + 1
    predicted = y[intent:]
    completion = clean_tokens(tokenizer_output.decode(predicted, True))
    translation_results.append(completion)

    eval = clean_tokens(valid_output[idx])
    eval_text.append(eval)
    # bleu = sentence_bleu([eval], completion, smoothing_function=smooth)
    # bleu_results.append(bleu)

# print(f"Averare BLEU: {np.mean(bleu_results)}")

In [47]:
with open('hemingway_valid.out', 'w') as f:
    f.write("\n".join(translation_results))

with open('hemingway_valid.ref', 'w') as f:
    f.write("\n".join(eval_text))

In [48]:
!perl mosesdecoder/scripts/generic/multi-bleu.perl hemingway_valid.ref < hemingway_valid.out

BLEU = 7.13, 37.6/11.7/3.8/1.8 (BP=0.964, ratio=0.965, hyp_len=9173, ref_len=9509)
It is not advisable to publish scores from multi-bleu.perl.  The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups.  Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization.  Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer.


In [49]:
!cat hemingway_valid.out | mosesdecoder/scripts/tokenizer/detokenizer.perl -l lv > hemingway_valid.detok.out
!cat hemingway_valid.ref | mosesdecoder/scripts/tokenizer/detokenizer.perl -l lv > hemingway_valid.detok.ref

Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv


In [50]:
#!pip install sacrebleu
!pip show sacrebleu

Name: sacrebleu
Version: 1.5.1
Summary: Hassle-free computation of shareable, comparable, and reproducible BLEU, chrF, and TER scores
Home-page: https://github.com/mjpost/sacrebleu
Author: Matt Post
Author-email: post@cs.jhu.edu
License: Apache License 2.0
Location: /home/guntis/anaconda3/envs/tw131/lib/python3.8/site-packages
Requires: portalocker
Required-by: 


In [51]:
import sacrebleu

with open('hemingway_valid.detok.ref', 'r') as f:
    eval_ref = [l.strip() for l in f.readlines()]
with open('hemingway_valid.detok.out', 'r') as f:
    translation_results = [l.strip() for l in f.readlines()]

refs = [eval_ref]
sys = translation_results
bleu = sacrebleu.corpus_bleu(sys, refs)
print(bleu.score)

7.127224006847502


#Interactive translator

In [52]:
context = input("Enter your English text to translate: ")

# Predict Latvian output
encoded_input = tokenizer_input.encode(tokenizer_input.tokenize(context, block_size))
x = torch.tensor(encoded_input, dtype=torch.long)[None,...].to(trainer.device)
y, attention_state = sample(model, x, block_size, temperature=1.0, sample=False, top_k=10, output_attention=True)

intent = len(encoded_input) + 1

predicted = y[0][intent:]
completion = tokenizer_output.decode(predicted, True)
print(f'Input:            {context}')
print(f'Predicted output: {completion}')


# Plot attention
fig, plots = plt.subplots(nrows=number_of_layers, ncols=number_of_heads, figsize=(30, 20))

axis_text = tokenizer_input.decode(encoded_input, True).split()

axis_text.append('<eos>')

axis_text += tokenizer_input.decode(predicted, True).split()

limit = len(axis_text)
for bi in range(number_of_layers):
    for hi in range(number_of_heads):
        attetion_plot = torch.zeros(limit, limit)
        for di in range(limit):
            attetion_plot[:di, :di] = attention_state[bi][di][0,hi,:di,:di].data

        ax = plots[bi][hi]
        ax.matshow(attetion_plot.numpy(), cmap='bone')

        # Set up axes
        ax.set_xticklabels([''] + axis_text, rotation=90)
        ax.set_yticklabels([''] + axis_text)

        # Show label at every tick
        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

        # Set up a title
        ax.set_title(f'Block {bi + 1} Head {hi + 1}', size=25, pad=30)
        
plt.show()

KeyboardInterrupt: Interrupted by user