In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker

import os
import urllib
import re
import random
import json
from typing import List, Dict, Optional, Any, Tuple
import glob

import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
import pickle
from torch.nn import functional as F

from collections import OrderedDict, Counter

In [2]:
import tokenizers
#from tokenizers import Tokenizer
from tokenizers.pre_tokenizers import Whitespace
from tokenizers.pre_tokenizers import WhitespaceSplit
from tokenizers.pre_tokenizers import Punctuation

from tokenizers import normalizers
from tokenizers.normalizers import Lowercase, Strip, Replace, Sequence
from tokenizers.trainers import UnigramTrainer

In [3]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(1)

In [4]:
HEMINGWAY_DATA = './data/hemingway'
!mkdir -p $HEMINGWAY_DATA


In [5]:
#!pip install sacrebleu
!pip show sacrebleu

Name: sacrebleu
Version: 1.5.1
Summary: Hassle-free computation of shareable, comparable, and reproducible BLEU, chrF, and TER scores
Home-page: https://github.com/mjpost/sacrebleu
Author: Matt Post
Author-email: post@cs.jhu.edu
License: Apache License 2.0
Location: /home/gstrazds/anaconda3/envs/tw131/lib/python3.8/site-packages
Requires: portalocker
Required-by: 


In [6]:
# from google.colab import drive
# drive.mount('/content/drive')

#Text preprocessing

In [7]:
num_bpe_merges = 10000
vocab_size = 5500
joint_vocab_size = 2*vocab_size

!echo BPE_ops=$num_bpe_merges vocab_size=$vocab_size joint_vocab_size=$joint_vocab_size

BPE_ops=10000 vocab_size=5500 joint_vocab_size=11000


In [8]:
#!pip install subword-nmt
!pip show subword-nmt

Name: subword-nmt
Version: 0.3.7
Summary: Unsupervised Word Segmentation for Neural Machine Translation and Text Generation
Home-page: https://github.com/rsennrich/subword-nmt
Author: Rico Sennrich
Author-email: None
License: MIT
Location: /home/gstrazds/anaconda3/envs/tw131/lib/python3.8/site-packages
Requires: 
Required-by: 


In [9]:
# Read Hemingway texts from URL. There are Hemingway's "A Farewell to arms"
text_en = urllib.request.urlopen('http://www.ltn.lv/~guntis/translation_dataset/dataset_en_small.txt').read().decode("utf-8", "ignore")
text_lv = urllib.request.urlopen('http://www.ltn.lv/~guntis/translation_dataset/dataset_lv_small.txt').read().decode("utf-8-sig", "ignore")

HEMINGWAY_SRC_EN = f'{HEMINGWAY_DATA}/hemingway.en.txt'
HEMINGWAY_SRC_LV = f'{HEMINGWAY_DATA}/hemingway.lv.txt'

with open(HEMINGWAY_SRC_EN, 'w') as f:
    f.write(text_en)

with open(HEMINGWAY_SRC_LV, 'w') as f:
    f.write(text_lv)

In [10]:
# !git clone https://github.com/moses-smt/mosesdecoder.git

In [11]:
# Normalize and tokenize texts

!cat $HEMINGWAY_SRC_EN | mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l en \
  | mosesdecoder/scripts/tokenizer/tokenizer.perl -a -l en > $HEMINGWAY_DATA/hemingway.en.tok.txt

!cat $HEMINGWAY_SRC_LV | mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l lv \
  | mosesdecoder/scripts/tokenizer/tokenizer.perl -a -l lv > $HEMINGWAY_DATA/hemingway.lv.tok.txt

Tokenizer Version 1.1
Language: en
Number of threads: 1
Tokenizer Version 1.1
Language: lv
Number of threads: 1


In [12]:
# # Normalize and tokenize texts

# #!cat hemingway.en.txt | mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l en \
# !cat hemingway.en.txt \
#   | mosesdecoder/scripts/tokenizer/tokenizer.perl -a -l en > hemingway.en.tok.txt

# # !cat hemingway.lv.txt | mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l lv \
# !cat hemingway.lv.txt \
#   | mosesdecoder/scripts/tokenizer/tokenizer.perl -a -l lv > hemingway.lv.tok.txt

In [13]:
!mosesdecoder/scripts/recaser/train-truecaser.perl -corpus $HEMINGWAY_DATA/hemingway.en.tok.txt -model $HEMINGWAY_DATA/tc_model.en
!mosesdecoder/scripts/recaser/train-truecaser.perl -corpus $HEMINGWAY_DATA/hemingway.lv.tok.txt -model $HEMINGWAY_DATA/tc_model.lv

!mosesdecoder/scripts/recaser/truecase.perl -model $HEMINGWAY_DATA/tc_model.en < $HEMINGWAY_DATA/hemingway.en.tok.txt > $HEMINGWAY_DATA/hemingway.en.tc.txt
!mosesdecoder/scripts/recaser/truecase.perl -model $HEMINGWAY_DATA/tc_model.lv < $HEMINGWAY_DATA/hemingway.lv.tok.txt > $HEMINGWAY_DATA/hemingway.lv.tc.txt

In [14]:
# !subword-nmt learn-joint-bpe-and-vocab --input en.tc.txt lv.tc.txt -s 10000 -o tokens.txt --write-vocabulary token_freq.en.txt token_freq.lv.txt
!mkdir -p $HEMINGWAY_DATA/bpe 
!subword-nmt learn-joint-bpe-and-vocab --input $HEMINGWAY_DATA/hemingway.lv.tc.txt $HEMINGWAY_DATA/hemingway.en.tc.txt -s $num_bpe_merges -o $HEMINGWAY_DATA/bpe/tokens.lven --write-vocabulary $HEMINGWAY_DATA/bpe/token_freq.en $HEMINGWAY_DATA/bpe/token_freq.lv
# !subword-nmt learn-joint-bpe-and-vocab --input $HEMINGWAY_DATA/hemingway.en.tc.txt -s $num_bpe_merges -o $HEMINGWAY_DATA/bpe/tokens.en --write-vocabulary $HEMINGWAY_DATA/bpe/token_freq.en
# !subword-nmt learn-joint-bpe-and-vocab --input $HEMINGWAY_DATA/hemingway.lv.tc.txt -s $num_bpe_merges -o $HEMINGWAY_DATA/bpe/tokens.lv --write-vocabulary $HEMINGWAY_DATA/bpe/token_freq.lv

In [15]:
def build_vocab(freq_file, vocab_size):
    vocab = Counter(['<unk>', '<pad>', '<eos>'])
    with open(freq_file, 'r') as f:
        for line in f.readlines():
            token, num_occurs = line.split()
            # vocab.append(token)
            vocab[token] += int(num_occurs)
    return vocab #[:vocab_size]
#     return vocab[:vocab_size]

en_vocab = build_vocab(f'{HEMINGWAY_DATA}/bpe/token_freq.en', vocab_size)
lv_vocab = build_vocab(f'{HEMINGWAY_DATA}/bpe/token_freq.lv', vocab_size)

with open(f'{HEMINGWAY_DATA}/bpe/vocab.en', 'w') as f:
    for i, token in enumerate(en_vocab):
        # f.write(f"{token} {i + 1} \n")
        f.write(f"{token} {en_vocab[token]} \n")

with open(f'{HEMINGWAY_DATA}/bpe/vocab.lv', 'w') as f:
    for i, token in enumerate(lv_vocab):
        # f.write(f"{token} {i + 1} \n")
        f.write(f"{token} {lv_vocab[token]} \n")

joint_vocab = Counter(en_vocab)
joint_vocab.update(lv_vocab)

with open(f'{HEMINGWAY_DATA}/bpe/vocab.lven', 'w') as f:
    for i, token in enumerate(joint_vocab):
        # f.write(f"{token} {i + 1} \n")
        f.write(f"{token} {joint_vocab[token]} \n")



In [16]:
print("en_vocab:", len(en_vocab), "lv_vocab:", len(lv_vocab), "joint_vocab", len(joint_vocab))

en_vocab: 6597 lv_vocab: 3710 joint_vocab 9385


In [17]:
#!subword-nmt apply-bpe -c $HEMINGWAY_DATA/bpe/tokens.en --vocabulary $HEMINGWAY_DATA/bpe/vocab.en --vocabulary-threshold 1 < $HEMINGWAY_DATA/hemingway.en.tc.txt > $HEMINGWAY_DATA/hemingway.en.BPE.txt
#!subword-nmt apply-bpe -c $HEMINGWAY_DATA/bpe/tokens.lv --vocabulary $HEMINGWAY_DATA/bpe/vocab.lv --vocabulary-threshold 1 < $HEMINGWAY_DATA/hemingway.lv.tc.txt > $HEMINGWAY_DATA/hemingway.lv.BPE.txt

# !subword-nmt apply-bpe -c $HEMINGWAY_DATA/bpe/tokens.lven --vocabulary $HEMINGWAY_DATA/bpe/token_freq.en --vocabulary-threshold 1 < $HEMINGWAY_DATA/hemingway.en.tc.txt > $HEMINGWAY_DATA/hemingway.en.BPE.txt
# !subword-nmt apply-bpe -c $HEMINGWAY_DATA/bpe/tokens.lven --vocabulary $HEMINGWAY_DATA/bpe/token_freq.lv --vocabulary-threshold 1 < $HEMINGWAY_DATA/hemingway.lv.tc.txt > $HEMINGWAY_DATA/hemingway.lv.BPE.txt

!subword-nmt apply-bpe -c $HEMINGWAY_DATA/bpe/tokens.lven --vocabulary $HEMINGWAY_DATA/bpe/vocab.lven --vocabulary-threshold 1 < $HEMINGWAY_DATA/hemingway.en.tc.txt > $HEMINGWAY_DATA/hemingway.en.BPE.txt
!subword-nmt apply-bpe -c $HEMINGWAY_DATA/bpe/tokens.lven --vocabulary $HEMINGWAY_DATA/bpe/vocab.lven --vocabulary-threshold 1 < $HEMINGWAY_DATA/hemingway.lv.tc.txt > $HEMINGWAY_DATA/hemingway.lv.BPE.txt


In [18]:
special_tokens = ['<unk>', '<pad>', '<eos>', '<sep>'] #, '<S>', '</S>', '<bos>', '<eos>', '<sep>', '<NONE>', '<|>']
                  
    #  '---Exits---']  #, COMMAND_TOKEN]

                 # '+open', '+closed', '+roasted', '+baked', '+fried', '+raw',
                 # '+sliced', '+diced', '+chopped', '++Carrying:', ]
normalizer = normalizers.Sequence([Strip(), Lowercase()])
pre_tokenizer = Whitespace()

model = tokenizers.models.WordLevel(unk_token='<unk>')
# model = tokenizers.models.WordPiece()
tokenizer = tokenizers.Tokenizer(model=model)


tokenizer.add_special_tokens(special_tokens)
tokenizer.normalizer = normalizer
tokenizer.pre_tokenizer = pre_tokenizer

# filelist = glob.glob(PTHRU_DIR+"valid/*.pthru")
# filelist.extend( glob.glob(PTHRU_DIR+"test/*.pthru"))
# filelist.extend( glob.glob(PTHRU_DIR+"train/*.pthru"))


# token_strs = [tok for (tok, span) in pre_tokenizer.pre_tokenize_str(str1)]
# print(token_strs)

# filelist = glob.glob(PTHRU_DIR+"valid/*.pthru")

filelist = glob.glob(f"{HEMINGWAY_DATA}/hemingway.*.BPE.txt")

filelist = sorted(filelist)
print(len(filelist), filelist[:10])


# unigram_trainer = tokenizers.trainers.UnigramTrainer()
# trainer = tokenizers.trainers.WordPieceTrainer(vocab_size=vocab_size)
trainer = tokenizers.trainers.WordLevelTrainer(vocab_size=joint_vocab_size, special_tokens=special_tokens)

tokenizer.train(files=filelist, trainer=trainer)

2 ['./data/hemingway/hemingway.en.BPE.txt', './data/hemingway/hemingway.lv.BPE.txt']


In [19]:
vocab_dict = tokenizer.get_vocab(with_added_tokens=False)
print("ACTUAL VOCAB SIZE =", len(vocab_dict))
print(vocab_dict)
# !! ACTUAL VOCAB SIZE = 900 (first try when joint_vocab but separate --vocabulary token_freq.lang)
# ACTUAL VOCAB SIZE = 8637

ACTUAL VOCAB SIZE = 8637
{'gers': 8353, 'saārdīts': 7643, 'ings': 1543, 'cis': 1926, 'poor': 2167, 'chamois': 7340, 'qu': 1563, 'brauksim': 1344, 'certain': 3587, 'hi': 2199, 'vii': 2641, 'pārliecināts': 2292, 'viz': 5595, 'kuņģ': 7524, 'sap': 1295, 'pāris': 4999, 'stiff': 1960, 'katrs': 2203, 'novembra': 4872, 'tiksim': 6896, 'karso': 3591, 'pabrauca': 5179, 'rituli': 6972, 'smel': 3747, 'sakust': 5790, 'vīrie': 8082, 'silhou': 8388, 'guar': 8434, 'hun': 2113, 'dziņu': 6172, 'sort': 2972, 'jūsu': 483, 'understood': 2962, 'liras': 3026, 'success': 4573, 'nolēmu': 7541, 'īga': 603, 'somas': 1237, 'auga': 3701, 'bil': 8373, 'hurry': 2746, 'agli': 7964, 'noteica': 388, 'gu': 590, 'klājās': 4246, 'lampas': 4959, 'steps': 5428, 'ditch': 4811, 'mokas': 6479, 'vec': 1115, 'hands': 783, 'jād': 3411, 'fd': 7676, 'su': 538, 'ievainojumiem': 5596, 'pilieni': 5752, 'veic': 5104, 'sloku': 7046, 'sākt': 7877, 'medical': 2451, 'spoke': 1892, 'ūdenim': 7156, 'patlaban': 1582, 'amp': 3929, 'pateicos': 

In [20]:
with open(f'{HEMINGWAY_DATA}/hemingway.lv.BPE.txt', 'r') as f:
    text_input = f.read()

with open(f'{HEMINGWAY_DATA}/hemingway.en.BPE.txt', 'r') as f:
    text_output = f.read()

#MinGPT

In [21]:
import random
import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

def top_k_logits(logits, k):
    v, ix = torch.topk(logits, k)
    out = logits.clone()
    out[out < v[:, [-1]]] = -float('Inf')
    return out

def calculate_attention_token(attention, top_k, model):
    logits = model.head(attention)
    logits = logits[:, -1, :]
    logits = top_k_logits(logits, top_k)

    probs = F.softmax(logits)

    _, ix = torch.topk(probs, k=1, dim=-1)
    ix = torch.multinomial(probs, num_samples=top_k)

    return ix[0]


@torch.no_grad()
def sample(model, x, steps, temperature=1.0, sample=False, top_k=None, output_attention=False):
    """
    take a conditioning sequence of indices in x (of shape (b,t)) and predict the next token in
    the sequence, feeding the predictions back into the model each time. Clearly the sampling
    has quadratic complexity unlike an RNN that is only linear, and has a finite context window
    of block_size, unlike an RNN that has an infinite context window.
    """
    block_size = model.get_block_size()
    model.eval()
    attention_state = [[] for _ in model.blocks]

    for k in range(steps):
        x_cond = x if x.size(1) <= block_size else x[:, -block_size:] # crop context if needed
        logits, _ = model(x_cond)
        # pluck the logits at the final step and scale by temperature
        logits = logits[:, -1, :] / temperature
        # optionally crop probabilities to only the top k options
        if top_k is not None:
            logits = top_k_logits(logits, top_k)
        # apply softmax to convert to probabilities
        probs = F.softmax(logits, dim=-1)
        # sample from the distribution or take the most likely
        if sample:
            ix = torch.multinomial(probs, num_samples=1)
        else:
            _, ix = torch.topk(probs, k=1, dim=-1)

        if output_attention:
            b, t = x.size()

            for block_id in range(len(model.blocks)):
                att = model.blocks[block_id].attn.att
                attention_state[block_id].append(att)

        # append to the sequence and continue
        x = torch.cat((x, ix), dim=1)

    if output_attention:
        return x, attention_state

    return x


In [22]:
"""
GPT model:
- the initial stem consists of a combination of token encoding and a positional encoding
- the meat of it is a uniform sequence of Transformer blocks
    - each Transformer is a sequential combination of a 1-hidden-layer MLP block and a self-attention block
    - all blocks feed into a central residual pathway similar to resnets
- the final decoder is a linear projection into a vanilla Softmax classifier
"""

import math
import logging

import torch
import torch.nn as nn
from torch.nn import functional as F

logger = logging.getLogger(__name__)

class GPTConfig:
    """ base GPT config, params common to all GPT versions """
    embd_pdrop = 0.1
    resid_pdrop = 0.1
    attn_pdrop = 0.1

    def __init__(self, vocab_size, block_size, **kwargs):
        self.vocab_size = vocab_size
        self.block_size = block_size
        for k,v in kwargs.items():
            setattr(self, k, v)

class GPT1Config(GPTConfig):
    """ GPT-1 like network roughly 125M params """
    n_layer = 12
    n_head = 12
    n_embd = 768

class CausalSelfAttention(nn.Module):
    """
    A vanilla multi-head masked self-attention layer with a projection at the end.
    It is possible to use torch.nn.MultiheadAttention here but I am including an
    explicit implementation here to show that there is nothing too scary here.
    """

    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        # key, query, value projections for all heads
        self.key = nn.Linear(config.n_embd, config.n_embd)
        self.query = nn.Linear(config.n_embd, config.n_embd)
        self.value = nn.Linear(config.n_embd, config.n_embd)
        # regularization
        self.attn_drop = nn.Dropout(config.attn_pdrop)
        self.resid_drop = nn.Dropout(config.resid_pdrop)
        # output projection
        self.proj = nn.Linear(config.n_embd, config.n_embd)
        # causal mask to ensure that attention is only applied to the left in the input sequence
        self.register_buffer("mask", torch.tril(torch.ones(config.block_size, config.block_size))
                                     .view(1, 1, config.block_size, config.block_size))
        self.n_head = config.n_head
        self.att = None

    def forward(self, x, layer_past=None):
        B, T, C = x.size()

        # calculate query, key, values for all heads in batch and move head forward to be the batch dim
        k = self.key(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)
        q = self.query(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)
        v = self.value(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)

        # causal self-attention; Self-attend: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T)
        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
        att = att.masked_fill(self.mask[:,:,:T,:T] == 0, float('-inf'))
        att = F.softmax(att, dim=-1)
        att = self.attn_drop(att)
        y = att @ v # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs)
        y = y.transpose(1, 2).contiguous().view(B, T, C) # re-assemble all head outputs side by side

        # output projection
        y = self.resid_drop(self.proj(y))

        self.att = att

        return y

class Block(nn.Module):
    """ an unassuming Transformer block """

    def __init__(self, config):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        self.ln2 = nn.LayerNorm(config.n_embd)
        self.attn = CausalSelfAttention(config)
        self.mlp = nn.Sequential(
            nn.Linear(config.n_embd, 4 * config.n_embd),
            nn.GELU(),
            nn.Linear(4 * config.n_embd, config.n_embd),
            nn.Dropout(config.resid_pdrop),
        )

    def forward(self, x):
        x = x + self.attn(self.ln1(x))
        x = x + self.mlp(self.ln2(x))
        return x

class GPT(nn.Module):
    """  the full GPT language model, with a context size of block_size """

    def __init__(self, config):
        super().__init__()

        # input embedding stem
        self.tok_emb = nn.Embedding(config.vocab_size, config.n_embd)
        self.pos_emb = nn.Parameter(torch.zeros(1, config.block_size, config.n_embd))
        self.drop = nn.Dropout(config.embd_pdrop)
        # transformer
        self.blocks = nn.Sequential(*[Block(config) for _ in range(config.n_layer)])
        # decoder head
        self.ln_f = nn.LayerNorm(config.n_embd)
        self.head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        self.block_size = config.block_size
        self.apply(self._init_weights)

        logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))

    def get_block_size(self):
        return self.block_size

    def _init_weights(self, module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            module.weight.data.normal_(mean=0.0, std=0.02)
            if isinstance(module, nn.Linear) and module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def configure_optimizers(self, train_config):
        """
        This long function is unfortunately doing something very simple and is being very defensive:
        We are separating out all parameters of the model into two buckets: those that will experience
        weight decay for regularization and those that won't (biases, and layernorm/embedding weights).
        We are then returning the PyTorch optimizer object.
        """

        # separate out all parameters to those that will and won't experience regularizing weight decay
        decay = set()
        no_decay = set()
        whitelist_weight_modules = (torch.nn.Linear, )
        blacklist_weight_modules = (torch.nn.LayerNorm, torch.nn.Embedding)
        for mn, m in self.named_modules():
            for pn, p in m.named_parameters():
                fpn = '%s.%s' % (mn, pn) if mn else pn # full param name

                if pn.endswith('bias'):
                    # all biases will not be decayed
                    no_decay.add(fpn)
                elif pn.endswith('weight') and isinstance(m, whitelist_weight_modules):
                    # weights of whitelist modules will be weight decayed
                    decay.add(fpn)
                elif pn.endswith('weight') and isinstance(m, blacklist_weight_modules):
                    # weights of blacklist modules will NOT be weight decayed
                    no_decay.add(fpn)

        # special case the position embedding parameter in the root GPT module as not decayed
        no_decay.add('pos_emb')

        # validate that we considered every parameter
        param_dict = {pn: p for pn, p in self.named_parameters()}
        inter_params = decay & no_decay
        union_params = decay | no_decay
        assert len(inter_params) == 0, "parameters %s made it into both decay/no_decay sets!" % (str(inter_params), )
        assert len(param_dict.keys() - union_params) == 0, "parameters %s were not separated into either decay/no_decay set!" \
                                                    % (str(param_dict.keys() - union_params), )

        # create the pytorch optimizer object
        optim_groups = [
            {"params": [param_dict[pn] for pn in sorted(list(decay))], "weight_decay": train_config.weight_decay},
            {"params": [param_dict[pn] for pn in sorted(list(no_decay))], "weight_decay": 0.0},
        ]
        optimizer = torch.optim.AdamW(optim_groups, lr=train_config.learning_rate, betas=train_config.betas)
        return optimizer

    def forward(self, idx, targets=None):
        b, t = idx.size()
        assert t <= self.block_size, "Cannot forward, model block size is exhausted."

        # forward the GPT model
        token_embeddings = self.tok_emb(idx) # each index maps to a (learnable) vector
        position_embeddings = self.pos_emb[:, :t, :] # each position maps to a (learnable) vector
        x = self.drop(token_embeddings + position_embeddings)
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.head(x)

        # if we are given some desired targets also calculate the loss
        loss = None
        if targets is not None:
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))

        return logits, loss


In [23]:
"""
Simple training loop; Boilerplate that could apply to any arbitrary neural network,
so nothing in this file really has anything to do with GPT specifically.
"""

import sacrebleu
import math
import logging
from random import choice

from tqdm import tqdm
import numpy as np

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data.dataloader import DataLoader

logger = logging.getLogger(__name__)

def clean_tokens(sentence):
    return sentence.replace('@@ ', '').replace(' @', '').replace('@ ', '')

class TrainerConfig:
    # optimization parameters
    max_epochs = 10
    batch_size = 64
    learning_rate = 3e-4
    betas = (0.9, 0.95)
    grad_norm_clip = 1.0
    weight_decay = 0.1 # only applied on matmul weights
    # learning rate decay params: linear warmup followed by cosine decay to 10% of original
    lr_decay = False
    warmup_tokens = 375e6 # these two numbers come from the GPT-3 paper, but may not be good defaults elsewhere
    final_tokens = 260e9 # (at what point we reach 10% of original LR)
    # checkpoint settings
    ckpt_path = None
    num_workers = 0 # for DataLoader

    def __init__(self, **kwargs):
        for k,v in kwargs.items():
            setattr(self, k, v)

class Trainer:

    def __init__(self, model, train_dataset, test_dataset, valid_dataset, config):
        self.model = model
        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
        self.valid_dataset = valid_dataset
        self.config = config

        # take over whatever gpus are on the system
        self.device = 'cpu'
        if torch.cuda.is_available():
            self.device = torch.cuda.current_device()
            self.model = torch.nn.DataParallel(self.model).to(self.device)

    def save_checkpoint(self, postfix=''):
        # DataParallel wrappers keep raw model object in .module attribute
        raw_model = self.model.module if hasattr(self.model, "module") else self.model
        checkpoint_path = self.config.ckpt_path + postfix + '.pt'
        logger.info("saving %s", checkpoint_path)
        torch.save(raw_model.state_dict(), checkpoint_path)

    def train(self):
        model, config = self.model, self.config
        raw_model = model.module if hasattr(self.model, "module") else model
        optimizer = raw_model.configure_optimizers(config)

        def run_epoch(split):
            is_train = split == 'train'
            model.train(is_train)
            data = self.train_dataset
            if split == 'test':
                data = self.test_dataset
            elif split == 'valid':
                data = self.valid_dataset
                model.eval()
            loader = DataLoader(data, shuffle=True, pin_memory=True,
                                batch_size=config.batch_size if is_train else 8,
                                num_workers=config.num_workers)

            losses = []
            pbar = tqdm(enumerate(loader), total=len(loader)) if is_train else enumerate(loader)
            logits_total = None
            x_total = None
            y_total = None
            for it, (x, y) in pbar:

                # place data on the correct device
                x = x.to(self.device)
                y = y.to(self.device)

                # forward the model
                with torch.set_grad_enabled(is_train):
                    logits, loss = model(x, y)
                    loss = loss.mean() # collapse all losses if they are scattered on multiple gpus
                    losses.append(loss.item())
                    if split == 'valid':
                        if logits_total is None:
                            logits_total = logits
                            x_total = x
                            y_total = y
                        else:
                            logits_total = torch.cat((logits_total, logits), dim=0)
                            x_total = torch.cat((x_total, x), dim=0)
                            y_total = torch.cat((y_total, y), dim=0)
                        

                if is_train:
                    # backprop and update the parameters
                    model.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_norm_clip)
                    optimizer.step()

                    # decay the learning rate based on our progress
                    if config.lr_decay:
                        self.tokens += (y >= 0).sum() # number of tokens processed this step (i.e. label is not -100)
                        if self.tokens < config.warmup_tokens:
                            # linear warmup
                            lr_mult = float(self.tokens) / float(max(1, config.warmup_tokens))
                        else:
                            # cosine learning rate decay
                            progress = float(self.tokens - config.warmup_tokens) / float(max(1, config.final_tokens - config.warmup_tokens))
                            lr_mult = max(0.1, 0.5 * (1.0 + math.cos(math.pi * progress)))
                        lr = config.learning_rate * lr_mult
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr
                    else:
                        lr = config.learning_rate

                    # report progress
                    pbar.set_description(f"epoch {epoch+1} iter {it}: train loss {loss.item():.5f}. mean loss: {float(np.mean(losses)):.5f}. lr {lr:e}")

            if split == 'train':
                train_loss = float(np.mean(losses))
                print(f"train loss: {train_loss}")
                return train_loss

            if split == 'test':
                test_loss = float(np.mean(losses))
                print(f"test loss: {test_loss}")
                return test_loss

            if split == 'valid':
                test_loss = float(np.mean(losses))
                print(f"valid loss: {test_loss}")

                eval_results = []
                translation_results = []
                context_list = []

                for idx in range(len(logits_total)):
                    intent = (x_total[idx] == valid_dataset.tokenizer_input.encode(['<eos>'])[0]).nonzero(as_tuple=True)[0][0]

                    probs = F.softmax(logits_total[idx], dim=-1)
                    # sample from the distribution or take the most likely
                    _, predicted = torch.topk(probs, k=1, dim=-1)
                    context = clean_tokens(data.tokenizer_input.decode(x_total[idx][:intent - 1], True))
                    completion = clean_tokens(data.tokenizer_output.decode(predicted[intent:], True))
                    real = clean_tokens(data.tokenizer_output.decode(y_total[idx][intent:], True))

                    context_list.append(context)
                    translation_results.append(completion)
                    eval_results.append(real)
                
                with open('valid.txt', 'w') as f:
                    f.write("\n".join(translation_results))

                with open('eval.txt', 'w') as f:
                    f.write("\n".join(eval_results))

                with open('context.txt', 'w') as f:
                    f.write("\n".join(context_list))


                !cat valid.txt | mosesdecoder/scripts/tokenizer/detokenizer.perl -l lv > valid.detok.txt
                !cat eval.txt | mosesdecoder/scripts/tokenizer/detokenizer.perl -l lv > eval.detok.txt
                !cat context.txt | mosesdecoder/scripts/tokenizer/detokenizer.perl -l lv > context.detok.txt

                with open('eval.detok.txt', 'r') as f:
                    eval_results = [l.strip() for l in f.readlines()]
                with open('valid.detok.txt', 'r') as f:
                    translation_results = [l.strip() for l in f.readlines()]
                with open('context.detok.txt', 'r') as f:
                    context_list = [l.strip() for l in f.readlines()]

#                 idx = choice(range(len(context_list)))
                valid_sentences = ['the driver wore a cap and his face was thin and very tanned.',
                                   'outside it was getting dark.',
                                   'the two girls were asleep.',
                                   'I would like to have had the uniform off although I did not care much about the outward forms.',
                                   'I watched the flashes on San Gabriele.',
                                   'I asked.',
                                   '"no.']

                idx_list = [i for i, sentence in enumerate(eval_results) if sentence in valid_sentences]
                
                for idx in idx_list:
                    print(f'Input:            {context_list[idx]}')
                    print(f'Predicted output: {translation_results[idx]}')
                    print(f'Real output:      {eval_results[idx]}')
                    print('--------------------------------------------------')

                refs = [eval_results]
                sys = translation_results
                bleu = sacrebleu.corpus_bleu(sys, refs)
                print(f'BLEU: {bleu.score}')
                print('##############################################################')

                return test_loss, bleu.score

        train_loss_list = []
        test_loss_list = []
        valid_loss_list = []
        valid_bleu_list = []
        best_loss = float('inf')
        best_bleu = 0.0
        self.tokens = 0 # counter used for learning rate decay
        for epoch in range(config.max_epochs):

            train_loss = run_epoch('train')
            train_loss_list.append(train_loss)
            if self.test_dataset is not None:
                test_loss = run_epoch('test')
                test_loss_list.append(test_loss)

            if self.valid_dataset is not None:
                valid_loss, bleu_score = run_epoch('valid')
                valid_loss_list.append(valid_loss)
                valid_bleu_list.append(bleu_score)

            # supports early stopping based on the test loss, or just save always if no test set is provided
            # good_model = self.test_dataset is None or test_loss < best_loss
            good_model = self.valid_dataset is None or bleu_score > best_bleu
            if self.config.ckpt_path is not None and good_model:
                best_loss = test_loss
                best_bleu = bleu_score
                self.save_checkpoint("_best")

            if epoch % 10 == 0:
                self.save_checkpoint(f"_{epoch}")

            self.save_checkpoint("_last")

        return train_loss_list, test_loss_list, valid_loss_list, valid_bleu_list


#Training

In [24]:

class Tokenizer:
    def __init__(self, data, vocab_size, vocab):
        self.vocab_size = vocab_size
        self.vocab = vocab
        self.vocab_size = len(vocab)
        if self.vocab_size != vocab_size:
            logger.warn(f"Tokenizer len(vocab) != vocab_size: {len(vocab)} {vocab_size}")
        print(f"Tokenizer vocab_size={vocab_size} len(vocab)={len(vocab)}")
        self.stoi = {ch: i for i, ch in enumerate(self.vocab)}
        self.itos = {i: ch for i, ch in enumerate(self.vocab)}
    
    def tokenize(self, data, block_size):
        tokenized_text = data.split()
        # Filter empty strings
        tokenized_text = [x for x in tokenized_text if x]
        result = []
        for tokenized in tokenized_text:
            # In case other single # found, replace them with <unk> special token, marking the element as unknown
            if tokenized in self.vocab:
                result.append(tokenized)
            else:
                logger.warn(f"Tokenizer UNKNOWN TOKEN: |{tokenized}|")
                result.append('<unk>')

        # in case the sentence is longer, than block_size, we trim the sentence
        return result[:block_size]
    
    def encode(self, data):
        return [self.stoi[s] for s in data]
    
    def decode(self, data, clean_paddings=False):
        text = ' '.join([self.itos[int(i)] for i in data])

        if not clean_paddings:
            return text
        return text.replace('<pad>', '').replace('  ', '')

In [25]:
# vocab_size = 10000

# vocab_input = None
# if os.path.exists('vocab_input.pkl'):
#     with open('vocab_input.pkl', 'rb') as f:
#         vocab_input = pickle.load(f)
        
# vocab_output = None
# if os.path.exists('vocab_output.pkl'):
#     with open('vocab_output.pkl', 'rb') as f:
#         vocab_output = pickle.load(f)

# building vocabluary can take some time. ~5 minutes for 10_000 tokens for each tokenizer. 
tokenizer_input = Tokenizer(text_input, vocab_size, list(joint_vocab))
tokenizer_output = Tokenizer(text_output, vocab_size, list(joint_vocab))

  logger.warn(f"Tokenizer len(vocab) != vocab_size: {len(vocab)} {vocab_size}")
Tokenizer len(vocab) != vocab_size: 9385 5500
Tokenizer len(vocab) != vocab_size: 9385 5500


Tokenizer vocab_size=5500 len(vocab)=9385
Tokenizer vocab_size=5500 len(vocab)=9385


In [26]:
# with open('vocab_input.pkl', 'wb') as f:
#     pickle.dump(tokenizer_input.vocab, f)

# with open('vocab_output.pkl', 'wb') as f:
#     pickle.dump(tokenizer_output.vocab, f)

In [27]:
assert len(text_input.splitlines()) == len(text_output.splitlines()), \
   f"{len(text_input.splitlines())} {len(text_output.splitlines())}"
assert len(text_lv.splitlines()) == len(text_en.splitlines())
assert len(text_lv.splitlines()) == len(text_input.splitlines())
line_idxs = list(range(len(text_input.splitlines())))
random.shuffle(line_idxs)
print(len(line_idxs), len(text_input.splitlines()))
# print(line_idxs[:10], line_idxs[-10:])

train_dataset_size = round(0.75 * len(line_idxs))
test_dataset_size = round(0.15 * len(line_idxs))
valid_dataset_size = round(0.1 * len(line_idxs))

train_idxs = line_idxs[:train_dataset_size]
test_idxs = line_idxs[train_dataset_size:train_dataset_size + test_dataset_size]
valid_idxs = line_idxs[-valid_dataset_size:]

assert len(train_idxs) + len(valid_idxs) + len(test_idxs) == len(line_idxs)
assert set(line_idxs) == set(train_idxs) | set(valid_idxs) | set(test_idxs)

8812 8812


In [28]:
print(text_input[:200])
print(f"{len(text_lv.splitlines())} {len(text_input.splitlines())}")

ar@@ die@@ vas IE@@ R@@ O@@ Č@@ IE@@ M
pirmā DAĻA
I NODAĻA
to@@ gad vēl@@ ā vasar@@ ā bijām izviet@@ oti kādā cie@@ mā , un no mūsu māj@@ iņas pāri upei un līdzenumam pavērās skat@@ s uz kalniem .
upe
8812 8812


In [29]:
# Shuffle texts by lines
# texts = list(zip(text_output.splitlines(), text_input.splitlines()))
# random.shuffle(texts)
# output_texts, input_texts = zip(*texts)

In [30]:
# Split texts into train, test and validation datasets
# train_dataset_size = round(0.75 * len(output_texts))
# test_dataset_size = round(0.15 * len(output_texts))
# valid_dataset_size = round(0.1 * len(output_texts))

# train_input = input_texts[:train_dataset_size]
# test_input = input_texts[train_dataset_size:train_dataset_size + test_dataset_size]
# valid_input = input_texts[-valid_dataset_size:]

# train_output = output_texts[:train_dataset_size]
# test_output = output_texts[train_dataset_size:train_dataset_size + test_dataset_size]
# valid_output = output_texts[-valid_dataset_size:]

def separate_lines(text, train_idxs, valid_idxs, test_idxs):
    text_lines = text.splitlines()
    train_lines = [text_lines[idx] for idx in train_idxs]
    valid_lines = [text_lines[idx] for idx in valid_idxs]
    test_lines = [text_lines[idx] for idx in test_idxs]
    return train_lines, valid_lines, test_lines

train_input, valid_input, test_input = separate_lines(text_input, train_idxs, valid_idxs, test_idxs)

train_output, valid_output, test_output = separate_lines(text_output, train_idxs, valid_idxs, test_idxs)



In [31]:

with open('data/hemingway/train.lv', 'w') as f:
    f.write("\n".join(train_input))

with open('data/hemingway/test.lv', 'w') as f:
    f.write("\n".join(test_input))

with open('data/hemingway/valid.lv', 'w') as f:
    f.write("\n".join(valid_input))


with open('data/hemingway/train.en', 'w') as f:
    f.write("\n".join(train_output))

with open('data/hemingway/test.en', 'w') as f:
    f.write("\n".join(test_output))

with open('data/hemingway/valid.en', 'w') as f:
    f.write("\n".join(valid_output))


In [32]:
from torch.utils.data import Dataset

class WordDataset(Dataset):

    def __init__(self, output_text, input_text, tokenizer_output, tokenizer_input, block_size):
        self.tokenizer_output = tokenizer_output
        self.tokenizer_input = tokenizer_input

        self.block_size = block_size * 2 + 1
        self.output_text = [tokenizer_output.tokenize(t, block_size) for t in output_text]
        self.input_text = [tokenizer_input.tokenize(t, block_size) for t in input_text]

    def __len__(self):
        return len(self.output_text)

    def __getitem__(self, idx):
        """
        The idea is to get the input sentence
        and translate it to output sentence (sentences could be on any language).

        In the init method we already split a sentence into tokens and filled with spaces,
        to have an equal sentence size. In this method we just encode the tokens to
        ids (a list of numbers), and we're trying to map ids sequences
        """

        tokenized_input_text = self.tokenizer_input.encode(self.input_text[idx])
        tokenized_output_text = self.tokenizer_output.encode(self.output_text[idx])

        dix = tokenized_input_text + self.tokenizer_output.encode(['<eos>']) + tokenized_output_text
        if len(dix) < self.block_size:
            dix += self.tokenizer_output.encode(['<pad>']) * (self.block_size - len(dix))

        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)
        y[:len(tokenized_input_text) - 1] = -100

        return x, y

In [33]:
block_size = 100  # the estimate how long lines the text could be (token count)

train_dataset = WordDataset(train_output, train_input, tokenizer_output, tokenizer_input, block_size)
test_dataset = WordDataset(test_output, test_input, tokenizer_output, tokenizer_input, block_size)
valid_dataset = WordDataset(valid_output, valid_input, tokenizer_output, tokenizer_input, block_size)

In [34]:
# NOTE: fixed, no longer shows UNKNOWN TOKEN

# joint_vocab -s 10000
# UNKNOWN TOKEN

# |;@@| (2040)  # I &@@ apos@@ ;@@ m
# |q@@| (148)
# |R| (40)
# |v| (409)


In [35]:
number_of_heads = 8
number_of_layers = 6

# from mingpt.model import GPT, GPTConfig
embd_pdrop = 0.1
resid_pdrop = 0.1
attn_pdrop = 0.1

max_vocab = max(tokenizer_input.vocab_size, tokenizer_output.vocab_size)
mconf = GPTConfig(max_vocab, train_dataset.block_size,
                  n_layer=number_of_layers, n_head=number_of_heads, n_embd=512,
                  embd_pdrop=embd_pdrop, resid_pdrop=resid_pdrop, attn_pdrop=attn_pdrop)

model = GPT(mconf)

In [36]:
# from mingpt.trainer import Trainer, TrainerConfig

tokens_per_epoch = len(train_dataset) * block_size
train_epochs = 100
batch_size = 64  #128

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=train_epochs, 
                      batch_size=64, learning_rate=3e-4,
                      lr_decay=True, warmup_tokens=tokens_per_epoch, final_tokens=train_epochs*tokens_per_epoch,
                      ckpt_path='minGPT-LV-EN-translator_model',
                      num_workers=1, weight_decay=0.0001, betas=(0.9, 0.98))
trainer = Trainer(model, train_dataset, test_dataset, valid_dataset, tconf)

In [37]:
param_count = sum([param.nelement() for param in model.parameters()])

print(f'Parameters count: {param_count}')

Parameters count: 28628480


In [38]:
train_loss_list, test_loss_list, valid_loss_list, valid_bleu_list = trainer.train()

epoch 1 iter 103: train loss 0.33353. mean loss: 0.72054. lr 2.999389e-04: 100%|██████████| 104/104 [00:23<00:00,  4.47it/s]

train loss: 0.7205399552790018





test loss: 0.3125832687659436
valid loss: 0.308916897387118
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the road and was road and the road. a. the road..
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I, a the the a a road...,..... road.....
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: I the was a a.
R

epoch 2 iter 103: train loss 0.28461. mean loss: 0.29490. lr 2.994085e-04: 100%|██████████| 104/104 [00:17<00:00,  5.89it/s]

train loss: 0.2949029627041175





test loss: 0.26937345810324315
valid loss: 0.26451114031511386
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I would be a be a a lake to the it had not be to to the lake.. to..
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsn

epoch 3 iter 103: train loss 0.32394. mean loss: 0.25930. lr 2.983352e-04: 100%|██████████| 104/104 [00:15<00:00,  6.59it/s]

train loss: 0.2593017343718272





test loss: 0.25394043694418594
valid loss: 0.25405368061216027
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the looked the road of the glass.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I would be a be a been war to to I could not see to to the war time time to time.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
----------------------

epoch 4 iter 103: train loss 0.23022. mean loss: 0.23421. lr 2.967230e-04: 100%|██████████| 104/104 [00:15<00:00,  6.65it/s]

train loss: 0.2342080306261778





test loss: 0.24522869481918325
valid loss: 0.24407511828718959
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the other other were very.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the road of the face.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was very very.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēd

epoch 5 iter 103: train loss 0.24435. mean loss: 0.21295. lr 2.945778e-04: 100%|██████████| 104/104 [00:15<00:00,  6.62it/s]

train loss: 0.21294650105902782





test loss: 0.24102674931826362
valid loss: 0.23555255614153975
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the walked the lake on the sh.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was very dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            šoferi

epoch 6 iter 103: train loss 0.15740. mean loss: 0.19190. lr 2.919072e-04: 100%|██████████| 104/104 [00:15<00:00,  6.64it/s]

train loss: 0.1919016853834574





test loss: 0.24245391669402638
valid loss: 0.23871296436131537
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the window of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted ou

epoch 7 iter 103: train loss 0.22746. mean loss: 0.17269. lr 2.887210e-04: 100%|██████████| 104/104 [00:15<00:00,  6.84it/s]

train loss: 0.1726944180062184





test loss: 0.2452612469892904
valid loss: 0.23841992188412864
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the wet of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            nē
Predicted output: no no. "
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was dark dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Inpu

epoch 8 iter 103: train loss 0.17081. mean loss: 0.15299. lr 2.850309e-04: 100%|██████████| 104/104 [00:15<00:00,  6.71it/s]

train loss: 0.15298862719478515





test loss: 0.24753492312079453
valid loss: 0.2412271835916751
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked said.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were all.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------

epoch 9 iter 103: train loss 0.10128. mean loss: 0.13361. lr 2.808501e-04: 100%|██████████| 104/104 [00:15<00:00,  6.72it/s]

train loss: 0.1336078683200937





test loss: 0.25410313621522435
valid loss: 0.24887177072935276
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was dark dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the woman was a big and she face was very. she big ded.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim noz

epoch 10 iter 103: train loss 0.07885. mean loss: 0.11529. lr 2.761939e-04: 100%|██████████| 104/104 [00:15<00:00,  6.63it/s]

train loss: 0.11529059242457151





test loss: 0.25798941461975317
valid loss: 0.2548450358413361
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the girl was his big and she face was was. his hard t.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted outpu

epoch 11 iter 103: train loss 0.10305. mean loss: 0.09852. lr 2.710792e-04: 100%|██████████| 104/104 [00:15<00:00,  6.60it/s]

train loss: 0.09852381215359156





test loss: 0.26601254679711467
valid loss: 0.2589003746775356
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:    

epoch 12 iter 103: train loss 0.04897. mean loss: 0.08260. lr 2.655246e-04: 100%|██████████| 104/104 [00:15<00:00,  6.62it/s]

train loss: 0.08259680458846





test loss: 0.2766875560473965
valid loss: 0.2756765824448955
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the shore on the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were all.
Real output:      the two girls were asleep.
--------------------------

epoch 13 iter 103: train loss 0.06128. mean loss: 0.06893. lr 2.595502e-04: 100%|██████████| 104/104 [00:15<00:00,  6.68it/s]

train loss: 0.06892755774494547





test loss: 0.28137747768357574
valid loss: 0.2730548216873226
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the girl was his face of his face was ddhis hot sted and
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the wind over the Gabriele.
Real output:      I watched the flashes on San Gabriele.
---------------------------

epoch 14 iter 103: train loss 0.15607. mean loss: 0.05749. lr 2.531778e-04: 100%|██████████| 104/104 [00:15<00:00,  6.58it/s]

train loss: 0.057486347818317324





test loss: 0.2872633930879185
valid loss: 0.28718323576020766
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were all.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: along saw the wind between the Gabriele.
Real output:      I watched the flashes on San Gabriele.
-------------------

epoch 15 iter 103: train loss 0.05268. mean loss: 0.04646. lr 2.464306e-04: 100%|██████████| 104/104 [00:15<00:00,  6.60it/s]

train loss: 0.04645801672282127





test loss: 0.29371005426867897
valid loss: 0.29575788968049727
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I want have a be seen some bar before that I 'd something want much before it Abruzzi iniit ust.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw low shore against the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:         

epoch 16 iter 103: train loss 0.03246. mean loss: 0.03705. lr 2.393329e-04: 100%|██████████| 104/104 [00:15<00:00,  6.63it/s]

train loss: 0.03705406894620795





test loss: 0.30097467775445386
valid loss: 0.304549661574063
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I want have a be to some own. you it 'd something want. potatoes it Abruzzi. tuit g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted

epoch 17 iter 103: train loss 0.02732. mean loss: 0.03019. lr 2.319107e-04: 100%|██████████| 104/104 [00:15<00:00,  6.64it/s]

train loss: 0.030191730313862745





test loss: 0.30510202795267105
valid loss: 0.29952701067065335
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I should have a be to some usto you I 'd not want much more it Abruzzi inal it ust.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his face of she face was over. his looking l.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were all.
Real output:      the two girls were asle

epoch 18 iter 103: train loss 0.02533. mean loss: 0.02442. lr 2.241909e-04: 100%|██████████| 104/104 [00:15<00:00,  6.69it/s]

train loss: 0.024423152673989534





test loss: 0.31161741678973276
valid loss: 0.3050594339231113
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi

epoch 19 iter 103: train loss 0.02252. mean loss: 0.02009. lr 2.162016e-04: 100%|██████████| 104/104 [00:15<00:00,  6.64it/s]

train loss: 0.020093227333675783





test loss: 0.31669200880520315
valid loss: 0.3104970569948892
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the head was his face and she face was tan. his sksted.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I want be a be to seen proto you I should not want much before it Abruzzi ini. g.
Real output:      I would like to have had the un

epoch 20 iter 103: train loss 0.01423. mean loss: 0.01669. lr 2.079717e-04: 100%|██████████| 104/104 [00:15<00:00,  6.63it/s]

train loss: 0.0166948469212422





test loss: 0.32074406415403606
valid loss: 0.313401622211074
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the others his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
I

epoch 21 iter 103: train loss 0.01179. mean loss: 0.01436. lr 1.995312e-04: 100%|██████████| 104/104 [00:15<00:00,  6.67it/s]

train loss: 0.014363427803171082





test loss: 0.3243972714317132
valid loss: 0.31736062493947176
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I 'd be a be to some legs to that I 'd not want about more it Abruzzi iniit g.
Real output:      I would like to have had the uniform off although I did not care much abo

epoch 22 iter 103: train loss 0.01868. mean loss: 0.01238. lr 1.909107e-04: 100%|██████████| 104/104 [00:15<00:00,  6.61it/s]

train loss: 0.012375041985741029





test loss: 0.32977950568479225
valid loss: 0.32104510938360536
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the head was his major door she face was hard. his capned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
-------------------------------

epoch 23 iter 103: train loss 0.01849. mean loss: 0.01076. lr 1.821416e-04: 100%|██████████| 104/104 [00:15<00:00,  6.72it/s]

train loss: 0.010763978222922351





test loss: 0.3350420465849968
valid loss: 0.32786156123017407
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought have a be to some son to that it 'd. want. 'it bar. y. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real

epoch 24 iter 103: train loss 0.00921. mean loss: 0.00951. lr 1.732557e-04: 100%|██████████| 104/104 [00:15<00:00,  6.74it/s]

train loss: 0.009506045066070957





test loss: 0.3378385629908866
valid loss: 0.33219634647573437
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark and
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: both two girls were over.
Real output

epoch 25 iter 103: train loss 0.00341. mean loss: 0.00838. lr 1.642853e-04: 100%|██████████| 104/104 [00:15<00:00,  6.58it/s]

train loss: 0.008383612524574766





test loss: 0.34145973847214
valid loss: 0.3326037479816256
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I want be a be some some Italian to that it 'd. to much fifteen it stairs. eit g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā 

epoch 26 iter 103: train loss 0.00748. mean loss: 0.00740. lr 1.552630e-04: 100%|██████████| 104/104 [00:15<00:00,  6.69it/s]

train loss: 0.007399112496596689





test loss: 0.3457155084484313
valid loss: 0.33813350667824615
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was a gray-she face was ddhis looking ned.
Real output:      the driver wore a cap and his

epoch 27 iter 103: train loss 0.00667. mean loss: 0.00653. lr 1.462216e-04: 100%|██████████| 104/104 [00:15<00:00,  6.59it/s]

train loss: 0.006526591390586243





test loss: 0.34858572662594806
valid loss: 0.34605687749278435
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: both girls girls were di.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predict

epoch 28 iter 103: train loss 0.00629. mean loss: 0.00579. lr 1.371939e-04: 100%|██████████| 104/104 [00:15<00:00,  6.61it/s]

train loss: 0.005789005369968856





test loss: 0.34793904459620095
valid loss: 0.34096593785661833
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were all.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the girl was a face-she face was tandhis skned and
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the shore his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
------

epoch 29 iter 103: train loss 0.00386. mean loss: 0.00516. lr 1.282127e-04: 100%|██████████| 104/104 [00:15<00:00,  6.62it/s]

train loss: 0.005158833622072752





test loss: 0.3559858200690114
valid loss: 0.35797581452507155
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I want have a be to a wheel to that I 'd not want. more it Abruzzi ini. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were di.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:          

epoch 30 iter 103: train loss 0.00826. mean loss: 0.00469. lr 1.193107e-04: 100%|██████████| 104/104 [00:15<00:00,  6.80it/s]

train loss: 0.0046927222149231685





test loss: 0.35612112283706665
valid loss: 0.34660127698569687
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni

epoch 31 iter 103: train loss 0.00362. mean loss: 0.00401. lr 1.105202e-04: 100%|██████████| 104/104 [00:15<00:00,  6.67it/s]

train loss: 0.004010097188606428





test loss: 0.3588593412325325
valid loss: 0.34896447046383006
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: I girl was a gray of she face was tanjahis comned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
---------------------------------------------

epoch 32 iter 103: train loss 0.00204. mean loss: 0.00369. lr 1.018732e-04: 100%|██████████| 104/104 [00:15<00:00,  6.71it/s]

train loss: 0.00369448169770364





test loss: 0.36359151605382023
valid loss: 0.35197829983401274
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the girl was a gray of she face was hard. his capned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
----------------------------------------

epoch 33 iter 103: train loss 0.00326. mean loss: 0.00333. lr 9.340097e-05: 100%|██████████| 104/104 [00:15<00:00,  6.54it/s]

train loss: 0.0033317346510907207





test loss: 0.35925842242607153
valid loss: 0.3521879521945307
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: both two girls did di.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the girl was a face of she face was hard. his simple ned.
Real output:      the driver wore a c

epoch 34 iter 103: train loss 0.00320. mean loss: 0.00294. lr 8.513441e-05: 100%|██████████| 104/104 [00:15<00:00,  6.75it/s]

train loss: 0.0029357695020735264





test loss: 0.36405292530375793
valid loss: 0.35640102047641
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I want have a be to a Italian to that that should not to. fever it potatoes ini. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            šoferim galvā bija cepure, u

epoch 35 iter 103: train loss 0.00200. mean loss: 0.00274. lr 7.710353e-05: 100%|██████████| 104/104 [00:15<00:00,  6.71it/s]

train loss: 0.0027357826094465475





test loss: 0.36532150532107754
valid loss: 0.3570399607892509
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought believe a be had a Italian to that that 'd not to. fever it bar ini. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the shore his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
------------------------

epoch 36 iter 103: train loss 0.00193. mean loss: 0.00241. lr 6.933751e-05: 100%|██████████| 104/104 [00:15<00:00,  6.60it/s]

train loss: 0.0024081483413913073





test loss: 0.36514112110956604
valid loss: 0.35826653027319694
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be a be had a Italian to that that 'd not to. fever it bar ini. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            vaicāju
Predicted output: I aske

epoch 37 iter 103: train loss 0.00192. mean loss: 0.00219. lr 6.186455e-05: 100%|██████████| 104/104 [00:15<00:00,  6.65it/s]

train loss: 0.002193531589000486





test loss: 0.3695452648652605
valid loss: 0.3598185785047643
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was a gray door she face was tanbrown his simple ned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it wasn getting dark.
Real output:      outsid

epoch 38 iter 103: train loss 0.00227. mean loss: 0.00199. lr 5.471182e-05: 100%|██████████| 104/104 [00:15<00:00,  6.66it/s]

train loss: 0.0019915709730972033





test loss: 0.36875818530658644
valid loss: 0.36077391571021294
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought believe a be to a Italian to that I 'd not to. fever it two ini. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the girl was his gray of his face was

epoch 39 iter 103: train loss 0.00250. mean loss: 0.00181. lr 4.790530e-05: 100%|██████████| 104/104 [00:15<00:00,  6.67it/s]

train loss: 0.0018111658327353115





test loss: 0.36986364814710904
valid loss: 0.3627934751209912
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the mountains his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the sun was a gray door his face was tanbrown his simple ned.
Rea

epoch 40 iter 103: train loss 0.00184. mean loss: 0.00166. lr 4.146972e-05: 100%|██████████| 104/104 [00:15<00:00,  6.66it/s]

train loss: 0.0016599886696964789





test loss: 0.36869648221251833
valid loss: 0.36147298536322137
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were di.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his gray door his face was tanbrown his simple ned.

epoch 41 iter 103: train loss 0.00131. mean loss: 0.00153. lr 3.542846e-05: 100%|██████████| 104/104 [00:15<00:00,  6.62it/s]

train loss: 0.0015283610062816967





test loss: 0.3705499371042453
valid loss: 0.36328793176122615
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be a be to a Italian to that that 'd not to. fever it two ini. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.

epoch 42 iter 103: train loss 0.00077. mean loss: 0.00137. lr 3.000000e-05: 100%|██████████| 104/104 [00:14<00:00,  7.02it/s]

train loss: 0.0013743404454614322





test loss: 0.37095890328826675
valid loss: 0.36310952666911994
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be a be to a Italian to that that 'd not to. fever it track ini. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the shore his the Gabriele.
Real output:      I watched the flashes on San Gabriel

epoch 43 iter 103: train loss 0.00249. mean loss: 0.00131. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.54it/s]

train loss: 0.0013111722409331168





test loss: 0.37143117288149985
valid loss: 0.3662309156613307
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the head was his gray of his face was tanbrown his simple ned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was get

epoch 44 iter 103: train loss 0.00296. mean loss: 0.00125. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.71it/s]

train loss: 0.0012471394254064832





test loss: 0.37587054230901135
valid loss: 0.3634512687756403
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his gray door his face was tanbrown his simple ned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
------------------------------

epoch 45 iter 103: train loss 0.00057. mean loss: 0.00123. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.67it/s]

train loss: 0.0012319018194550434





test loss: 0.3726211631872568
valid loss: 0.3726639609347593
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were above.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be a be to a Italian to that that 'd not to. fever it railway ini. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
-------------------------------------------------

epoch 46 iter 103: train loss 0.00184. mean loss: 0.00120. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.69it/s]

train loss: 0.0012038691302922626





test loss: 0.3747400341622801
valid loss: 0.37479804321989285
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
------------------------

epoch 47 iter 103: train loss 0.00179. mean loss: 0.00115. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.72it/s]

train loss: 0.0011457540805657539





test loss: 0.37444506818989676
valid loss: 0.36643112182348697
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were above.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his gray of his face was tanbrown his simple ned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt b

epoch 48 iter 103: train loss 0.00255. mean loss: 0.00117. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.68it/s]

train loss: 0.0011731352133210748





test loss: 0.37701121475323135
valid loss: 0.37102989087233673
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought have a be to a Italian to that that 'd not to.. them railway ini. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were above.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
----------------------------------

epoch 49 iter 103: train loss 0.00140. mean loss: 0.00108. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.64it/s]

train loss: 0.0010753383690965935





test loss: 0.3783213773585228
valid loss: 0.36798332040911325
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his face of his face was tanface his comned.
Real output:      the driver wore a cap and his face was t

epoch 50 iter 103: train loss 0.00040. mean loss: 0.00109. lr 3.000000e-05: 100%|██████████| 104/104 [00:16<00:00,  6.44it/s]

train loss: 0.0010873093930198452





test loss: 0.37693958496113855
valid loss: 0.37060959811683175
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were above.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be a be to a Italian to that that 'd not to.. them railway inic. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the mountains his the Gabriele.
Real output: 

epoch 51 iter 103: train loss 0.00035. mean loss: 0.00104. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.60it/s]

train loss: 0.0010441531927566617





test loss: 0.37786292361028223
valid loss: 0.3682765270675625
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his face of his face was tanbrown his comned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were di.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:    

epoch 52 iter 103: train loss 0.00016. mean loss: 0.00102. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.62it/s]

train loss: 0.0010207813556917245





test loss: 0.3764829544507595
valid loss: 0.36943365358286073
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the mountains his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be a be to a Italian to that he 'd not to.. it. inic. g.

epoch 53 iter 103: train loss 0.00103. mean loss: 0.00102. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.61it/s]

train loss: 0.001015520063027417





test loss: 0.377856493534812
valid loss: 0.37113879419661855
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought have a be to a Italian to that he 'd not to.. it railway ini. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were di.
Real output:      the two girls were asleep.


epoch 54 iter 103: train loss 0.00228. mean loss: 0.00103. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.61it/s]

train loss: 0.0010265054278044351





test loss: 0.37767648804618653
valid loss: 0.3743171769756455
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the mountains his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were above.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------

epoch 55 iter 103: train loss 0.00068. mean loss: 0.00098. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.65it/s]

train loss: 0.0009780434528903032





test loss: 0.37771628835204857
valid loss: 0.3721045770489418
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the shore his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought believe a be to a Italian to that he 'd not to.. it. ini. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------

epoch 56 iter 103: train loss 0.00046. mean loss: 0.00093. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.59it/s]

train loss: 0.0009263490974262822





test loss: 0.38217492060488967
valid loss: 0.37360196661304784
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought believe a be to a Italian to that he 'd not to.. it. inic. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his face door his face was tanface his comned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:      

epoch 57 iter 103: train loss 0.00101. mean loss: 0.00103. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.61it/s]

train loss: 0.0010301098009222187





test loss: 0.3824520373200796
valid loss: 0.3758800817099777
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his face door his face was tan. his comned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real ou

epoch 58 iter 103: train loss 0.00040. mean loss: 0.00097. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.57it/s]

train loss: 0.0009723928042848667





test loss: 0.37985782801027757
valid loss: 0.37560030509222736
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the mountains his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predic

epoch 59 iter 103: train loss 0.00198. mean loss: 0.00093. lr 3.000000e-05: 100%|██████████| 104/104 [00:16<00:00,  6.47it/s]

train loss: 0.0009293319050742027





test loss: 0.3826836579625147
valid loss: 0.3773709157028714
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the sun was his face of she face was tanface his simple ned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two gir

epoch 60 iter 103: train loss 0.00053. mean loss: 0.00089. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.60it/s]

train loss: 0.0008887242413318466





test loss: 0.3829230957002525
valid loss: 0.3747074829565512
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were above.

epoch 61 iter 103: train loss 0.00023. mean loss: 0.00091. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.55it/s]

train loss: 0.0009122986570079005





test loss: 0.38094535456543943
valid loss: 0.3750567775521729
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the shore his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:    

epoch 62 iter 103: train loss 0.00075. mean loss: 0.00090. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.59it/s]

train loss: 0.0009008592088111283





test loss: 0.3821488919566913
valid loss: 0.3802158303625949
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were above.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the sun was his gray of his face was tanbrown his valley ned.
Real output:      the driver wore a cap and h

epoch 63 iter 103: train loss 0.00018. mean loss: 0.00089. lr 3.000000e-05: 100%|██████████| 104/104 [00:15<00:00,  6.52it/s]

train loss: 0.0008936678014624005





test loss: 0.3835201986941947
valid loss: 0.3786359597165305
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were di.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted outpu

epoch 64 iter 103: train loss 0.00068. mean loss: 0.00090. lr 3.383500e-05: 100%|██████████| 104/104 [00:15<00:00,  6.70it/s]

train loss: 0.0008959008860424197





test loss: 0.3818831994950053
valid loss: 0.3758773555358251
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were di.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be a be to a Italian to that they 'd not to. two them bar 

epoch 65 iter 103: train loss 0.00241. mean loss: 0.00093. lr 3.976361e-05: 100%|██████████| 104/104 [00:15<00:00,  6.72it/s]

train loss: 0.0009308821678972051





test loss: 0.38372350713036146
valid loss: 0.3877259304260348
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his face of his face was tanbrown his comned.
Real output:      the driver wore a cap and his face was thin and very tanned.
-----------------------------------

epoch 66 iter 103: train loss 0.00053. mean loss: 0.00091. lr 4.609274e-05: 100%|██████████| 104/104 [00:16<00:00,  6.36it/s]

train loss: 0.0009145698417201997





test loss: 0.38549287051680575
valid loss: 0.3773857891089744
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the shore his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be a be to a son to that that 'd not to. two it. ini. g.
Real output:     

epoch 67 iter 103: train loss 0.00093. mean loss: 0.00101. lr 5.279939e-05: 100%|██████████| 104/104 [00:15<00:00,  6.64it/s]

train loss: 0.0010058906515881133





test loss: 0.386487227247422
valid loss: 0.3818966871177828
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the mountains his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his face of his face was tanface his comned.
Real out

epoch 68 iter 103: train loss 0.00071. mean loss: 0.00113. lr 5.985921e-05: 100%|██████████| 104/104 [00:15<00:00,  6.69it/s]

train loss: 0.0011346767524418493





test loss: 0.3837919493306832
valid loss: 0.37718796280321776
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought not a be to a Italian to them to 'd not to. two them railway iniit g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real o

epoch 69 iter 103: train loss 0.00057. mean loss: 0.00114. lr 6.724653e-05: 100%|██████████| 104/104 [00:15<00:00,  6.68it/s]

train loss: 0.001140626723589286





test loss: 0.3850756653820176
valid loss: 0.38122031844414034
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the door was his face of his face was t

epoch 70 iter 103: train loss 0.00265. mean loss: 0.00128. lr 7.493452e-05: 100%|██████████| 104/104 [00:15<00:00,  6.58it/s]

train loss: 0.0012792892168643167





test loss: 0.3855851454189025
valid loss: 0.38348405022878906
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be a be to some Italian to that to 'd not to. two it two ini. g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getti

epoch 71 iter 103: train loss 0.00030. mean loss: 0.00132. lr 8.289524e-05: 100%|██████████| 104/104 [00:16<00:00,  6.45it/s]

train loss: 0.0013151626886750679





test loss: 0.38697435347789744
valid loss: 0.3815895024317879
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: I driver was his face hat she face was tan. his comned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------

epoch 72 iter 103: train loss 0.00200. mean loss: 0.00150. lr 9.109978e-05: 100%|██████████| 104/104 [00:15<00:00,  6.65it/s]

train loss: 0.0014968434692351506





test loss: 0.3878095748733325
valid loss: 0.3877325953395517
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were di.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the girl was his face door she face was a. his comned.
Real output:      the driver wore a cap and his face was thin and v

epoch 73 iter 103: train loss 0.00119. mean loss: 0.00159. lr 9.951832e-05: 100%|██████████| 104/104 [00:15<00:00,  6.59it/s]

train loss: 0.0015898637105307828





test loss: 0.38820618081344177
valid loss: 0.38218837412627965
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the mountains his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
------------------------------------------------

epoch 74 iter 103: train loss 0.00147. mean loss: 0.00184. lr 1.081203e-04: 100%|██████████| 104/104 [00:15<00:00,  6.63it/s]

train loss: 0.0018426319181274336





test loss: 0.3924030448837453
valid loss: 0.37979890109115355
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the sun was his face door she face was a. his comned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were di.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai 

epoch 75 iter 103: train loss 0.00322. mean loss: 0.00203. lr 1.168744e-04: 100%|██████████| 104/104 [00:15<00:00,  6.60it/s]

train loss: 0.0020308897958155004





test loss: 0.3916485735989479
valid loss: 0.38684179533172297
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: the girls girls were di.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            "nē
Pred

epoch 76 iter 103: train loss 0.00156. mean loss: 0.00218. lr 1.257489e-04: 100%|██████████| 104/104 [00:15<00:00,  6.58it/s]

train loss: 0.0021799997775815427





test loss: 0.3896925584020385
valid loss: 0.3814570890353607
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw on mountains his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be a be had a legs to that it hoped not to. decided it. iniit g.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicte

epoch 77 iter 103: train loss 0.00272. mean loss: 0.00224. lr 1.347114e-04: 100%|██████████| 104/104 [00:15<00:00,  6.56it/s]

train loss: 0.0022395138814597605





test loss: 0.3865173016843128
valid loss: 0.38090948471883396
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it wasn getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni

epoch 78 iter 103: train loss 0.00106. mean loss: 0.00252. lr 1.437296e-04: 100%|██████████| 104/104 [00:15<00:00,  6.61it/s]

train loss: 0.0025189098658022257





test loss: 0.38935603003904046
valid loss: 0.379344244786832
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā

epoch 79 iter 103: train loss 0.00333. mean loss: 0.00269. lr 1.527705e-04: 100%|██████████| 104/104 [00:15<00:00,  6.73it/s]

train loss: 0.0026920810397803928





test loss: 0.3896842134375888
valid loss: 0.38146243405503194
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I want be a be a been Italian. them some 'd not to.. them.. i. m.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the s

epoch 80 iter 103: train loss 0.00513. mean loss: 0.00314. lr 1.618013e-04: 100%|██████████| 104/104 [00:15<00:00,  6.64it/s]

train loss: 0.003135421594309334





test loss: 0.387949101626873
valid loss: 0.38193980429892066
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the mountains his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: both two girls did hurry.
Real output:      the two girls were asleep.
-------------------------------------

epoch 81 iter 103: train loss 0.00356. mean loss: 0.00316. lr 1.707893e-04: 100%|██████████| 104/104 [00:15<00:00,  6.73it/s]

train loss: 0.003161476164398034





test loss: 0.3874904454113489
valid loss: 0.4061981214059366
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I suppose be a be to seen hell to that they hoped not want. underit medical. i. ces.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the mountains his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:    

epoch 82 iter 103: train loss 0.00780. mean loss: 0.00354. lr 1.797017e-04: 100%|██████████| 104/104 [00:15<00:00,  6.58it/s]

train loss: 0.0035388561205204148





test loss: 0.3861213195395757
valid loss: 0.383421842176635
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: both two girls did hurry.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no

epoch 83 iter 103: train loss 0.00336. mean loss: 0.00380. lr 1.885063e-04: 100%|██████████| 104/104 [00:15<00:00,  6.66it/s]

train loss: 0.0037995267283314695





test loss: 0.3891859520451132
valid loss: 0.38379311890483975
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw San mountains his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it was getting dark and
Real output:      outside it was getting dark.
--------------------------------------------------
Input:     

epoch 84 iter 103: train loss 0.00148. mean loss: 0.00366. lr 1.971709e-04: 100%|██████████| 104/104 [00:15<00:00,  6.76it/s]

train loss: 0.0036562063446931112





test loss: 0.38899133569863903
valid loss: 0.3790999314672238
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be a be to let cto up to 'd not to. every them two ini. ces.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw San mountains his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
----------------

epoch 85 iter 103: train loss 0.00344. mean loss: 0.00415. lr 2.056641e-04: 100%|██████████| 104/104 [00:15<00:00,  6.66it/s]

train loss: 0.004154333844780922





test loss: 0.38860348104892006
valid loss: 0.3839241289072209
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the shore his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan

epoch 86 iter 103: train loss 0.00382. mean loss: 0.00423. lr 2.139551e-04: 100%|██████████| 104/104 [00:14<00:00,  6.96it/s]

train loss: 0.004226993357476134





test loss: 0.38967165332960796
valid loss: 0.37891339946974506
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the shore his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            vaicāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his face was she face was hard. his looking ned.
Real ou

epoch 87 iter 103: train loss 0.00365. mean loss: 0.00441. lr 2.220137e-04: 100%|██████████| 104/104 [00:15<00:00,  6.53it/s]

train loss: 0.004413818521872879





test loss: 0.3885288133499134
valid loss: 0.381248731870909
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was a long of she face was a. his comned and
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I was not a be some a Italian there you that hoped not married. two them two. b. m.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es

epoch 88 iter 103: train loss 0.00212. mean loss: 0.00470. lr 2.298107e-04: 100%|██████████| 104/104 [00:15<00:00,  6.66it/s]

train loss: 0.0046972402129680495





test loss: 0.3884735001049128
valid loss: 0.3799975803709245
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his German of she face was d. his comned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
----------------------

epoch 89 iter 103: train loss 0.00190. mean loss: 0.00472. lr 2.373177e-04: 100%|██████████| 104/104 [00:15<00:00,  6.64it/s]

train loss: 0.004720256802661775





test loss: 0.38662298661039535
valid loss: 0.3808311986493635
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his glass of she face was d. his comned.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
------------------------------------

epoch 90 iter 103: train loss 0.00525. mean loss: 0.00480. lr 2.445074e-04: 100%|██████████| 104/104 [00:15<00:00,  6.59it/s]

train loss: 0.004798029838219428





test loss: 0.3864641422966877
valid loss: 0.38057764398085103
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
----------------------------------------------

epoch 91 iter 103: train loss 0.00908. mean loss: 0.00528. lr 2.513538e-04: 100%|██████████| 104/104 [00:15<00:00,  6.59it/s]

train loss: 0.00527527980067624





test loss: 0.3941685734204499
valid loss: 0.38492407347704916
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it watch was getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were everybody.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            "nē
Predicted

epoch 92 iter 103: train loss 0.00224. mean loss: 0.00544. lr 2.578319e-04: 100%|██████████| 104/104 [00:15<00:00,  6.58it/s]

train loss: 0.0054417860139018065





test loss: 0.3872711042682809
valid loss: 0.3784796258674301
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I must be a be to a Italian that about it hoped it to. easily them beard inenit tu.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the room was his hat of his face was over dhis comws.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input

epoch 93 iter 103: train loss 0.00567. mean loss: 0.00518. lr 2.639182e-04: 100%|██████████| 104/104 [00:15<00:00,  6.55it/s]

train loss: 0.005181853746762499





test loss: 0.39212681568530666
valid loss: 0.3824781191510123
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            abas meitenes bija iemigušas
Predicted output: both two girls were di.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            nē
Predicted output: no no.
Real output:      "no.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw the shore his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
-----------------------------------------

epoch 94 iter 103: train loss 0.00420. mean loss: 0.00522. lr 2.695907e-04: 100%|██████████| 104/104 [00:15<00:00,  6.55it/s]

train loss: 0.005217233533935191





test loss: 0.39021687393626536
valid loss: 0.3837490323427561
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw them mountains his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I would be a be to a hell to about it would not mind. handthem two ini. ces.
Real output:      I would like to have had the uniform off although I did not care much about the outward

epoch 95 iter 103: train loss 0.00798. mean loss: 0.00533. lr 2.748286e-04: 100%|██████████| 104/104 [00:15<00:00,  6.56it/s]

train loss: 0.005327789374860004





test loss: 0.3912852954074561
valid loss: 0.3825406212661717
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw their mountains from the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it hurt was getting dark.
Real output:      outside it was getting dark.
-----------------------------

epoch 96 iter 103: train loss 0.00508. mean loss: 0.00556. lr 2.796130e-04: 100%|██████████| 104/104 [00:15<00:00,  6.59it/s]

train loss: 0.0055578304902435495





test loss: 0.3933637792087463
valid loss: 0.38362281756089617
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            vaicāju
Predicted output: asked asked.
Real output:      I asked.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw both mountains of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            "nē
Predicted output: "no.
Real output:      "no.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: the two girls were all.
Real output:      the two girls were asleep.
-------------------------------------

epoch 97 iter 103: train loss 0.00964. mean loss: 0.00551. lr 2.839264e-04: 100%|██████████| 104/104 [00:15<00:00,  6.54it/s]

train loss: 0.005513740190788387





test loss: 0.38947504633162394
valid loss: 0.382352555992904
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the saw horse storm of the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I 'd be a be some some Italian to them to 'd to bomthem two them two ini. ces.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I as

epoch 98 iter 103: train loss 0.00717. mean loss: 0.00578. lr 2.877533e-04: 100%|██████████| 104/104 [00:15<00:00,  6.59it/s]

train loss: 0.005780671306778318





test loss: 0.3906045586588871
valid loss: 0.38411663392105616
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            abas meitenes bija iemigušas
Predicted output: both two girls were very.
Real output:      the two girls were asleep.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be a be a a same to them to 'd something cheer. two it two in.. ces.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
--------------------------------

epoch 99 iter 103: train loss 0.00405. mean loss: 0.00553. lr 2.910797e-04: 100%|██████████| 104/104 [00:15<00:00,  6.56it/s]

train loss: 0.0055296501646248195





test loss: 0.39073865883142117
valid loss: 0.3812473718714607
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            virs Sangabrielas ik pa brīdim nozibsnīja
Predicted output: the rowed top man his the Gabriele.
Real output:      I watched the flashes on San Gabriele.
--------------------------------------------------
Input:            es jautāju
Predicted output: I asked.
Real output:      I asked.
--------------------------------------------------
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the girl was his major of his face was tanface his beautiful ned.
Real output:      the driver wore a cap and his face was thin and very tanned.
-------------------------------------------

epoch 100 iter 103: train loss 0.00755. mean loss: 0.00548. lr 2.938935e-04: 100%|██████████| 104/104 [00:15<00:00,  6.54it/s]

train loss: 0.005476330145029351





test loss: 0.3968887376677559
valid loss: 0.3873372358513308
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv
Input:            šoferim galvā bija cepure, un viņa seja bija kalsna un stipri iedegusi
Predicted output: the driver was his major of she face was d. his lovely sted.
Real output:      the driver wore a cap and his face was thin and very tanned.
--------------------------------------------------
Input:            metās jau tumsa
Predicted output: it it wasn getting dark.
Real output:      outside it was getting dark.
--------------------------------------------------
Input:            labprāt būtu aizmetis šo uniformu, lai gan es daudz nebēdāju par ārieni
Predicted output: I ought be that be had seemed quit them it 'd it be.. them two ts i. ces.
Real output:      I would like to have had the uniform off although I did not care much about the outward forms.
------------------

In [39]:
epochs = range(len(test_loss_list))
# plt.subplots(nrows=number_of_layers, ncols=number_of_heads, figsize=(30, 20))
fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(20, 10))
axs[0].plot(epochs, train_loss_list)
axs[0].set_title('Train loss')
axs[0].set_xlabel('Epochs')
axs[0].set_ylabel('Loss')

axs[0].plot(epochs, test_loss_list)
axs[0].set_title('Test loss')
axs[0].set_xlabel('Epochs')
axs[0].set_ylabel('Loss')

axs[1].plot(epochs, valid_loss_list)
axs[1].set_title('Validation loss')
axs[1].set_xlabel('Epochs')
axs[1].set_ylabel('Loss')

axs[2].plot(epochs, valid_bleu_list)
axs[2].set_title('Validation BLEU')
axs[2].set_xlabel('Epochs')
axs[2].set_ylabel('BLEU')

plt.show()

  plt.show()


In [40]:
plt.savefig("hemingway_losses.png")


#Evaluate

In [42]:
print(train_loss_list)
print()
print(f"Max BLEU: {max(valid_bleu_list)}")

[0.7205399552790018,
 0.2949029627041175,
 0.2593017343718272,
 0.2342080306261778,
 0.21294650105902782,
 0.1919016853834574,
 0.1726944180062184,
 0.15298862719478515,
 0.1336078683200937,
 0.11529059242457151,
 0.09852381215359156,
 0.08259680458846,
 0.06892755774494547,
 0.057486347818317324,
 0.04645801672282127,
 0.03705406894620795,
 0.030191730313862745,
 0.024423152673989534,
 0.020093227333675783,
 0.0166948469212422,
 0.014363427803171082,
 0.012375041985741029,
 0.010763978222922351,
 0.009506045066070957,
 0.008383612524574766,
 0.007399112496596689,
 0.006526591390586243,
 0.005789005369968856,
 0.005158833622072752,
 0.0046927222149231685,
 0.004010097188606428,
 0.00369448169770364,
 0.0033317346510907207,
 0.0029357695020735264,
 0.0027357826094465475,
 0.0024081483413913073,
 0.002193531589000486,
 0.0019915709730972033,
 0.0018111658327353115,
 0.0016599886696964789,
 0.0015283610062816967,
 0.0013743404454614322,
 0.0013111722409331168,
 0.0012471394254064832,
 0.0

In [41]:
checkpoint = torch.load('minGPT-LV-EN-translator_model_best.pt')
model.load_state_dict(checkpoint)

<All keys matched successfully>

In [43]:
with open('hemingway_train_loss.txt', 'w') as f:
    f.write('\n'.join([str(s) for s in train_loss_list]))


In [44]:
with open('hemingway_test_loss.txt', 'w') as f:
    f.write('\n'.join([str(s) for s in test_loss_list]))

with open('hemingway_valid_loss.txt', 'w') as f:
    f.write('\n'.join([str(s) for s in valid_loss_list]))

with open('hemingway_valid_blue.txt', 'w') as f:
    f.write('\n'.join([str(s) for s in valid_bleu_list]))

In [45]:
from random import choice

for _ in range(5):
    idx = choice(range(len(valid_output)))

    context = valid_input[idx]
    encoded_input = tokenizer_input.encode(tokenizer_input.tokenize(context, block_size))
    x = torch.tensor(encoded_input, dtype=torch.long)[None,...].to(trainer.device)
    y = sample(model, x, block_size, temperature=1.0, sample=False, top_k=10)[0]

    intent = len(encoded_input) + 1

    predicted = y[intent:]
    completion = tokenizer_output.decode(predicted, True)
    print(f'Input:            {context}')
    print(f'Predicted output: {completion}')
    print(f'Real output:      {valid_output[idx]}')
    print('--------------------------------------------------')

Input:            mēs varam apmesties augšā kalnos &quot; .
Predicted output: we could go on around the mountain up there . &quot;
Real output:      we can find some place up in the mountains . &quot;
--------------------------------------------------
Input:            &quot; esmu noguris no ie@@ šanas &quot; .
Predicted output: &quot; I &apos;m tired from rowing . &quot; 
Real output:      &quot; I &apos;m tired of this walking / &apos; &quot; W@@ ell , all we have to do is walk now .
--------------------------------------------------
Input:            es attai@@ sīju un iz@@ krat@@ īju am@@ u@@ le@@ tu
Predicted output: I shook it . 
Real output:      I opened the capsule and spilled him out into my hand .
--------------------------------------------------
Input:            varbūt , tēvs .
Predicted output: maybe the father . 
Real output:      perhaps , father .
--------------------------------------------------
Input:            &quot; man ir daži liet@@ oti zoben@@ i par lē@@ tu n

In [46]:
idx = choice(range(len(valid_output)))

context = valid_input[idx]
encoded_input = tokenizer_input.encode(tokenizer_input.tokenize(context, block_size))
x = torch.tensor(encoded_input, dtype=torch.long)[None,...].to(trainer.device)
y, attention_state = sample(model, x, block_size, temperature=1.0, sample=False, top_k=10, output_attention=True)

intent = len(encoded_input) + 1

predicted = y[0][intent:]
completion = tokenizer_output.decode(predicted,)
print(f'Input:            {context}')
print(f'Predicted output: {completion}')
print(f'Real output:      {valid_output[idx]}')
print('--------------------------------------------------')


Input:            &quot; noteikti &quot; .
Predicted output: &quot; certainly . &quot; <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>
Real output:      &quot; sure . &quot;
--------------------------------------------------


In [47]:
fig, plots = plt.subplots(nrows=number_of_layers, ncols=number_of_heads, figsize=(30, 20))

axis_text = tokenizer_input.decode(encoded_input, True).split()

axis_text.append('<eos>')

axis_text += tokenizer_input.decode(predicted, True).split()

limit = len(axis_text)
for bi in range(number_of_layers):
    for hi in range(number_of_heads):
        attetion_plot = torch.zeros(limit, limit)
        for di in range(limit):
            attetion_plot[:di, :di] = attention_state[bi][di][0,hi,:di,:di].data

        ax = plots[bi][hi]
        ax.matshow(attetion_plot.numpy(), cmap='bone')

        # Set up axes
        ax.set_xticklabels([''] + axis_text, rotation=90)
        ax.set_yticklabels([''] + axis_text)

        # Show label at every tick
        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

        # Set up a title
        ax.set_title(f'Block {bi + 1} Head {hi + 1}', size=25, pad=30)
        
plt.show()

  ax.set_xticklabels([''] + axis_text, rotation=90)
  ax.set_yticklabels([''] + axis_text)
  plt.show()


In [48]:
# In case the previous cell is not plotting anything, uncomment the code below and execute. After that, the plotting should be fine.
# %matplotlib inline
# import numpy as np
# x = np.linspace(0, 10, 100)

# fig = plt.figure()
# plt.plot(x, np.sin(x), '-')
# plt.plot(x, np.cos(x), '--');

#Calculate BLEU

In [49]:
def clean_tokens(sentence):
    return sentence.replace('@@ ', '').replace(' @', '').replace('@ ', '')

In [50]:
# from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# smooth = SmoothingFunction().method7

translation_results = []
eval_text = []
bleu_results = []
for idx, context in enumerate(valid_input):
    encoded_input = tokenizer_input.encode(tokenizer_input.tokenize(context, block_size))
    x = torch.tensor(encoded_input, dtype=torch.long)[None,...].to(trainer.device)
    y = sample(model, x, block_size, temperature=1.0, sample=False, top_k=10)[0]

    intent = len(encoded_input) + 1
    predicted = y[intent:]
    completion = clean_tokens(tokenizer_output.decode(predicted, True))
    translation_results.append(completion)

    eval = clean_tokens(valid_output[idx])
    eval_text.append(eval)
    # bleu = sentence_bleu([eval], completion, smoothing_function=smooth)
    # bleu_results.append(bleu)

# print(f"Averare BLEU: {np.mean(bleu_results)}")

In [51]:
# joint_vocab -s 10000
# UNKNOWN TOKEN

# |v|

In [52]:
with open('hemingway_valid.out', 'w') as f:
    f.write("\n".join(translation_results))

with open('hemingway_valid.ref', 'w') as f:
    f.write("\n".join(eval_text))

In [53]:
!perl mosesdecoder/scripts/generic/multi-bleu.perl hemingway_valid.ref < hemingway_valid.out

BLEU = 13.47, 48.0/19.6/9.4/5.5 (BP=0.908, ratio=0.912, hyp_len=8670, ref_len=9509)
It is not advisable to publish scores from multi-bleu.perl.  The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups.  Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization.  Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer.


In [54]:
# BLEU = 7.92, 38.4/12.4/4.2/2.0 (BP=1.000, ratio=1.021, hyp_len=9711, ref_len=9509)

# joint_vocab -s 10,000
# BLEU = 8.61, 44.4/15.1/5.5/2.8 (BP=0.852, ratio=0.862, hyp_len=8198, ref_len=9509)

# full joint_vocab
# BLEU = 9.18, 41.7/14.1/5.4/2.8 (BP=0.948, ratio=0.950, hyp_len=9030, ref_len=9509)

# model_best.pt
# BLEU = 13.47, 48.0/19.6/9.4/5.5 (BP=0.908, ratio=0.912, hyp_len=8670, ref_len=9509)

In [55]:
!cat hemingway_valid.out | mosesdecoder/scripts/tokenizer/detokenizer.perl -l lv > hemingway_valid.detok.out
!cat hemingway_valid.ref | mosesdecoder/scripts/tokenizer/detokenizer.perl -l lv > hemingway_valid.detok.ref

Detokenizer Version $Revision: 4134 $
Language: lv
Detokenizer Version $Revision: 4134 $
Language: lv


In [56]:
#!pip install sacrebleu
!pip show sacrebleu

Name: sacrebleu
Version: 1.5.1
Summary: Hassle-free computation of shareable, comparable, and reproducible BLEU, chrF, and TER scores
Home-page: https://github.com/mjpost/sacrebleu
Author: Matt Post
Author-email: post@cs.jhu.edu
License: Apache License 2.0
Location: /home/gstrazds/anaconda3/envs/tw131/lib/python3.8/site-packages
Requires: portalocker
Required-by: 


In [57]:
import sacrebleu

with open('hemingway_valid.detok.ref', 'r') as f:
    eval_ref = [l.strip() for l in f.readlines()]
with open('hemingway_valid.detok.out', 'r') as f:
    translation_results = [l.strip() for l in f.readlines()]

refs = [eval_ref]
sys = translation_results
bleu = sacrebleu.corpus_bleu(sys, refs)
print(bleu.score)

13.481896471451254


In [58]:
# 7.918993465381516
# joint_vocab -s 10000  8.534786641173136

# full joint_vocab 9.174070997058795

# model_best.pt 
13.481896471451254

#Interactive translator

In [59]:
context = input("Enter your English text to translate: ")

# Predict Latvian output
encoded_input = tokenizer_input.encode(tokenizer_input.tokenize(context, block_size))
x = torch.tensor(encoded_input, dtype=torch.long)[None,...].to(trainer.device)
y, attention_state = sample(model, x, block_size, temperature=1.0, sample=False, top_k=10, output_attention=True)

intent = len(encoded_input) + 1

predicted = y[0][intent:]
completion = tokenizer_output.decode(predicted, True)
print(f'Input:            {context}')
print(f'Predicted output: {completion}')


# Plot attention
fig, plots = plt.subplots(nrows=number_of_layers, ncols=number_of_heads, figsize=(30, 20))

axis_text = tokenizer_input.decode(encoded_input, True).split()

axis_text.append('<eos>')

axis_text += tokenizer_input.decode(predicted, True).split()

limit = len(axis_text)
for bi in range(number_of_layers):
    for hi in range(number_of_heads):
        attetion_plot = torch.zeros(limit, limit)
        for di in range(limit):
            attetion_plot[:di, :di] = attention_state[bi][di][0,hi,:di,:di].data

        ax = plots[bi][hi]
        ax.matshow(attetion_plot.numpy(), cmap='bone')

        # Set up axes
        ax.set_xticklabels([''] + axis_text, rotation=90)
        ax.set_yticklabels([''] + axis_text)

        # Show label at every tick
        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

        # Set up a title
        ax.set_title(f'Block {bi + 1} Head {hi + 1}', size=25, pad=30)
        
plt.show()

KeyboardInterrupt: Interrupted by user