Google Colab-en exekutatuz gero, aktibatu GPUaren erabilpena (Entorno de ejecución -> Cambiar tipo de entorno de ejecución -> Acelerador por hardware -> GPU).

Jarri hemen "parametroak.pt" eta "bpe_hirurak.model" gorde dituzun karpetaren helbidea:

In [2]:
# Folder that holds "parametroak.pt" and "bpe_hirurak.model"; a later cell
# changes into this directory before loading both files by relative name.
helbidea = 'drive/My Drive/Colab Notebooks/HACOSDatuak'

# Google Colab-en exekutatuz gero:

Colab-etik probatzeko, liburutegi batzuk instalatu beharra dago. Exekutatu hurrengo gelaxkak.

In [3]:
# Install the two third-party packages imported further down
# (youtokentome and pyonmttok).
!pip install youtokentome
!pip install pyonmttok

Collecting youtokentome
[?25l  Downloading https://files.pythonhosted.org/packages/a3/65/4a86cf99da3f680497ae132329025b291e2fda22327e8da6a9476e51acb1/youtokentome-1.0.6-cp36-cp36m-manylinux2010_x86_64.whl (1.7MB)
[K     |████████████████████████████████| 1.7MB 4.6MB/s 
Installing collected packages: youtokentome
Successfully installed youtokentome-1.0.6
Collecting pyonmttok
[?25l  Downloading https://files.pythonhosted.org/packages/ff/fc/aaa5096a948f2923d5e012409586274956368e00a6a4008412fb2807882d/pyonmttok-1.18.5-cp36-cp36m-manylinux1_x86_64.whl (2.2MB)
[K     |████████████████████████████████| 2.2MB 4.7MB/s 
[?25hInstalling collected packages: pyonmttok
Successfully installed pyonmttok-1.18.5


In [4]:
import os
# Point CUDA_HOME at the CUDA 10.1 toolkit, presumably so the apex build
# below can locate it.
os.environ['CUDA_HOME'] = '/usr/local/cuda-10.1'

# Clone NVIDIA apex (imported later as `from apex import amp`) and install it
# from the local checkout.
!git clone https://github.com/NVIDIA/apex
%cd apex
!pip install -v --no-cache-dir ./
# Alternative install line that also requests the C++ and CUDA extensions
# (left disabled):
#!pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
%cd ..

Cloning into 'apex'...
remote: Enumerating objects: 7431, done.[K
remote: Total 7431 (delta 0), reused 0 (delta 0), pack-reused 7431[K
Receiving objects: 100% (7431/7431), 13.90 MiB | 18.85 MiB/s, done.
Resolving deltas: 100% (5024/5024), done.
/content/apex
Created temporary directory: /tmp/pip-ephem-wheel-cache-55_q6ukb
Created temporary directory: /tmp/pip-req-tracker-dfn5bvq6
Created requirements tracker '/tmp/pip-req-tracker-dfn5bvq6'
Created temporary directory: /tmp/pip-install-xdnjejt2
Processing /content/apex
  Created temporary directory: /tmp/pip-req-build-ty41_mn1
  Added file:///content/apex to build tracker '/tmp/pip-req-tracker-dfn5bvq6'
    Running setup.py (path:/tmp/pip-req-build-ty41_mn1/setup.py) egg_info for package from file:///content/apex
    Running command python setup.py egg_info


    torch.__version__  = 1.6.0+cu101


    running egg_info
    creating /tmp/pip-req-build-ty41_mn1/pip-egg-info/apex.egg-info
    writing /tmp/pip-req-build-ty41_mn1/pip-egg-in

# Exekutatu hauek guztiak

In [5]:
import copy
import math
import time

from apex import amp
import youtokentome as yttm
import pyonmttok
import torch
import torch.nn as nn
import torch.nn.functional as F

In [6]:
# Special token ids: padding, start-of-sentence and end-of-sentence.
pad = 0
sos = 2
eos = 3

# Number of positions in the positional-encoding table; the beam search also
# uses it as the width of its output buffer.
max_seq_len = 120
# Size of the embedding table and of the output projection.
vocab_size = 20000
d_model = 512    # Length of the vector that represents each token
N = 6            # Number of layers
heads = 8        # Number of attention heads

#batch_size = 90
# Passed to itzuli_beam as its batch budget in the demo cell.
batch_size_val = 50

# NOTE(review): not read anywhere in the visible part of the file.
fp16 = True

In [7]:
class Embedder(nn.Module):
    """Lookup table that maps integer token ids to dense vectors.

    Wraps a single ``nn.Embedding`` with ``vocab_size`` rows of width
    ``d_model``.
    """

    def __init__(self, vocab_size, d_model):
        super().__init__()
        self.embed = nn.Embedding(num_embeddings=vocab_size,
                                  embedding_dim=d_model)

    def forward(self, x):
        """Return the embedding vectors for the integer ids in ``x``."""
        vectors = self.embed(x)
        return vectors

In [8]:
class PositionalEncoder(nn.Module):
    """Scale the embeddings and add a fixed sinusoidal position table.

    Args:
        d_model: Width of the embedding vectors. The table is filled two
            columns at a time, so an even value is expected.
        max_len: Number of positions to precompute. When omitted, the
            module-level ``max_seq_len`` is used, which is what this class
            always did before the parameter existed.
    """

    def __init__(self, d_model, max_len=None):
        super().__init__()
        self.d_model = d_model
        if max_len is None:
            # Backward-compatible default: fall back to the global constant,
            # which must be defined before the module is built.
            max_len = max_seq_len

        pe = torch.zeros(max_len, d_model)
        for pos in range(max_len):
            for i in range(0, d_model, 2):
                # NOTE(review): ``i`` already advances in steps of two, so the
                # exponents are 2*i/d_model and 2*(i+1)/d_model. The formula
                # is deliberately left as it was so the table is unchanged.
                pe[pos, i] = math.sin(pos / (10000 ** ((2 * i) / d_model)))
                pe[pos, i + 1] = math.cos(
                    pos / (10000 ** ((2 * (i + 1)) / d_model)))

        # Leading axis of size 1 so the table broadcasts over the batch axis.
        pe = pe.unsqueeze(0)
        # Registered as a buffer (a tensor attached to the module) rather
        # than as a trainable parameter.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x * sqrt(d_model)`` plus the first ``x.size(1)`` table rows.

        ``x`` is indexed as (batch, sequence, d_model); the sequence length
        must not exceed the number of precomputed positions.
        """
        # Scale up the embeddings, presumably so they carry more weight than
        # the positional signal added next.
        x = x * math.sqrt(self.d_model)
        seq_len = x.size(1)
        return x + self.pe[:, :seq_len]

In [9]:
class MultiHeadAttention(nn.Module):
    """Multi-head attention built from four ``d_model x d_model`` projections.

    Args:
        heads: Number of attention heads.
        d_model: Model width; each head works on ``d_model // heads``
            features, so ``d_model`` is expected to be divisible by ``heads``.
        dropout: Probability for the dropout applied to the attention
            weights.
    """

    def __init__(self, heads, d_model, dropout = 0.1):
        super().__init__()

        self.d_model = d_model
        # Per-head width. Splitting the model width across heads keeps the
        # total amount of computation the same as a single full-width head.
        self.d_k = d_model // heads
        self.h = heads

        # Each projection covers all heads at once; they are split later.
        self.q_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)
        # Output projection applied to the concatenated heads.
        self.out = nn.Linear(d_model, d_model)

    def forward(self, q, k, v, mask=None):
        """Project ``q``, ``k`` and ``v``, attend per head, and merge the heads.

        In this file the encoder layers pass the same tensor for all three
        arguments, and the decoder's cross-attention passes an encoder
        output as both ``k`` and ``v``. ``mask`` is handed to ``attention``
        unchanged. The result is indexed as (batch, sequence, d_model).
        """
        bs = q.size(0)
        head_shape = (bs, -1, self.h, self.d_k)

        # Project with all heads fused, split off the head axis, then move
        # it in front of the sequence axis: (bs, h, sl, d_k).
        k_heads = self.k_linear(k).view(*head_shape).transpose(1, 2)
        q_heads = self.q_linear(q).view(*head_shape).transpose(1, 2)
        v_heads = self.v_linear(v).view(*head_shape).transpose(1, 2)

        per_head = attention(q_heads, k_heads, v_heads, self.d_k, mask,
                             self.dropout)

        # Back to (bs, sl, h, d_k), then flatten the heads into d_model.
        merged = per_head.transpose(1, 2).contiguous()
        merged = merged.view(bs, -1, self.d_model)

        return self.out(merged)

In [10]:
def attention(q, k, v, d_k, mask=None, dropout=None):
    """Scaled dot-product attention over the last two axes.

    Args:
        q, k, v: Query, key and value tensors. Leading axes are treated as
            independent batch axes by ``torch.matmul``; the caller in this
            file passes (batch, heads, sequence, d_k).
        d_k: Scores are divided by ``sqrt(d_k)``.
        mask: Optional tensor; positions where it equals 0 are suppressed.
            An axis is inserted at position 1 so it broadcasts over heads.
        dropout: Optional callable applied to the attention weights.

    Returns:
        The attention-weighted combination of ``v``.
    """
    # One score per (query position, key position) pair.
    weights = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)

    if mask is not None:
        head_mask = mask.unsqueeze(1)
        # Masked positions get a large negative score so the softmax gives
        # them (practically) zero weight. The fill value is -1e4, presumably
        # so it stays representable in half precision.
        weights = weights.masked_fill(head_mask == 0, -1e4)

    # Normalise over the key axis.
    weights = F.softmax(weights, dim=-1)

    if dropout is not None:
        weights = dropout(weights)

    return torch.matmul(weights, v)

In [11]:
# Applied position by position: the linear layers act on the last axis, so
# every position shares the same weights. Each layer of the stack gets its
# own copy of this module.
class FeedForward(nn.Module):
    """Two-layer feed-forward block: Linear -> ReLU -> Dropout -> Linear.

    Args:
        d_model: Input and output width.
        d_ff: Width of the hidden layer (2048 by default).
        dropout: Dropout probability applied after the ReLU.
    """

    def __init__(self, d_model, d_ff=2048, dropout = 0.1):
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        """Return the block's output; the last axis keeps width ``d_model``."""
        hidden = F.relu(self.linear_1(x))
        hidden = self.dropout(hidden)
        return self.linear_2(hidden)

In [12]:
class Norm(nn.Module):
    """Normalise the last axis, then apply a learned gain and bias.

    Args:
        d_model: Length of the gain (``alpha``) and ``bias`` vectors.
        eps: Added to the standard deviation to avoid division by zero.
    """

    def __init__(self, d_model, eps = 1e-6):
        super().__init__()

        self.size = d_model
        # Learned rescaling of the normalised values; starts as the identity
        # (gain of ones, bias of zeros).
        self.alpha = nn.Parameter(torch.ones(self.size))
        self.bias = nn.Parameter(torch.zeros(self.size))
        self.eps = eps

    def forward(self, x):
        """Return ``alpha * (x - mean) / (std + eps) + bias`` along the last axis."""
        # Statistics keep the reduced axis with size 1 so they broadcast.
        mean = x.mean(dim=-1, keepdim=True)
        std = x.std(dim=-1, keepdim=True)
        centred = x - mean
        gained = self.alpha * centred
        return gained / (std + self.eps) + self.bias

In [13]:
def get_clones(module, N):
    """Return an ``nn.ModuleList`` of ``N`` independent deep copies of ``module``."""
    clones = nn.ModuleList()
    for _ in range(N):
        clones.append(copy.deepcopy(module))
    return clones

In [14]:
class EncoderLayer(nn.Module):
    """One encoder layer: self-attention and feed-forward, each pre-normalised.

    The normalisation is applied to the input of each sub-layer, and the
    sub-layer output (after dropout) is added back to the un-normalised input.

    Args:
        d_model: Model width.
        heads: Number of attention heads.
        dropout: Dropout probability. It is used for both residual branches
            and is also forwarded to the attention and feed-forward
            sub-modules, which previously always used their own default.
    """

    def __init__(self, d_model, heads, dropout = 0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.attn = MultiHeadAttention(heads, d_model, dropout=dropout)
        self.ff = FeedForward(d_model, dropout=dropout)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, x, mask):
        """Run the layer on ``x``; ``mask`` is passed to the self-attention."""
        x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn(x2, x2, x2, mask))
        x2 = self.norm_2(x)
        x = x + self.dropout_2(self.ff(x2))
        return x

In [15]:
class DualDecoderLayer(nn.Module):
    """Decoder layer that attends to two encoder outputs.

    Sub-layers, in order: masked self-attention, attention over the second
    encoder output, attention over the first encoder output, feed-forward.
    Each one is pre-normalised and wrapped in a residual connection with
    dropout.

    Args:
        d_model: Model width.
        heads: Number of attention heads.
        dropout: Dropout probability. It is used for the residual branches
            and is also forwarded to the attention and feed-forward
            sub-modules, which previously always used their own default.
    """

    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.norm_3 = Norm(d_model)
        self.norm_4 = Norm(d_model)

        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        self.dropout_3 = nn.Dropout(dropout)
        self.dropout_4 = nn.Dropout(dropout)

        self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout)
        self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout)
        self.attn_3 = MultiHeadAttention(heads, d_model, dropout=dropout)
        self.ff = FeedForward(d_model, dropout=dropout)

    def forward(self, x, e1_outputs, e2_outputs, src1_mask, src2_mask, trg_mask):
        """Run the layer.

        Args:
            x: Decoder-side input.
            e1_outputs: Output of the encoder for the SRC sentences.
            e2_outputs: Output of the encoder for the MT sentences.
            src1_mask: Mask for the SRC sentences.
            src2_mask: Mask for the MT sentences.
            trg_mask: Mask for the decoder self-attention.
        """
        x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))
        # Cross-attention over the MT encoder output first ...
        x2 = self.norm_2(x)
        x = x + self.dropout_2(
            self.attn_2(x2, e2_outputs, e2_outputs, src2_mask))
        # ... then over the SRC encoder output.
        x2 = self.norm_3(x)
        x = x + self.dropout_3(
            self.attn_3(x2, e1_outputs, e1_outputs, src1_mask))
        x2 = self.norm_4(x)
        x = x + self.dropout_4(self.ff(x2))
        return x

In [16]:
class SharedEncoder(nn.Module):
    """Stack of ``N`` encoder layers applied to already-embedded input.

    Args:
        vocab_size: Accepted for signature compatibility; not used here
            because the embedding is done outside this module.
        d_model: Model width.
        N: Number of encoder layers.
        heads: Number of attention heads per layer.
    """

    def __init__(self, vocab_size, d_model, N, heads):
        super().__init__()
        self.N = N
        # PositionalEncoder is built with d_model only, so it takes its
        # table length from its own default.
        self.pe = PositionalEncoder(d_model)
        self.layers = get_clones(EncoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, src, mask):
        """Encode ``src``, which must already have gone through the embedder."""
        x = self.pe(src)
        # Use the instance's own depth. The previous code read the
        # module-level ``N``, which breaks whenever that global is missing
        # or differs from the depth this encoder was built with.
        for i in range(self.N):
            x = self.layers[i](x, mask)
        return self.norm(x)
      
class SharedDualDecoder(nn.Module):
    """Stack of ``N`` dual-source decoder layers applied to embedded input.

    Args:
        vocab_size: Accepted but not used; embedding happens outside.
        d_model: Model width.
        N: Number of decoder layers.
        heads: Number of attention heads per layer.
    """

    def __init__(self, vocab_size, d_model, N, heads):
        super().__init__()
        self.N = N
        self.pe = PositionalEncoder(d_model)
        self.layers = get_clones(DualDecoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, trg, e1_outputs, e2_outputs, src1_mask, src2_mask, trg_mask):
        """Decode ``trg``, which must already have gone through the embedder.

        The two encoder outputs and the three masks are passed unchanged to
        every layer; the final output is normalised.
        """
        hidden = self.pe(trg)
        depth = self.N
        for layer_ix in range(depth):
            layer = self.layers[layer_ix]
            hidden = layer(hidden, e1_outputs, e2_outputs,
                           src1_mask, src2_mask, trg_mask)
        return self.norm(hidden)

In [17]:
class Transformer(nn.Module):
    """Single-source encoder-decoder model with a linear output projection.

    NOTE(review): ``Encoder`` and ``Decoder`` are not defined in the visible
    part of this file, so instantiating this class would raise ``NameError``
    unless they are defined elsewhere. The model that is actually built
    later in the file is ``SharedDualTransformer``, not this class.
    """
    def __init__(self, src_vocab, trg_vocab, d_model, N, heads):
        super().__init__()
        self.encoder = Encoder(src_vocab, d_model, N, heads)
        self.decoder = Decoder(trg_vocab, d_model, N, heads)
        # Projects decoder output of width d_model onto trg_vocab scores.
        self.out = nn.Linear(d_model, trg_vocab)
    def forward(self, src, trg, src_mask, trg_mask):
        # Encode the source, decode the target against it, then project.
        e_outputs = self.encoder(src, src_mask)
        d_output = self.decoder(trg, e_outputs, src_mask, trg_mask)
        output = self.out(d_output)
        return output

In [18]:
class SharedDualTransformer(nn.Module):
    """Dual-source Transformer with one shared embedding and one shared encoder.

    Both source sequences and the target sequence use the same embedding
    table, and both sources go through the same encoder. The decoder
    attends to both encoder outputs, and a linear layer maps its output to
    ``vocab_size`` scores.

    Args:
        vocab_size: Size of the shared vocabulary.
        d_model: Model width.
        N: Number of layers in the encoder and in the decoder.
        heads: Number of attention heads.
    """

    def __init__(self, vocab_size, d_model, N, heads):
        super().__init__()
        self.embed = Embedder(vocab_size, d_model)
        self.encoder = SharedEncoder(vocab_size, d_model, N, heads)
        self.decoder = SharedDualDecoder(vocab_size, d_model, N, heads)
        self.out = nn.Linear(d_model, vocab_size)

    def forward(self, src1, src2, trg, src1_mask, src2_mask, trg_mask):
        """Return vocabulary scores for every target position."""
        # Same embedder and encoder for both sources.
        e1_outputs = self.encoder(self.embed(src1), src1_mask)
        e2_outputs = self.encoder(self.embed(src2), src2_mask)

        d_output = self.decoder(self.embed(trg), e1_outputs, e2_outputs,
                                src1_mask, src2_mask, trg_mask)
        return self.out(d_output)

In [19]:
# Tokenizer used on the input sentences before BPE encoding.
tokenizer = pyonmttok.Tokenizer(
    "conservative",
    joiner_annotate=True,
    case_markup=True,
    soft_case_regions=True,
)

# Second instance with the same options, used to detokenize the output.
destokenizer = pyonmttok.Tokenizer(
    "conservative",
    joiner_annotate=True,
    case_markup=True,
    soft_case_regions=True,
)

def tokenizatu_str(lerroa):
    """Tokenize ``lerroa`` and return the tokens joined by single spaces.

    The three long case-markup placeholders are replaced by the short forms
    ``｟C``, ``｟B`` and ``｟E``.
    """
    pieces, _ = tokenizer.tokenize(lerroa)
    joined = ' '.join(pieces)
    shortened = (
        joined
        .replace('｟mrk_case_modifier_C｠', '｟C')
        .replace('｟mrk_begin_case_region_U｠', '｟B')
        .replace('｟mrk_end_case_region_U｠', '｟E')
    )
    return shortened

In [20]:
def ordenatzeko(elem):
    """Sort key: the length of the first item of ``elem``."""
    first = elem[0]
    return len(first)

def sartu_padding(esaldia, luzera):
    """Return ``esaldia`` extended with ``pad`` ids up to length ``luzera``.

    A new list is returned. If ``esaldia`` is already at least ``luzera``
    long, nothing is appended.
    """
    missing = luzera - len(esaldia)
    return esaldia + [pad] * missing

In [69]:
def itzuli_beam(model, src, k, b_s):
    """Translate pairs of token-id sequences with beam search.

    Relies on the module-level names ``pad``, ``sos``, ``eos``,
    ``max_seq_len``, ``vocab_size``, ``bpe_hirurak``, ``ordenatzeko`` and
    ``sartu_padding``, and moves tensors to the GPU with ``.cuda()``.

    Args:
        model: Object exposing ``eval()``, ``embed``, ``encoder``,
            ``decoder`` and ``out`` (a ``SharedDualTransformer`` in this
            file). Assumed to be on the GPU already.
        src: List of pairs; items ``[0]`` and ``[1]`` of each pair are lists
            of token ids for the two source sentences.
        k: Beam width.
        b_s: Decoder batch budget; ``b_s // k`` sentences (at least one) are
            encoded together.

    Returns:
        A list of decoded strings in the same order as ``src``.

    NOTE(review): a hypothesis whose very first generated token is ``eos``
    is not marked as finished, because ``bukatu_du`` starts all ``False``.
    """

    hasi = time.time()

    model.eval()

    with torch.no_grad():

        # Remember each pair's original index, then sort by the length of
        # the first source so each batch can be padded to its last element.
        src = [(esaldia[0], esaldia[1], i) for i, esaldia in enumerate(src)]
        src.sort(key=ordenatzeko)
        idazteko = len(src) * ["\n"]

        # Batch size for the encoder and for the first decoder position.
        # Clamped to 1: when k > b_s the division gives 0, which used to
        # produce an empty batch and an IndexError on ``batch[-1]``.
        batch_size_enc = max(1, b_s // k)

        berriro_saiatu = False
        j = 0
        while j < len(src):

            print(torch.cuda.memory_allocated(0))

            # The previous attempt at this position ran short of memory:
            # retry with half as many sentences.
            if berriro_saiatu and batch_size_enc > 1:
                batch_size_enc //= 2

            print("{}. esalditik aurrera itzultzen...".format(j+1))

            batch = src[j : j+batch_size_enc]
            # The batch is sorted by first-source length, so the last item
            # is the longest one.
            luzeena = len(batch[-1][0])
            src1 = [sartu_padding(esaldia[0], luzeena) for esaldia in batch]
            luzeena = max([len(esaldia[1]) for esaldia in batch])
            src2 = [sartu_padding(esaldia[1], luzeena) for esaldia in batch]
            src1 = torch.LongTensor(src1)
            src2 = torch.LongTensor(src2)

            src1_mask = (src1 != pad).unsqueeze(-2).cuda()
            src2_mask = (src2 != pad).unsqueeze(-2).cuda()
            emb1 = model.embed(src1.cuda())
            e1_outputs = model.encoder(emb1, src1_mask)
            emb2 = model.embed(src2.cuda())
            e2_outputs = model.encoder(emb2, src2_mask)

            # Get the candidates for the first word:

            trg_mask = torch.tril(torch.ones(1, 1, 1, dtype=torch.uint8)).cuda()

            hasierakoa = torch.full(
                (len(src1), 1), sos, dtype=src1.dtype).cuda()
            embed = model.embed(hasierakoa)
            out = model.out(model.decoder(embed,
                e1_outputs, e2_outputs, src1_mask, src2_mask, trg_mask))
            out = F.softmax(out, dim=-1)
            # Continue on the CPU in float32.
            out = out.cpu().float()

            val, ix = out[:, -1].topk(k)

            # Prepare the loop for the following words. From here on every
            # sentence owns k consecutive rows, one per hypothesis.

            batch_size_dec = k*len(src1)
            outputs = torch.zeros(
                batch_size_dec, max_seq_len, dtype=src1.dtype)
            outputs[:, 0] = torch.LongTensor([sos])
            outputs[:, 1] = ix.flatten()
            # Accumulated log-probability of each hypothesis.
            beam_prob = val.flatten().log()

            src1_mask = src1_mask.repeat_interleave(k, dim=0)
            src2_mask = src2_mask.repeat_interleave(k, dim=0)
            e1_outputs = e1_outputs.repeat_interleave(k, dim=0)
            e2_outputs = e2_outputs.repeat_interleave(k, dim=0)

            bukatu_du = batch_size_dec*[False]

            berriro_saiatu = False

            # Loop for the following words:

            for i in range(2, max_seq_len):

                # Give up on this batch and retry it smaller when allocated
                # GPU memory passes the threshold.
                if torch.cuda.memory_allocated(0) > 12_000_000_000 and \
                    batch_size_enc > 1:
                    print("Memoria betetzen ari da.")
                    berriro_saiatu = True
                    break

                trg_mask = torch.tril(torch.ones(1, i, i, dtype=torch.uint8)).cuda()
                embed = model.embed(outputs[:, :i].cuda())
                out = model.decoder(embed,
                    e1_outputs, e2_outputs, src1_mask, src2_mask, trg_mask)
                out = model.out(out)
                # Only the scores for the newest position are needed.
                out = out[:, -1]
                out = F.softmax(out, dim=-1)
                out = out.log()
                out = out.cpu()
                out = out.float()

                outputs_berria = torch.zeros_like(outputs)
                bukatu_du_berria = batch_size_dec*[False]

                for batch_ix in range(0, batch_size_dec, k):
                    # Scores of all k * vocab_size continuations of this
                    # sentence, hypothesis after hypothesis.
                    aukera_guztiak = torch.tensor([])
                    for k_ix in range(k):
                        if not bukatu_du[batch_ix+k_ix]:
                            berriak = beam_prob[batch_ix+k_ix]+ \
                                out[batch_ix+k_ix]
                            aukera_guztiak = torch.cat((aukera_guztiak, berriak))
                        else:
                            # This hypothesis has already reached <eos>: it
                            # must not be expanded, but it has to stay as a
                            # candidate. So it is entered once and the
                            # remaining slots are filled with -inf.
                            berriak = torch.tensor(
                            [beam_prob[batch_ix+k_ix]]+(vocab_size-1)*[-math.inf])
                            aukera_guztiak = torch.cat((aukera_guztiak, berriak))

                    val, ix = aukera_guztiak.topk(k)

                    for k_ix in range(k):
                        # Which previous hypothesis it comes from:
                        ix_zahar = ix[k_ix] // vocab_size
                        # Id of the new word (0 for a kept finished
                        # hypothesis, since that one sits in slot 0):
                        hitza = ix[k_ix] % vocab_size

                        # Update the finished flags:
                        if hitza == eos:
                            bukatu_du_berria[batch_ix+k_ix] = True
                        else:
                            bukatu_du_berria[batch_ix+k_ix] = \
                                bukatu_du[batch_ix+ix_zahar]

                        # Take the previous words:
                        outputs_berria[batch_ix+k_ix] = outputs[batch_ix+ix_zahar]

                        # Append the new word:
                        outputs_berria[batch_ix+k_ix, i] = hitza
                        # Update the hypothesis score:
                        beam_prob[batch_ix+k_ix] = val[k_ix]

                outputs = outputs_berria
                bukatu_du = bukatu_du_berria

                # If every hypothesis has finished, move on to the next batch
                if not (False in bukatu_du):
                    break

            if berriro_saiatu:
                # j is not advanced, so the same sentences are retried.
                continue

            # Take the best hypothesis of each sentence and store it:
            for batch_ix in range(0, batch_size_dec, k):
                max_ix = beam_prob[batch_ix : batch_ix+k].argmax()
                onena = outputs[batch_ix + max_ix]
                for pos in range(max_seq_len):
                    if onena[pos] == eos:
                        break
                else:
                    # No <eos> was produced: keep every generated token.
                    # Without this, pos stayed at max_seq_len - 1 and the
                    # last token was dropped.
                    pos = max_seq_len
                # Skip the leading <sos> and stop before <eos>.
                deskodetuta = bpe_hirurak.decode(onena[1:pos].tolist())[0]
                ordenatuko_ix = j + batch_ix//k
                jatorrizko_ix = src[ordenatuko_ix][2]
                idazteko[jatorrizko_ix] = deskodetuta

            j = j + batch_size_enc

    print(f"{time.time()-hasi} segundo behar izan ditu.")
    return idazteko

In [22]:
# Change into the data folder so the files below can be opened by bare name.
%cd $helbidea

/content/drive/My Drive/Colab Notebooks/HACOSDatuak


In [23]:
# BPE model used to encode the tokenized inputs and to decode the output ids.
bpe_hirurak = yttm.BPE(model='bpe_hirurak.model')

In [24]:
# Build the network from the hyper-parameters above and move it to the GPU.
model = SharedDualTransformer(vocab_size, d_model, N, heads)
model.cuda()
# Let apex amp prepare the model with optimization level 'O2'.
model = amp.initialize(model, opt_level='O2')
# The checkpoint file holds a dict; the weights are under its 'model' key.
model.load_state_dict(torch.load('parametroak.pt')['model'])

# Switch to evaluation mode.
model.eval()

Selected optimization level O2:  FP16 training with FP32 batchnorm and FP32 master weights.

Defaults for this optimization level are:
enabled                : True
opt_level              : O2
cast_model_type        : torch.float16
patch_torch_functions  : False
keep_batchnorm_fp32    : True
master_weights         : True
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O2
cast_model_type        : torch.float16
patch_torch_functions  : False
keep_batchnorm_fp32    : True
master_weights         : True
loss_scale             : dynamic


SharedDualTransformer(
  (embed): Embedder(
    (embed): Embedding(20000, 512)
  )
  (encoder): SharedEncoder(
    (pe): PositionalEncoder()
    (layers): ModuleList(
      (0): EncoderLayer(
        (norm_1): Norm()
        (norm_2): Norm()
        (attn): MultiHeadAttention(
          (q_linear): Linear(in_features=512, out_features=512, bias=True)
          (v_linear): Linear(in_features=512, out_features=512, bias=True)
          (k_linear): Linear(in_features=512, out_features=512, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (out): Linear(in_features=512, out_features=512, bias=True)
        )
        (ff): FeedForward(
          (linear_1): Linear(in_features=512, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear_2): Linear(in_features=2048, out_features=512, bias=True)
        )
        (dropout_1): Dropout(p=0.1, inplace=False)
        (dropout_2): Dropout(p=0.1, inplace=False)
      )
      (1): Encod

# Itzultzailea probatzeko

Idatzi hurrengo gelaxkan gaztelaniazko eta ingelesezko testuak. Ondoren, exekutatu azpikoak. Euskarazko itzulpena behean agertuko da.

In [66]:
# The Spanish and English versions of the sentence to translate.
es = "Esto es un traductor de dos fuentes: pruébalo aquí."
en = "This is a dual source translator: try it here."

In [70]:
# One (Spanish, English) pair per sentence to translate.
itzultzeko = [[es, en]]

# Tokenize both sides of every pair.
itzultzeko_tok = [
    [tokenizatu_str(testua) for testua in bikotea]
    for bikotea in itzultzeko
]

# Turn the tokenized text into BPE ids.
itzultzeko_zenb = [
    [bpe_hirurak.encode(testua) for testua in bikotea]
    for bikotea in itzultzeko_tok
]

# Beam search with beam width 4.
itzulia = itzuli_beam(model, itzultzeko_zenb, 4, batch_size_val)

142761472
1. esalditik aurrera itzultzen...
0.2946932315826416 segundo behar izan ditu.


In [71]:
for lerroa in itzulia:
    # Restore the long case-markup placeholders, then detokenize and print.
    lerroa = (
        lerroa
        .replace('｟C', '｟mrk_case_modifier_C｠')
        .replace('｟B', '｟mrk_begin_case_region_U｠')
        .replace('｟E', '｟mrk_end_case_region_U｠')
    )
    print(destokenizer.detokenize(lerroa.split()))

Hau bi iturrien itzultzaile bat da: proba ezazu hemen.
