In [1]:
!pip install youtokentome
!pip install pytelegrambotapi

Collecting youtokentome
  Downloading youtokentome-1.0.6-cp37-cp37m-manylinux2010_x86_64.whl (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 9.2 MB/s 
Installing collected packages: youtokentome
Successfully installed youtokentome-1.0.6
Collecting pytelegrambotapi
  Downloading pyTelegramBotAPI-4.4.0.tar.gz (147 kB)
[K     |████████████████████████████████| 147 kB 7.6 MB/s 
Building wheels for collected packages: pytelegrambotapi
  Building wheel for pytelegrambotapi (setup.py) ... [?25l[?25hdone
  Created wheel for pytelegrambotapi: filename=pyTelegramBotAPI-4.4.0-py3-none-any.whl size=128152 sha256=4aef33818b739f0a54153bb6ff59aa53253dcf517a9f920163d1f210cfced4ed
  Stored in directory: /root/.cache/pip/wheels/1e/6d/ff/435b12799b8212017f08dbbfdb7a1e5174d72d20fb7c0f4703
Successfully built pytelegrambotapi
Installing collected packages: pytelegrambotapi
Successfully installed pytelegrambotapi-4.4.0


In [2]:
# Download the pretrained artefacts from Google Drive into the working directory.
# The file name each ID resolves to is taken from the captured gdown output.
# -> last-val-model.pt (the state dict loaded into the model further down)
!gdown --id 1ziBcswF_zllclO2laZP7KrqFsn-eorGR
# -> best-val-model.pt (downloaded but not loaded anywhere in the visible cells)
!gdown --id 1tUnOlmPWWU1ri31l2QlQM59Hs4xhtJt8
# -> trg_vocab.pth (target-side vocabulary)
!gdown --id 1hEgmZQetO0W373-MdsRrrq6kYNSfDmhr
# -> src_vocab.pth (source-side vocabulary)
!gdown --id 1ehMlyygXlJXWelTmkzUaGWWmVh1Q1bB9
# -> bpe_en.bin (YouTokenToMe BPE model used to tokenize the input sentence)
!gdown --id 1OKCeRoefDIEN-PgRyVsBjSJkl97lXU5_

Downloading...
From: https://drive.google.com/uc?id=1ziBcswF_zllclO2laZP7KrqFsn-eorGR
To: /content/last-val-model.pt
100% 58.6M/58.6M [00:00<00:00, 81.6MB/s]
Downloading...
From: https://drive.google.com/uc?id=1tUnOlmPWWU1ri31l2QlQM59Hs4xhtJt8
To: /content/best-val-model.pt
100% 58.6M/58.6M [00:00<00:00, 75.7MB/s]
Downloading...
From: https://drive.google.com/uc?id=1hEgmZQetO0W373-MdsRrrq6kYNSfDmhr
To: /content/trg_vocab.pth
100% 489k/489k [00:00<00:00, 34.1MB/s]
Downloading...
From: https://drive.google.com/uc?id=1ehMlyygXlJXWelTmkzUaGWWmVh1Q1bB9
To: /content/src_vocab.pth
100% 442k/442k [00:00<00:00, 99.1MB/s]
Downloading...
From: https://drive.google.com/uc?id=1OKCeRoefDIEN-PgRyVsBjSJkl97lXU5_
To: /content/bpe_en.bin
100% 203k/203k [00:00<00:00, 67.5MB/s]


In [3]:
import os
import random
import unicodedata

import numpy as np
import telebot
import torch
import torch.nn as nn
import torch.optim as optim
import torchtext
from torchtext.legacy.data import Field, BucketIterator
from youtokentome import BPE

# --- Reproducibility -------------------------------------------------------
# Seed every random number generator in use, in the same order as before.
SEED = 1234

for seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_rng(SEED)
torch.backends.cudnn.deterministic = True

# --- Runtime device --------------------------------------------------------
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# --- Vocabularies and tokenizer --------------------------------------------
# NOTE(review): torch.load unpickles arbitrary Python objects; only load these
# files when they come from a trusted source.
trg_vocab = torch.load('trg_vocab.pth')
src_vocab = torch.load('src_vocab.pth')
bpe_en = BPE('bpe_en.bin')

# --- Hyper-parameters ------------------------------------------------------
SRC_PAD, TRG_PAD = src_vocab.stoi['<pad>'], trg_vocab.stoi['<pad>']
INP_DIM, OUT_DIM = len(src_vocab), len(trg_vocab)
HID_DIM = 256
N_LAYERS = 3
N_HEADS = 8
DROPOUT = 0.1
BATCH_SIZE = 128

class EncoderBlock(nn.Module):
    """Single encoder layer: self-attention followed by a two-layer feed-forward net.

    Both sub-layers are applied as ``LayerNorm(x + Dropout(sublayer(x)))``.
    Inputs are batch-first because the attention module is built with
    ``batch_first=True``.
    """

    def __init__(self, hid_dim, n_heads, dropout):
        """Create the sub-modules.

        Args:
            hid_dim: width of the token representations.
            n_heads: number of attention heads.
            dropout: dropout probability used inside attention and on the
                residual / feed-forward paths.
        """
        super().__init__()

        # Attribute names double as state_dict keys, so they must stay stable.
        self.attn = nn.MultiheadAttention(
            hid_dim,
            n_heads,
            dropout=dropout,
            batch_first=True,
        )
        self.attn_norm = nn.LayerNorm(hid_dim)
        self.fc1 = nn.Linear(hid_dim, 2 * hid_dim)
        self.fc2 = nn.Linear(2 * hid_dim, hid_dim)
        self.ff_norm = nn.LayerNorm(hid_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src, src_pad_mask, src_attn_mask):
        """Run the layer.

        Args:
            src: input representations.
            src_pad_mask: forwarded as ``key_padding_mask`` to the attention.
            src_attn_mask: forwarded as ``attn_mask`` to the attention.

        Returns:
            Tensor produced by the second LayerNorm, same shape as ``src``.
        """
        # Self-attention sub-layer (attention weights are not needed here).
        attended, _ = self.attn(
            src, src, src,
            key_padding_mask=src_pad_mask,
            attn_mask=src_attn_mask,
        )
        normed = self.attn_norm(src + self.dropout(attended))

        # Position-wise feed-forward sub-layer.
        hidden = self.dropout(torch.relu(self.fc1(normed)))
        projected = self.dropout(self.fc2(hidden))
        return self.ff_norm(normed + projected)


class Encoder(nn.Module):
    """Token embedding + sinusoidal positions followed by a stack of EncoderBlocks."""

    def __init__(self, in_dim, hid_dim, n_heads, n_layers, batch_size, max_len, dropout):
        """Create the embedding, the positional table and the layer stack.

        Args:
            in_dim: source vocabulary size.
            hid_dim: width of the token representations.
            n_heads: attention heads per block.
            n_layers: number of EncoderBlocks.
            batch_size: kept for interface compatibility; no longer used, because
                the positional table is broadcast over the batch dimension
                instead of being copied ``batch_size`` times.
            max_len: number of positions in the positional table.
            dropout: dropout probability.
        """
        super().__init__()

        self.emb = nn.Embedding(in_dim, hid_dim)

        # Sinusoidal table of shape (1, max_len, hid_dim): sines on even feature
        # indices, cosines on odd ones. The leading 1 broadcasts over any batch.
        position = torch.arange(max_len).unsqueeze(1)
        freq = torch.exp(-torch.arange(hid_dim // 2) * 2 / hid_dim * torch.log(torch.FloatTensor([10000])))
        pos_enc = torch.zeros(1, max_len, hid_dim)
        pos_enc[0, :, 0::2] = torch.sin(position * freq)
        pos_enc[0, :, 1::2] = torch.cos(position * freq)
        # Non-persistent buffers follow the module on .to(...)/.cuda() but are
        # excluded from state_dict, so existing checkpoints still load unchanged.
        self.register_buffer('pos_enc', pos_enc, persistent=False)

        self.encoder = nn.ModuleList([EncoderBlock(hid_dim, n_heads, dropout) for i in range(n_layers)])

        self.dropout = nn.Dropout(dropout)

        # Embeddings are multiplied by sqrt(hid_dim) before positions are added.
        self.register_buffer('scale', torch.sqrt(torch.FloatTensor([hid_dim])), persistent=False)

    def forward(self, src, src_pad_mask, src_attn_mask):
        """Encode a batch of token ids.

        Args:
            src: integer token ids, indexed as (batch, position).
            src_pad_mask: passed to every block as the key padding mask.
            src_attn_mask: passed to every block as the attention mask.

        Returns:
            The output of the last block (or the embedded input if there are
            no blocks).
        """
        src = self.dropout(self.emb(src) * self.scale + self.pos_enc[:, :src.size(1), :])

        for block in self.encoder:
            src = block(src, src_pad_mask, src_attn_mask)

        return src


class DecoderBlock(nn.Module):
    """Single decoder layer: self-attention, cross-attention, feed-forward.

    Every sub-layer is applied as ``LayerNorm(x + Dropout(sublayer(x)))``.
    Inputs are batch-first because both attention modules are built with
    ``batch_first=True``.
    """

    def __init__(self, hid_dim, n_heads, dropout):
        """Create the sub-modules.

        Args:
            hid_dim: width of the token representations.
            n_heads: number of attention heads (both attentions).
            dropout: dropout probability used inside the attentions and on the
                residual / feed-forward paths.
        """
        super().__init__()

        # Attribute names double as state_dict keys, so they must stay stable.
        self.dec_attn = nn.MultiheadAttention(
            hid_dim, n_heads, dropout=dropout, batch_first=True
        )
        self.dec_attn_norm = nn.LayerNorm(hid_dim)
        self.cross_attn = nn.MultiheadAttention(
            hid_dim, n_heads, dropout=dropout, batch_first=True
        )
        self.cross_attn_norm = nn.LayerNorm(hid_dim)
        self.fc1 = nn.Linear(hid_dim, 2 * hid_dim)
        self.fc2 = nn.Linear(2 * hid_dim, hid_dim)
        self.ff_norm = nn.LayerNorm(hid_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, trg, trg_pad_mask, trg_attn_mask, src, cross_pad_mask, cross_attn_mask):
        """Run the layer.

        Args:
            trg: target-side representations.
            trg_pad_mask: key padding mask for the self-attention.
            trg_attn_mask: attention mask for the self-attention.
            src: encoder output used as keys and values of the cross-attention.
            cross_pad_mask: key padding mask for the cross-attention.
            cross_attn_mask: attention mask for the cross-attention.

        Returns:
            ``(trg, attn)``: the updated target representations and the
            attention weights returned by the cross-attention module.
        """
        # 1) Self-attention over the target sequence.
        self_out, _ = self.dec_attn(
            trg, trg, trg,
            key_padding_mask=trg_pad_mask,
            attn_mask=trg_attn_mask,
        )
        state = self.dec_attn_norm(self.dropout(self_out) + trg)

        # 2) Cross-attention: target queries against encoder keys/values.
        cross_out, attn = self.cross_attn(
            state, src, src,
            key_padding_mask=cross_pad_mask,
            attn_mask=cross_attn_mask,
        )
        state = self.cross_attn_norm(self.dropout(cross_out) + state)

        # 3) Position-wise feed-forward network.
        hidden = self.dropout(torch.relu(self.fc1(state)))
        projected = self.dropout(self.fc2(hidden))
        return self.ff_norm(state + projected), attn


class Decoder(nn.Module):
    """Token embedding + sinusoidal positions, a stack of DecoderBlocks and an output projection."""

    def __init__(self, out_dim, hid_dim, n_heads, n_layers, batch_size, max_len, dropout):
        """Create the embedding, the positional table, the layer stack and the projection.

        Args:
            out_dim: target vocabulary size (also the size of the output logits).
            hid_dim: width of the token representations.
            n_heads: attention heads per block.
            n_layers: number of DecoderBlocks.
            batch_size: kept for interface compatibility; no longer used, because
                the positional table is broadcast over the batch dimension
                instead of being copied ``batch_size`` times.
            max_len: number of positions in the positional table.
            dropout: dropout probability.
        """
        super().__init__()

        self.emb = nn.Embedding(out_dim, hid_dim)

        # Sinusoidal table of shape (1, max_len, hid_dim): sines on even feature
        # indices, cosines on odd ones. The leading 1 broadcasts over any batch.
        position = torch.arange(max_len).unsqueeze(1)
        freq = torch.exp(-torch.arange(hid_dim // 2) * 2 / hid_dim * torch.log(torch.FloatTensor([10000])))
        pos_enc = torch.zeros(1, max_len, hid_dim)
        pos_enc[0, :, 0::2] = torch.sin(position * freq)
        pos_enc[0, :, 1::2] = torch.cos(position * freq)
        # Non-persistent buffers follow the module on .to(...)/.cuda() but are
        # excluded from state_dict, so existing checkpoints still load unchanged.
        self.register_buffer('pos_enc', pos_enc, persistent=False)

        self.decoder = nn.ModuleList([DecoderBlock(hid_dim, n_heads, dropout) for i in range(n_layers)])

        self.dropout = nn.Dropout(dropout)

        # Embeddings are multiplied by sqrt(hid_dim) before positions are added.
        self.register_buffer('scale', torch.sqrt(torch.FloatTensor([hid_dim])), persistent=False)

        self.out = nn.Linear(hid_dim, out_dim)

    def forward(self, trg, trg_pad_mask, trg_attn_mask, src, cross_pad_mask, cross_attn_mask):
        """Decode a batch of target token ids against the encoder output.

        Args:
            trg: integer target token ids, indexed as (batch, position).
            trg_pad_mask: key padding mask for the blocks' self-attention.
            trg_attn_mask: attention mask for the blocks' self-attention.
            src: encoder output.
            cross_pad_mask: key padding mask for the blocks' cross-attention.
            cross_attn_mask: attention mask for the blocks' cross-attention.

        Returns:
            ``(logits, attn)``: the projection of the final representations onto
            the target vocabulary, and the cross-attention weights of the last
            block (``None`` when the stack is empty).
        """
        trg = self.dropout(self.emb(trg) * self.scale + self.pos_enc[:, :trg.size(1), :])

        attn = None  # stays None only if there are no blocks
        for block in self.decoder:
            trg, attn = block(trg, trg_pad_mask, trg_attn_mask, src, cross_pad_mask, cross_attn_mask)

        return self.out(trg), attn


class Transformer(nn.Module):
    """Encoder-decoder model that builds its own padding and attention masks."""

    def __init__(self, in_dim, out_dim, hid_dim, n_heads, n_layers, dropout, batch_size, src_pad, trg_pad, max_len=1000):
        """Create the encoder and decoder stacks.

        Args:
            in_dim: source vocabulary size.
            out_dim: target vocabulary size.
            hid_dim: width of the token representations.
            n_heads: attention heads per block.
            n_layers: number of blocks in each stack.
            dropout: dropout probability.
            batch_size: forwarded to Encoder and Decoder.
            src_pad: id of the padding token on the source side.
            trg_pad: id of the padding token on the target side.
            max_len: number of positions forwarded to Encoder and Decoder.
        """
        super().__init__()

        self.src_pad, self.trg_pad = src_pad, trg_pad

        shared_args = (hid_dim, n_heads, n_layers, batch_size, max_len, dropout)
        self.encoder = Encoder(in_dim, *shared_args)
        self.decoder = Decoder(out_dim, *shared_args)

    def make_pad_mask(self, x, pad_id):
        """Return a boolean tensor that is True wherever ``x`` equals ``pad_id``."""
        return x.eq(pad_id)

    def make_attn_mask(self, x_size, y_size, mode):
        """Build a float attention mask of shape ``(x_size, y_size)``.

        * ``mode == 'src'``: a tensor of ones. NOTE(review): as a float mask this
          adds the same constant to every score, which presumably has no effect
          after the softmax - confirm against the MultiheadAttention docs.
        * any other mode (the callers use ``'trg'`` and ``'cross'``): zeros on and
          below the main diagonal and ``-inf`` strictly above it. Because
          ``'cross'`` also takes this branch, cross-attention is masked causally
          as well. NOTE(review): the loaded checkpoint is used with this mask,
          so do not change it without confirming how the model was trained.
        """
        if mode == 'src':
            return torch.ones(x_size, y_size)

        upper = torch.ones(x_size, y_size).triu(diagonal=1)
        return upper.masked_fill(upper.bool(), -float('inf'))

    def forward(self, src, trg):
        """Encode ``src``, decode ``trg`` against it and return ``(output, attention)``.

        Both inputs are indexed as (batch, position); the sequence lengths are
        read from dimension 1.
        """
        src_len, trg_len = src.size(1), trg.size(1)

        # Padding masks (True marks padding positions).
        src_pad_mask = self.make_pad_mask(src, self.src_pad).to(device)
        cross_pad_mask = self.make_pad_mask(src, self.src_pad).to(device)
        trg_pad_mask = self.make_pad_mask(trg, self.trg_pad).to(device)

        # Attention masks for the three attention types.
        src_attn_mask = self.make_attn_mask(src_len, src_len, 'src').to(device)
        cross_attn_mask = self.make_attn_mask(trg_len, src_len, 'cross').to(device)
        trg_attn_mask = self.make_attn_mask(trg_len, trg_len, 'trg').to(device)

        memory = self.encoder(src, src_pad_mask, src_attn_mask)
        return self.decoder(
            trg, trg_pad_mask, trg_attn_mask,
            memory, cross_pad_mask, cross_attn_mask,
        )


# Instantiate the network (presumably with the hyper-parameters the checkpoint
# was trained with - load_state_dict below fails on any shape mismatch).
model = Transformer(INP_DIM,
                    OUT_DIM,
                    HID_DIM,
                    N_HEADS,
                    N_LAYERS,
                    DROPOUT,
                    BATCH_SIZE,
                    SRC_PAD,
                    TRG_PAD).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can also be loaded on a CPU-only runtime.
model.load_state_dict(torch.load('last-val-model.pt', map_location=device))


def translate_sentence(sentence, src_field, trg_field, model, device, max_len = 50):
    """Greedily translate one sentence with ``model``.

    Args:
        sentence: either a string (lower-cased and split into subwords with the
            module-level ``bpe_en`` tokenizer) or an iterable of already
            tokenized strings (each is lower-cased).
        src_field: source vocabulary exposing ``stoi``.
        trg_field: target vocabulary exposing ``stoi`` and ``itos``.
        model: object exposing ``encoder``, ``decoder``, ``make_pad_mask``,
            ``make_attn_mask``, ``src_pad`` and ``trg_pad``.
        device: device the input tensors and masks are moved to.
        max_len: maximum number of tokens to generate.

    Returns:
        ``(text, attention)``: the generated tokens joined together with every
        ``'▁'`` replaced by a space, and the attention returned by the last
        decoder call (``None`` if no decoding step ran, i.e. ``max_len <= 0``).
    """
    model.eval()

    if isinstance(sentence, str):
        tokens = [bpe_en.id_to_subword(token) for token in bpe_en.encode(sentence.lower())]
    else:
        tokens = [token.lower() for token in sentence]

    tokens = ['<sos>'] + tokens + ['<eos>']

    src_indexes = [src_field.stoi[token] for token in tokens]
    src_tensor = torch.LongTensor(src_indexes).unsqueeze(0).to(device)
    src_len = src_tensor.size(1)

    # Source-side masks do not change during decoding, so build them once.
    # The cross-attention padding mask is the same mask as the source one.
    src_pad_mask = model.make_pad_mask(src_tensor, model.src_pad).to(device)
    src_attn_mask = model.make_attn_mask(src_len, src_len, 'src').to(device)
    cross_pad_mask = src_pad_mask

    eos_index = trg_field.stoi['<eos>']
    trg_indexes = [trg_field.stoi['<sos>']]
    attention = None

    with torch.no_grad():
        enc_src = model.encoder(src_tensor, src_pad_mask, src_attn_mask)

        for _ in range(max_len):
            trg_tensor = torch.LongTensor(trg_indexes).unsqueeze(0).to(device)
            trg_len = trg_tensor.size(1)

            trg_pad_mask = model.make_pad_mask(trg_tensor, model.trg_pad).to(device)
            trg_attn_mask = model.make_attn_mask(trg_len, trg_len, 'trg').to(device)
            cross_attn_mask = model.make_attn_mask(trg_len, src_len, 'cross').to(device)

            output, attention = model.decoder(trg_tensor, trg_pad_mask, trg_attn_mask,
                                              enc_src, cross_pad_mask, cross_attn_mask)

            # Greedy choice: most likely token at the last target position.
            pred_token = output.argmax(2)[:, -1].item()
            trg_indexes.append(pred_token)

            if pred_token == eos_index:
                break

    # Drop the leading <sos>. Drop the trailing <eos> only if it was really
    # produced: when max_len is exhausted the last token is real output and
    # must be kept.
    generated = trg_indexes[1:]
    if generated and generated[-1] == eos_index:
        generated.pop()

    result = ''.join(trg_field.itos[i] for i in generated)

    return result.replace('▁', ' '), attention

In [4]:
# Quick manual check: read one English sentence from stdin and show its translation.
source_text = input()
translation, attention = translate_sentence(
    source_text,
    src_vocab,
    trg_vocab,
    model,
    device,
)

print(f'TRANSLATION = {translation}')

I have a dog
TRANSLATION =  sɛiɣ aydi-ines.


In [5]:
# The bot token is a secret and must not live in the notebook: read it from the
# environment instead. NOTE: the token that used to be hard-coded here has been
# exposed and should be revoked and re-issued.
token = os.environ.get('TELEGRAM_BOT_TOKEN')
if not token:
    raise RuntimeError('Set the TELEGRAM_BOT_TOKEN environment variable before starting the bot.')
bot = telebot.TeleBot(token)

@bot.message_handler(commands=['start'])
def start_message(message):
    """Reply to /start with a prompt asking for English text to translate."""
    bot.send_message(message.chat.id, 'Введите текст на английском языке для перевода.')

@bot.message_handler(commands=['bleu'])
def report_bleu(message):
    """Reply to /bleu with the fixed BLEU score string."""
    bot.send_message(message.chat.id, 'BLEU = 22.34')

@bot.message_handler(content_types=['text'])
def translate(message):
    """Translate the incoming text message and send the result back."""
    translation, attention = translate_sentence(message.text, src_vocab, trg_vocab, model, device)
    # translate_sentence returns a string; ' '.join(...) on it would put a space
    # between every character, so the text is sent as is (minus outer whitespace).
    reply = translation.strip()
    if not reply:
        # An empty message cannot be sent, so fall back to a short notice.
        reply = 'Не удалось перевести текст.'
    bot.send_message(message.chat.id, reply)

bot.polling(none_stop=True)