In [1]:
%env CUDA_VISIBLE_DEVICES=2

import sys
sys.path.append("/home/isprovilkov/rupo")
import pickle
import rupo.api
import numpy as np
from enum import Enum
from collections import namedtuple
from typing import NamedTuple, Optional, Tuple
import tensorflow as tf
import keras.layers as L
from keras import backend as K
from utils import infer_length, infer_mask
from nltk.translate.bleu_score import corpus_bleu

sess = tf.InteractiveSession()

env: CUDA_VISIBLE_DEVICES=2


Using TensorFlow backend.


In [2]:
#MODEL_ROOT = '/srv/hd7/data/aklyopova/models/model_translator_attn_reversed_amalgama_subtitles_09_03_2019/'
#MODEL_PATH = MODEL_ROOT + 'model_r2_14_03_2019.pkl'
MODEL_PATH = '/home/isprovilkov/Poems/model_translator_attn_reversed_amalgama_subtitles_09_03_2019_550001_iters_fine_tuned_amalgama_300002_iters.pkl'

RUPO_DATA_ROOT = '/home/isprovilkov/Poems/'
RUPO_STRESS_MODEL_PATH   = '/home/isprovilkov/rupo/rupo/data/stress_models/stress_ru.h5'
RUPO_ZALYZNIAK_DICT_PATH = RUPO_DATA_ROOT + 'zaliznyak.txt'
del RUPO_DATA_ROOT

In [3]:
def get_reversed(line):
    return ''.join(reversed(line))

def compute_bleu(model, inp_lines, out_lines, bpe_sep='@@ ', **flags):
    """ Estimates corpora-level BLEU score of model's translations
        given inp and reference out """
    translations, _ = model.translate_lines(inp_lines, **flags)
    # Note: if you experience out-of-memory error,
    # split input lines into batches and translate separately
    return corpus_bleu([[ref] for ref in out_lines], translations) * 100

def compute_bleu_large(model, inp_lines, out_lines):
    batch_size = 256
    result = 0.0
    for i in range(0, inp_lines.shape[0], batch_size):
        current_bleu = compute_bleu(model,
                                    inp_lines[i : i + batch_size],
                                    out_lines[i : i + batch_size])
        current_bleu *= min(i + batch_size, inp_lines.shape[0]) - i
        result += current_bleu
    result /= inp_lines.shape[0]
    return result

In [4]:
class LetterType(Enum):
    NOT_LETTER = 0
    VOWEL = 1
    CONSONANT = 2
    SIGN = 3

class RuAlphabetInfo(object):
    
    _ru_letters = set(''.join([chr(n) for n in range(ord('а'), ord('я') + 1)]) + 'ё')
    assert len(_ru_letters) == 33
    _ru_vowels = set('аеёийоуыэюя')
    _ru_consonants = set('бвгджзклмнпрстфхцчшщ')
    _ru_signs = set('ъь')

    assert _ru_vowels & _ru_consonants == set()
    assert _ru_vowels | _ru_consonants | _ru_signs == _ru_letters
    
    @staticmethod
    def get_ru_letter_type(ch: str) -> LetterType:

        if ch in RuAlphabetInfo._ru_vowels:
            return LetterType.VOWEL
        elif ch in RuAlphabetInfo._ru_consonants:
            return LetterType.CONSONANT
        elif ch in RuAlphabetInfo._ru_signs:
            return LetterType.SIGN
        else:
            return LetterType.NOT_LETTER
    
    @staticmethod
    def lower_and_strip_left_non_letters(line: str) -> str:
        
        line = line.lower()
        
        # Skip everything before first letter:
        while len(line) > 0 and line[0] not in RuAlphabetInfo._ru_letters:
            line = line[1:]
        
        return line

In [5]:
#class RhymeInfo(NamedTuple):
#    text: str
#    finished: bool

# Old-style for Python 3.5:
RhymeInfo = NamedTuple('RhymeInfo',
                       [('text', str), # text that will be used for testing whether rhyme is present
                        ('finished', bool) # is text complete
                       ])
        
class IRhymeTester(object):

    def extract_rhyme_info(self, line: str) -> RhymeInfo:
        raise Exception('Not implemented')
        
    def is_rhyme(self, info1: RhymeInfo, info2: RhymeInfo) -> Optional[bool]:
        # Returns True - rhyme present, False - absent, None - don't know
        raise Exception('Not implemented')

In [6]:
class RuReversedSuffixRhymeTester(IRhymeTester):
    
    def extract_rhyme_info(self, line: str) -> RhymeInfo:
        # input: line is REVERSED string
        # We iterate all vowels from the beginning of line until first consonant
        # Or all consonants until first vowel
        # SIGNs are ignored but inserted to output

        line = RuAlphabetInfo.lower_and_strip_left_non_letters(line)
            
        finished = False
        prefix_len = 0
        prev_ch_type = None

        for ch in line:
            
            ch_type = RuAlphabetInfo.get_ru_letter_type(ch)
            
            if ch_type == LetterType.NOT_LETTER:
                finished = True
                break
            
            prefix_len += 1

            if ch_type != LetterType.SIGN:
                if prev_ch_type is None:
                    prev_ch_type = ch_type
                elif prev_ch_type != ch_type:
                    finished = True
                    break

        return RhymeInfo(text = line[:prefix_len],
                         finished = finished)
    
    
    def is_rhyme(self, info1: RhymeInfo, info2: RhymeInfo) -> Optional[bool]:
        
        if not info1.finished or not info2.finished:
            l = min(len(info1.text), len(info2.text))
            if info1.text[:l] != info2.text[:l]:
                return False
            return None
        
        return info1.text == info2.text

    
def test():
    
    tester = RuReversedSuffixRhymeTester()
    for line, suffix, finished in [('пижама', 'ма', True),
                                   ('обученный', 'ный', True),
                                   ('Ихтиандр', 'андр', True),
                                   ('КНДР', 'кндр', False),
                                   ('махать', 'ать', True)]:
        for l in (line, line + '!', ' ' + line + ', '):
            info = tester.extract_rhyme_info(get_reversed(l))
            assert info.text == get_reversed(suffix)
            assert info.finished == (finished or l[0] == ' ')
            assert tester.is_rhyme(info, info) == (True if info.finished else None)

test()

In [7]:
def make_rupo_engine():
    rupo_engine = rupo.api.Engine(language = 'ru')
    rupo_engine.load(stress_model_path = RUPO_STRESS_MODEL_PATH,
                     zalyzniak_dict = RUPO_ZALYZNIAK_DICT_PATH)
    return rupo_engine

global_rupo_engine = make_rupo_engine()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.




In [8]:
class RuReversedWordRhymeTester(IRhymeTester):
    
    _WORD_INTERNAL_CHARS = set('-.')
    
    
    def __init__(self, rupo_engine = None):
        
        if rupo_engine is None:
            self._rupo_engine = rupo.api.Engine(language = 'ru')
            self._rupo_engine.load(stress_model_path = RUPO_STRESS_MODEL_PATH,
                                   zalyzniak_dict = RUPO_ZALYZNIAK_DICT_PATH)
        else:
            # Assume loaded engine:
            assert rupo_engine.language == 'ru'
            self._rupo_engine = rupo_engine
    
    
    def extract_rhyme_info(self, line: str) -> RhymeInfo:
        # line is REVERSED string
        # Here we skip everything until first letter
        # return: it TRIES to return complete word (returns part in case if line contains only part)
        
        
        line = RuAlphabetInfo.lower_and_strip_left_non_letters(line)
        
        is_internal_char = lambda ch: ch in RuReversedWordRhymeTester._WORD_INTERNAL_CHARS
            
        finished = False
        word_len = 0
        
        for ch in line:
            
            is_word_char = is_internal_char(ch) or \
                           RuAlphabetInfo.get_ru_letter_type(ch) != LetterType.NOT_LETTER
            
            if not is_word_char:
                finished = True
                break
            
            word_len += 1
        
        while word_len > 0 and is_internal_char(line[word_len - 1]):
            word_len -= 1

        return RhymeInfo(text = get_reversed(line[:word_len]),
                         finished = finished)
    
    
    def is_rhyme(self, info1: RhymeInfo, info2: RhymeInfo) -> Optional[bool]:
        
        if not info1.finished or not info2.finished:
            return None
        
        return self._rupo_engine.is_rhyme(info1.text, info2.text)

    
def test():
    
    tester = RuReversedWordRhymeTester(global_rupo_engine)
    augment_line = lambda l: (l, l + '!', ' ' + l + ', ', 'Мы и ' + l)
    
    for line, word, finished in [('серая корова', 'корова', True),
                                 ('молока, много', 'много', True),
                                 ('Ихтиандр', 'ихтиандр', False),
                                 ('КНДР', 'кндр', False),
                                 ('аб-вг', 'аб-вг', False),
                                 ('.-аб.вг.-', 'аб.вг', False)]:
        for l in augment_line(line):
            info = tester.extract_rhyme_info(get_reversed(l))
            assert info.text == word
            assert info.finished == (finished or not l.startswith(line))
            any_vowels = any([RuAlphabetInfo.get_ru_letter_type(ch) == LetterType.VOWEL for ch in word])
            assert tester.is_rhyme(info, info) == (any_vowels if info.finished else None)
    
    for line1, line2, is_rhyme in [('серая корова', 'не очень здорова', True),
                                   ('молока много', 'не мало', False),
                                   ('и играть', 'и скакать', False),
                                   ('и играть', 'не играть', True)]:
        for l1 in augment_line(line1):
            for l2 in augment_line(line2):
                info1 = tester.extract_rhyme_info(get_reversed(l1))
                info2 = tester.extract_rhyme_info(get_reversed(l2))
                assert tester.is_rhyme(info1, info2) == is_rhyme

test()

In [9]:
RHYME_DEBUG_PRINT = False # 1, 2 or 3 for more debug info

class RhymeType(Enum):
    SUFFIX = 1
    WORD = 2

    
class ITranslationModel(object):
    # Our model which we trained now is accessed via this interface

    def make_initial_state(self, lines):
        # one state per line (though format is model-depending)
        raise Exception('Not implemented')
    
    def get_next_state_and_logits(self, state, outputs):
        raise Exception('Not implemented')
    
    def get_output_vocabulary(self):
        raise Exception('Not implemented')
        

class RhymeTranslator(object):
    
    def __init__(self, model: ITranslationModel, rupo_engine = None):
        
        self._model = model
        self._out_voc = model.get_output_vocabulary()
        
        self._suffix_rhyme_tester = RuReversedSuffixRhymeTester()
        self._word_rhyme_tester = RuReversedWordRhymeTester(rupo_engine = rupo_engine)
    
    
    def _lines_to_model_lines(self, lines):
        return list(map(get_reversed, lines))
    
    def _merge_tokens(self, line):
        return line.replace('@@ ', '')
    
    def _model_tokens_to_model_line(self, output, eos_as_space):
        [line] = self._out_voc.to_lines([output])
        if eos_as_space and output[-1] == self._out_voc.eos_ix:
            line += ' '
        return self._merge_tokens(line)
    
    def _model_tokens_to_lines(self, outputs):
        lines = self._out_voc.to_lines(outputs)
        return [get_reversed(self._merge_tokens(l)) for l in lines]
    
    def _apply_softmax(self, logits, temperature):
        
        if temperature != 1:
            if temperature < 1:
                # Convert to 64-bit float to avoid overflows:
                # Note: There will still be overflows for T < 0.03
                logits = logits.astype(np.float64)
            
            logits /= temperature

        np.exp(logits, out = logits)
        logits /= logits.sum(axis = -1)[..., np.newaxis]
        return logits
    
    
    def translate_lines(self,
                        lines,
                        sample_temperature = 0, 
                        max_len = 100):
        
        model_lines = self._lines_to_model_lines(lines)
        state = self._model.make_initial_state(model_lines)
        
        outputs = np.empty((len(lines), max_len + 1), dtype = np.int64)
        outputs[:, 0] = self._out_voc.bos_ix
        finished = np.zeros((len(lines),), dtype = bool)

        for t in range(max_len):
            
            state, logits = self._model.get_next_state_and_logits(state, outputs[:, :t + 1])
            
            if sample_temperature:
                # Sample from softmax with temperature:
                logits = self._apply_softmax(logits, sample_temperature)
                next_tokens = np.array([np.random.choice(len(probs), p = probs) for probs in logits])
            else:
                next_tokens = np.argmax(logits, axis = -1)
            
            outputs[:, t + 1] = next_tokens
            finished |= next_tokens == self._out_voc.eos_ix
            
            if finished.sum() == len(lines):
                break # Early exis if all lines finished
                
        return self._model_tokens_to_lines(outputs)
    
    
    
    def _translate_lines_in_rhyme(self,
                                  lines,
                                  rhyme_tester,
                                  sample_temperature,
                                  max_len,
                                  rhyme_test_counts,
                                  max_total_rhyme_tests):
        # rhyme_test_counts = (2, 3, 4) means that first token is variated 2 variants
        # second is variated 3 variants and third is variated 4 variants
        
        model_lines = self._lines_to_model_lines(lines)
        initial_state = self._model.make_initial_state(model_lines)
        initial_states = [[data[i: i+1] for data in initial_state] for i in range(len(lines))]
        
        outputs = np.empty((len(lines), max_len + 1), dtype = np.int64)
        outputs[:, 0] = self._out_voc.bos_ix
        finished = np.zeros((len(lines),), dtype = bool)
        
        # Rhyme states:
        # * True - rhyme found,
        # * False - there is no rhyme,
        # * None - not sure yet
        rhyme_state = None
        
        GenState = namedtuple('GenState', ['state', 'toks', 'prob', 'next_states'])
        
        gen_states = [GenState(state,
                               toks = [self._out_voc.bos_ix],
                               prob = 1,
                               next_states = [None] * rhyme_test_counts[0]) for state in initial_states]
        
        def fill_next_states(gen_state, t):
            
            last_state = t == len(rhyme_test_counts)
            if last_state:
                assert gen_state.next_states is None
                line_last_gen_states.append(gen_state)
                return
                
            test_count = rhyme_test_counts[t]
            assert test_count == len(gen_state.next_states)
            
            state, logits = self._model.get_next_state_and_logits(gen_state.state, [gen_state.toks])
            [probs] = self._apply_softmax(logits, temperature = 1) # TODO set temperature
            
            best_line_tokens = np.argpartition(probs, kth = -test_count, axis = -1)[-test_count:]
            best_line_token_probs = probs[best_line_tokens]
            
            for i in range(test_count):
                next_gen_state = GenState(state,
                                          toks = gen_state.toks + [best_line_tokens[i]],
                                          prob = gen_state.prob * best_line_token_probs[i],
                                          next_states = [None] * rhyme_test_counts[t + 1]
                                                        if t + 1 < len(rhyme_test_counts)
                                                        else None)
                gen_state.next_states[i] = next_gen_state
                fill_next_states(next_gen_state, t + 1)
            
        if RHYME_DEBUG_PRINT >= 2:
            print('*** DEBUG: Generating {} x {} states... ***'.format(len(lines), rhyme_test_counts)) # DEBUG
        last_gen_states = []
        for gen_state in gen_states:
            line_last_gen_states = []
            fill_next_states(gen_state, t = 0)
            last_gen_states.append(line_last_gen_states)
            
        # by this moment we have tree-structure (stored in last_gen_states) for each line
            
        assert [len(line_last_gen_states) == np.prod(rhyme_test_counts) for line_last_gen_states in last_gen_states]
        
        if RHYME_DEBUG_PRINT >= 3:
            for i, line_last_gen_states in enumerate(last_gen_states):
                print('*** DEBUG: Line {} suffixes: ***'.format(i + 1)) # DEBUG
                for line_last_gen_state in line_last_gen_states:
                    suffix = get_reversed(self._model_tokens_to_model_line(line_last_gen_state.toks,
                                                                           eos_as_space = True))
                    print('*** DEBUG:  line {}: "{}" ***'.format(i + 1, suffix)) # DEBUG
        
        if RHYME_DEBUG_PRINT >= 2:
            print('*** DEBUG: Generating state pairs... ***') # DEBUG
        assert len(lines) == 2
        last_gen_state_pairs = []
        for line_1_last_gen_state in last_gen_states[0]:
            for line_2_last_gen_state in last_gen_states[1]:
                
                pair = (line_1_last_gen_state.prob * line_2_last_gen_state.prob,
                        line_1_last_gen_state,
                        line_2_last_gen_state)
                last_gen_state_pairs.append(pair)
        # pair is actually a triple: prob, state1, state2
                
        last_gen_state_pairs.sort(key = lambda t: t[0], reverse = True)
        if max_total_rhyme_tests:
            last_gen_state_pairs = last_gen_state_pairs[:max_total_rhyme_tests]
        
        if RHYME_DEBUG_PRINT >= 2:
            print('*** DEBUG: Testing state pairs... ***') # DEBUG
        for _, line_1_last_gen_state, line_2_last_gen_state in last_gen_state_pairs:
            
            state = [np.concatenate((a, b), axis = 0)
                     for a, b in zip(line_1_last_gen_state.state, line_2_last_gen_state.state)]
            
            outputs[0, :len(rhyme_test_counts) + 1] = line_1_last_gen_state.toks
            outputs[1, :len(rhyme_test_counts) + 1] = line_2_last_gen_state.toks
            
            rhyme_state = None
            
            def update_rhyme_state(t):
                # it will say whether we have a rhyme currently
                nonlocal rhyme_state
                
                line_1 = self._model_tokens_to_model_line(outputs[0, :t + 1],
                                                          eos_as_space = True)
                line_2 = self._model_tokens_to_model_line(outputs[1, :t + 1],
                                                          eos_as_space = True)
                info_1 = rhyme_tester.extract_rhyme_info(line_1)
                info_2 = rhyme_tester.extract_rhyme_info(line_2)
                rhyme_state = rhyme_tester.is_rhyme(info_1, info_2)
            
            update_rhyme_state(len(rhyme_test_counts))
            if rhyme_state == False:
                continue
                
            finished.fill(False)

            # And now the same generation function like in our model but with checking rhymes
            for t in range(len(rhyme_test_counts), max_len):

                state, logits = self._model.get_next_state_and_logits(state, outputs[:, :t + 1])

                if sample_temperature:
                    # Sample from softmax with temperature:
                    logits = self._apply_softmax(logits, sample_temperature)
                    next_tokens = np.array([np.random.choice(len(probs), p = probs) for probs in logits])
                else:
                    next_tokens = np.argmax(logits, axis = -1)

                outputs[:, t + 1] = next_tokens
                finished |= next_tokens == self._out_voc.eos_ix
                
                if rhyme_state is None:
                    update_rhyme_state(t)
                    if rhyme_state == False:
                        break

                if finished.sum() == len(lines):
                    break # Early exis if all lines finished

            if rhyme_state != True:
                continue
            
            if RHYME_DEBUG_PRINT:
                print('*** DEBUG: Rhyme found! ***') # DEBUG
            return self._model_tokens_to_lines(outputs)
    
        if RHYME_DEBUG_PRINT:
            print('*** DEBUG: Failed to find rhyme. ***') # DEBUG
        return self.translate_lines(lines,
                                    sample_temperature,
                                    max_len)
    
    
    def translate_lines_with_rhyme(self,
                                   lines,
                                   rhyme_type = RhymeType.WORD,
                                   sample_temperature = 0,
                                   max_len = 100,
                                   rhyme_test_counts = (10, 10),
                                   max_total_rhyme_tests = 1000):
        
        if rhyme_type == RhymeType.SUFFIX:
            rhyme_tester = self._suffix_rhyme_tester
        elif rhyme_type == RhymeType.WORD:
            rhyme_tester = self._word_rhyme_tester
        else:
            assert False
        
        translated = []
        for pair_idx in range(len(lines) // 2):
            
            pair_lines = lines[pair_idx * 2 : (pair_idx + 1) * 2]
            
            translated += self._translate_lines_in_rhyme(pair_lines,
                                                         rhyme_tester,
                                                         sample_temperature,
                                                         max_len,
                                                         rhyme_test_counts,
                                                         max_total_rhyme_tests)
        
        if len(lines) % 2 == 1:
            translated.append(self.translate_lines(lines[-1:])[0])
            
        return translated

In [10]:
class AttentionLayer:
    
    def __init__(self, name, hid_size, activ=tf.tanh,):
        """ A layer that computes additive attention response and weights """
        self.name = name
        self.hid_size = hid_size # attention layer hidden units
        self.activ = activ       # attention layer hidden nonlinearity

        with tf.variable_scope(name):
            # YOUR CODE - create layer variables
            #<YOUR CODE>
            self.linear_e = L.Dense(hid_size)
            self.linear_d = L.Dense(hid_size)
            self.linear_out = L.Dense(1)

    def __call__(self, enc, dec, inp_mask):
        """
        Computes attention response and weights
        :param enc: encoder activation sequence, float32[batch_size, ninp, enc_size]
        :param dec: single decoder state used as "query", float32[batch_size, dec_size]
        :param inp_mask: mask on enc activatons (0 after first eos), float32 [batch_size, ninp]
        :returns: attn[batch_size, enc_size], probs[batch_size, ninp]
            - attn - attention response vector (weighted sum of enc)
            - probs - attention weights after softmax
        """
        with tf.variable_scope(self.name):
            
            # Compute logits
            #<...>
            logits_seq = self.linear_out(self.activ(self.linear_e(enc) + \
                                                    self.linear_d(dec)[:, tf.newaxis, :]))
            logits_seq = tf.squeeze(logits_seq, axis = -1)
            
            # Apply mask - if mask is 0, logits should be -inf or -1e9
            # You may need tf.where
            #<...>
            
            logits_seq = tf.where(inp_mask, logits_seq, tf.fill(tf.shape(logits_seq),
                                                                -np.inf))
            
            # Compute attention probabilities (softmax)
            probs = tf.nn.softmax(logits_seq) # <...>
            
            # Compute attention response using enc and probs
            attn = tf.reduce_sum(probs[..., tf.newaxis] * enc, axis = 1) # <...>
            
            return attn, probs
        
class AttentiveModel(ITranslationModel):
    
    def __init__(self, filename, name = None, inp_voc = None, out_voc = None,
                 emb_size = None, hid_size = None):
        
        if filename is None:
            self.initialize(name, inp_voc, out_voc,
                            emb_size, hid_size) #, attn_size)
        else:
            self.load(filename)
    
    
    def initialize(self, name, inp_voc, out_voc,
                   emb_size, hid_size): #, attn_size):
        
        self.name = name
        self.inp_voc = inp_voc
        self.out_voc = out_voc
        self.emb_size = emb_size
        self.hid_size = hid_size
        #self.attn_size = attn_size

        with tf.variable_scope(name):
            
            # YOUR CODE - define model layers
            
            # <...>
            self.emb_inp = L.Embedding(len(inp_voc), emb_size)
            self.emb_out = L.Embedding(len(out_voc), emb_size)
            self.enc_lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(hid_size,
                                                                 forget_bias=1.0,
                                                                 state_is_tuple = False)
            self.enc_lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(hid_size,
                                                                 forget_bias=1.0,
                                                                 state_is_tuple = False)
            #self.enc0 = tf.nn.rnn_cell.GRUCell(hid_size)

            self.dec_start = L.Dense(hid_size)
            self.dec0 = tf.nn.rnn_cell.GRUCell(hid_size)
            self.dense = L.Dense(hid_size)
            self.activ = tf.tanh
            self.logits = L.Dense(len(out_voc))
            
            self.attention = AttentionLayer(name = 'attention',
                                            #enc_size = None, # FIXME: Unused
                                            #dec_size = None, # FIXME: Unused
                                            #hid_size = attn_size)
                                            hid_size = 2 * self.hid_size)
            
            # END OF YOUR CODE
            
            # prepare to translate_lines
            self.inp = tf.placeholder('int32', [None, None])
            self.initial_state = self.prev_state = self.encode(self.inp)
            self.prev_tokens = tf.placeholder('int32', [None])
            self.next_state, self.next_logits = self.decode(self.prev_state, self.prev_tokens)
            self.next_softmax = tf.nn.softmax(self.next_logits)

        self.weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=name)
        
        # Call to 'K.get_session()' runs variable initializes for
        # all variables including ones initialized using
        # 'tf.global_variables_initializer()' (at least for Keras
        # 2.0.5) thus it have to be called once here or model weights
        # will be rewritten after training e.g. when 'get_weights' is
        # called.
        K.get_session()

    def encode(self, inp, **flags):
        """
        Takes symbolic input sequence, computes initial state
        :param inp: matrix of input tokens [batch, time]
        :return: a list of initial decoder state tensors
        """
        
        # encode input sequence, create initial decoder states
        # <YOUR CODE>
        inp_lengths = infer_length(inp, self.inp_voc.eos_ix)
        inp_mask = infer_mask(inp, self.inp_voc.eos_ix, dtype = tf.bool)
        
        inp_emb = self.emb_inp(inp)
        with tf.variable_scope('enc0'):
            #enc_seq, enc_last = tf.nn.dynamic_rnn(self.enc0,
            #                                      inp_emb,
            #                                      sequence_length = inp_lengths,
            #                                      dtype = inp_emb.dtype)
            ((enc_seq_fw,
              enc_seq_bw),
             (enc_last_fw,
              enc_last_bw)) = tf.nn.bidirectional_dynamic_rnn(self.enc_lstm_fw_cell,
                                                              self.enc_lstm_bw_cell,
                                                              inp_emb,
                                                              sequence_length = inp_lengths,
                                                              dtype = inp_emb.dtype)
        enc_seq = tf.concat((enc_seq_fw, enc_seq_bw), axis = -1)
        dec_start = self.dec_start(enc_last_fw)
        
        # apply attention layer from initial decoder hidden state
        #first_attn_probas = <...>
        _, first_attn_probas = self.attention(enc_seq, dec_start, inp_mask)
        
        # Build first state: include
        # * initial states for decoder recurrent layers
        # * encoder sequence and encoder attn mask (for attention)
        # * make sure that last state item is attention probabilities tensor
        
        #first_state = [<...>, first_attn_probas]
        first_state = [dec_start, enc_seq, inp_mask, first_attn_probas]
        return first_state

    def decode(self, prev_state, prev_tokens, **flags):
        """
        Takes previous decoder state and tokens, returns new state and logits
        :param prev_state: a list of previous decoder state tensors
        :param prev_tokens: previous output tokens, an int vector of [batch_size]
        :return: a list of next decoder state tensors, a tensor of logits [batch,n_tokens]
        """
        # Unpack your state: you will get tensors in the same order
        # that you've packed in encode
        #[<...>, prev_attn_probas] = prev_state
        [prev_dec, enc_seq, inp_mask, prev_attn_probas] = prev_state
        
        
        # Perform decoder step
        # * predict next attn response and attn probas given previous decoder state
        # * use prev token embedding and attn response to update decoder states
        # * (concatenate and feed into decoder cell)
        # * predict logits
        
        # <APPLY_ATTENTION>
        next_attn_response, next_attn_probas = self.attention(enc_seq, prev_dec, inp_mask)

        # <YOUR CODE>
        prev_emb = self.emb_out(prev_tokens[:,None])[:,0]
        dec_inputs = tf.concat([prev_emb, next_attn_response], axis = 1)
        with tf.variable_scope('dec0'):
            new_dec_out, new_dec_state = self.dec0(dec_inputs, prev_dec)
        output_logits = self.logits(self.activ(self.dense(new_dec_out)))
        #output_logits = self.logits(self.activ(new_dec_out))
        
        # Pack new state:
        # * replace previous decoder state with next one
        # * copy encoder sequence and mask from prev_state
        # * append new attention probas
        #next_state = [<...>, next_attn_probas]
        next_state = [new_dec_state, enc_seq, inp_mask, next_attn_probas]
        return next_state, output_logits

    
    def compute_logits(self, inp, out, **flags):
        
        batch_size = tf.shape(inp)[0]

        # Encode inp, get initial state
        first_state = self.encode(inp) # <YOUR CODE HERE>

        # initial logits: always predict BOS
        first_logits = tf.log(tf.one_hot(tf.fill([batch_size], self.out_voc.bos_ix),
                                         len(self.out_voc)) + 1e-30)

        # Decode step
        def step(prev_state, y_prev):
            # Given previous state, obtain next state and next token logits
            # <YOUR CODE>
            next_dec_state, next_logits = self.decode(prev_state, y_prev)
            return next_dec_state, next_logits # <...>

        # You can now use tf.scan to run step several times.
        # use tf.transpose(out) as elems (to process one time-step at a time)
        # docs: https://www.tensorflow.org/api_docs/python/tf/scan

        # <YOUR CODE>

        out = tf.scan(lambda a, y: step(a[0], y),
                      elems = tf.transpose(out)[:-1],
                      initializer = (first_state, first_logits))


        # FIXME remove?
        #sess.run(tf.initialize_all_variables())

        logits_seq = out[1] # <YOUR CODE>

        # prepend first_logits to logits_seq
        logits_seq = tf.concat((first_logits[tf.newaxis], logits_seq), axis = 0) #<...>

        # Make sure you convert logits_seq from
        # [time, batch, voc_size] to [batch, time, voc_size]
        logits_seq = tf.transpose(logits_seq, perm = [1, 0, 2]) #<...>

        return logits_seq

    def compute_loss(self, inp, out, **flags):
        
        mask = infer_mask(out, out_voc.eos_ix)    
        logits_seq = self.compute_logits(inp, out, **flags)

        # Compute loss as per instructions above
        # <YOUR CODE>

        prob_seq = tf.nn.softmax(logits_seq)
        out_one_hot = tf.one_hot(out, len(self.out_voc))

        prob_seq_masked = tf.boolean_mask(prob_seq, mask)
        out_one_hot_masked = tf.boolean_mask(out_one_hot, mask)
        prob_seq_out = tf.boolean_mask(prob_seq_masked, out_one_hot_masked)
        loss = tf.reduce_mean(-tf.log(prob_seq_out))

        return loss
    
    
    def make_initial_state(self, inp_lines):
        return sess.run(self.initial_state, {self.inp: self.inp_voc.to_matrix(inp_lines)})
    
    def get_next_state_and_logits(self, state, outputs):
        return sess.run([self.next_state, self.next_logits],
                        {**dict(zip(self.prev_state, state)),
                         self.prev_tokens: [out_i[-1] for out_i in outputs]})
                         
    def get_output_vocabulary(self):
        return self.out_voc
    
    
    def translate_lines(self, inp_lines, max_len=100):
        """
        Translates a list of lines by greedily selecting most likely next token at each step
        :returns: a list of output lines, a sequence of model states at each step
        """
        state = self.make_initial_state(inp_lines)
        outputs = [[self.out_voc.bos_ix] for _ in range(len(inp_lines))]
        all_states = [state]
        finished = [False] * len(inp_lines)

        for t in range(max_len):
            state, logits = self.get_next_state_and_logits(state, outputs)
            next_tokens = np.argmax(logits, axis=-1)
            all_states.append(state)
            for i in range(len(next_tokens)):
                outputs[i].append(next_tokens[i])
                finished[i] |= next_tokens[i] == self.out_voc.eos_ix
        return self.out_voc.to_lines(outputs), all_states
    
    def dump(self, filename):
        
        values = {'name': self.name,
                  'inp_voc': self.inp_voc,
                  'out_voc': self.out_voc,
                  'emb_size': self.emb_size,
                  'hid_size': self.hid_size,
                  #'attn_size': self.attn_size,
                  'emb_inp_weights': self.emb_inp.get_weights(),
                  'emb_out_weights': self.emb_out.get_weights(),
                  #'enc0_weights': self.enc0.get_weights(),
                  'enc_lstm_fw_cell_weights': self.enc_lstm_fw_cell.get_weights(),
                  'enc_lstm_bw_cell_weights': self.enc_lstm_bw_cell.get_weights(),
                  'dec0_weights': self.dec0.get_weights(),
                  'dec_start_weights': self.dec_start.get_weights(),
                  'dense_weights': self.dense.get_weights(),
                  'logits_weights': self.logits.get_weights(),
                  'attn__linear_e_weights': self.attention.linear_e.get_weights(),
                  'attn__linear_d_weights': self.attention.linear_d.get_weights(),
                  'attn__linear_out_weights': self.attention.linear_out.get_weights()}
        pickle.dump(values, open(filename, 'wb'))
    
    def load(self, filename):
        with open(filename, 'rb') as f:
            values = pickle.load(f)
        self.initialize(values['name'], values['inp_voc'], values['out_voc'],
                        values['emb_size'], values['hid_size']) #, values['attn_size'])
        self.emb_inp.set_weights(values['emb_inp_weights'])
        self.emb_out.set_weights(values['emb_out_weights'])
        #self.enc0.set_weights(values['enc0_weights'])
        self.enc_lstm_fw_cell.set_weights(values['enc_lstm_fw_cell_weights'])
        self.enc_lstm_bw_cell.set_weights(values['enc_lstm_bw_cell_weights'])
        self.dec0.set_weights(values['dec0_weights'])
        self.dec_start.set_weights(values['dec_start_weights'])
        self.dense.set_weights(values['dense_weights'])
        self.logits.set_weights(values['logits_weights'])
        self.attention.linear_e.set_weights(values['attn__linear_e_weights'])
        self.attention.linear_d.set_weights(values['attn__linear_d_weights'])
        self.attention.linear_out.set_weights(values['attn__linear_out_weights'])
        

In [11]:
if 'model_loaded' in globals():
    sess.close()
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    # Need to also recreate Rupo as it uses TensorFlow:
    global_rupo_engine = make_rupo_engine()

model_loaded = AttentiveModel(MODEL_PATH)

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.










Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.


Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API


Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API


Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API


Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API


In [12]:
ovoc = model_loaded.get_output_vocabulary()

In [13]:
id2tok = {i:token for token, i in zip(ovoc.token_to_ix.keys(),ovoc.token_to_ix.values())}

In [14]:
sorted_tokens = [id2tok[i] for i in sorted(list(id2tok))]

In [15]:
global_rupo_engine.get_stresses("ереп")

[2]

In [16]:
sorted_tokens_unbpe = [token.replace("@", '') for token in sorted_tokens]

In [17]:
syllables_tokens = [global_rupo_engine.get_stresses(token) for token in sorted_tokens_unbpe]

In [18]:
VOWELS = "уёеэоаыяию"

In [19]:
def count_vowels(word, idx):
    before = 0
    after = 0
    for s in word[:idx]:
        if s in VOWELS:
            before += 1
    for s in word[idx+1:]:
        if s in VOWELS:
            after += 1
    return before, after

In [22]:
class RhymeTranslator_F(object):
    
    def __init__(self, model: ITranslationModel, rupo_engine = None):
        
        self._model = model
        self._out_voc = model.get_output_vocabulary()
        
        self._suffix_rhyme_tester = RuReversedSuffixRhymeTester()
        self._word_rhyme_tester = RuReversedWordRhymeTester(rupo_engine = rupo_engine)
    
    
    def _lines_to_model_lines(self, lines):
        return list(map(get_reversed, lines))
    
    def _merge_tokens(self, line):
        return line.replace('@@ ', '')
    
    def _model_tokens_to_model_line(self, output, eos_as_space):
        [line] = self._out_voc.to_lines([output])
        if eos_as_space and output[-1] == self._out_voc.eos_ix:
            line += ' '
        return self._merge_tokens(line)
    
    def _model_tokens_to_lines(self, outputs):
        lines = self._out_voc.to_lines(outputs)
        return [get_reversed(self._merge_tokens(l)) for l in lines]
    
    def _apply_softmax(self, logits, temperature):
        
        if temperature != 1:
            if temperature < 1:
                # Convert to 64-bit float to avoid overflows:
                # Note: There will still be overflows for T < 0.03
                logits = logits.astype(np.float64)
            
            logits /= temperature

        np.exp(logits, out = logits)
        logits /= logits.sum(axis = -1)[..., np.newaxis]
        return logits
    
    
    def translate_lines(self,
                        lines,
                        sample_temperature = 0, 
                        max_len = 100, 
                        rhythme_line=0):
        
        model_lines = self._lines_to_model_lines(lines)
        # list len 4 with arrays
        state = self._model.make_initial_state(model_lines)
        eoses = np.array([self._out_voc.eos_ix for i in range(4)])
        
        outputs = np.empty((len(lines), max_len + 100), dtype = np.int64)
        outputs[:, 0] = self._out_voc.bos_ix
        finished = np.zeros((len(lines),), dtype = bool)
        
        previous_state = state
        last_state = state
        word_count = max_len
        start=True
        idx=1
        t=0
        
        while t < word_count:
            next_word = False
            counter = 0
            buffer = []
            tokens_to_add = []
            while not next_word:
                counter += 1
                print(idx)
                
                state, logits = self._model.get_next_state_and_logits(state, outputs[:, :(idx+len(tokens_to_add))])
                
                if sample_temperature:
                    # Sample from softmax with temperature:
                    logits = self._apply_softmax(logits, sample_temperature)
                    next_tokens = np.array([np.random.choice(len(probs), p=probs) for probs in logits])
                else:
                    next_tokens = np.argmax(logits, axis=-1)
                
                cur_token = sorted_tokens[next_tokens[rhythme_line]] # str
                
                condition = False # start new word
                if len(buffer) > 0:
                    condition = (cur_token[-1] != '@' and not start and buffer[-1][-1] != '@')
                else:
                    condition = (cur_token[-1] != '@' and not start)
                if cur_token == "_EOS_":
                    state = previous_state
                    tokens_to_add = []
                    continue
                
                if condition: 
                    print("cur", cur_token)
                    print("buf", buffer)
                    word = get_reversed(' '.join(buffer).replace('@@ ', ''))
                    buffer = []
                    stress = global_rupo_engine.get_stresses(word)
                    #a = np.array(tokens_to_add + [np.array([self._out_voc.eos_ix] * 4)])  
                    if len(stress) == 0:
                        state = previous_state
                        tokens_to_add = []
                        continue
                        
                    stress = stress[0]
                    before, after = count_vowels(word, stress)
                    #if (before > 1) or (after > 0) or (before + after == 0):
                    if len(word) > 3 and ((after > 0) or (before + after == 0)):
                        state = previous_state
                        tokens_to_add = []
                    else:
                        for i in range(len(tokens_to_add)):
                            print("ID", idx+i)
                            outputs[:, idx + i] = tokens_to_add[i]
                        print(word)
                        idx += len(tokens_to_add)
                        tokens_to_add = []
                        #outputs[:, t + 1] = next_tokens
                        next_word=True
                        previous_state = last_state
                        t += 1
                        
                        finished |= outputs[:, idx-1] == self._out_voc.eos_ix
                    
                        if finished.sum() == len(lines):
                            break 
                else:
                    buffer.append(cur_token)
                    tokens_to_add.append(next_tokens)
                    outputs[:, idx+len(tokens_to_add) - 1] = next_tokens
                    last_state = state
                    start = False
                    
                # Early exis if all lines finished
        outputs[:, idx] = self._out_voc.eos_ix
        print(outputs)
        return self._model_tokens_to_lines(outputs)

In [23]:
translator = RhymeTranslator_F(model_loaded, global_rupo_engine)

In [35]:
RHYME_DEBUG_PRINT = 1

assert global_rupo_engine.is_rhyme('вещи', 'ветер') # This is how Rupo thinks

lines = '''Your voice
Called me outside the window
Of uncontested summer all things raise
seamless air'''.split('\n')
#lines = '''Two roads diverged in a yellow wood,
#And sorry I could not travel both
#And be one traveler, long I stood
#And looked down one as far as I could
#To where it bent in the undergrowth;
#'''

print('\n'.join(translator.translate_lines(lines, sample_temperature=1.2, max_len=5, rhythme_line=0)))

1
1
1
1
1
1
1
1
1
cur асолог
buf ['.@@', 'солог', 'йишйен@@', 'тсу', 'йе@@', 'от@@', 'Э']
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
cur солог
buf ['.@@', 'солог', 'й@@', 'ыдж@@', 'а@@', 'К', ',@@', 'мосолог', 'ьшеч@@', 'о@@', 'М', '.@@', 'солог', 'йищя@@', 'ма@@', 'З', '?@@', 'енс', 'о@@', 'В', '…@@', 'солог', 'йикс@@', 'йа@@', 'Т', ',@@', 'мосолог', 'еоннещ@@', 'ыс@@', 'ор@@', 'П', '...@@', 'асолог', 'еынч@@', 'е@@', 'В']
1
1
1
cur ьталедс
buf ['.@@', 'салг']
1
cur солог
buf []
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
cur ее
buf ['.@@', 'ястеад@@', 'за@@', 'Р', '!@@', 'солог', 'йын@@', 'ьло@@', 'Б', '.@@', 'оволс', 'е@@', 'од@@', 'ж@@', 'а@@', 'Ш', '.@@', 'солог']
1
1
1
cur удуб
buf ['.@@', 'ьтазакс']
ID 1
ID 2
сказать.
3
cur ароп
buf []
3
3
3
3
3
cur ьтсонжомзов
buf ['ьлов@@', 'зо@@', 'П']
Позволь
3
3
3
3
3
cur имабуг
buf ['оглод@@', 'е@@', 'Н']
3
3
3
3
3
cur окобулг
buf ['ум@@', 'о@@', 'К']
3
cur ьшил
buf []
3
cur М
buf []
3
3
3
3
3
3

In [None]:
#print()
#print('\n'.join(translator.translate_lines_with_rhyme(lines,
#                                                      rhyme_type = RhymeType.WORD,
#                                                      sample_temperature=0.5,
#                                                      rhyme_test_counts=(5, 5, 2),
#                                                      max_total_rhyme_tests = 1000)))

Лохматый голос,

В сторону окна

Коллекция жизнь растет

скругляя воздух

0) родным криком....
1) пробьет душа окном,
2) лето, стад растём
3) мигрой, отравить воздух
стальных лесов, монет, иметь!
типаж дождями гонётся окном

In [None]:
Bad: """Сквозь голоса.
Я отчётлыняюсь за окном
растем что-нибудь быстрее...
Вновь воздух"""

In [None]:
Ты молчишь?
моих поцелуев опишется окна
огонь, когда-нибудь вставали,
Далеко быть

Свой голос,
на улице ждёт.
стальных лесов, монет, иметь!
Отпахлый воздух

In [113]:
RHYME_DEBUG_PRINT = 1

lines = '''Torches are made to light, jewels to wear,
Dainties to taste, fresh beauty for the use,
Herbs for their smell, and sappy plants to bear;
Things growing to themselves are growth’s abuse,
Seeds spring from seeds, and beauty breedeth beauty;
Thou wast begot; to get it is thy duty.'''.split('\n')
print('\n'.join(translator.translate_lines(lines)))
print()
print('\n'.join(translator.translate_lines_with_rhyme(lines,
                                                      rhyme_type = RhymeType.SUFFIX,
                                                      rhyme_test_counts=(5, 5, 2),
                                                      max_total_rhyme_tests = 1000)))
print()
print('\n'.join(translator.translate_lines_with_rhyme(lines,
                                                      rhyme_type = RhymeType.WORD,
                                                      rhyme_test_counts=(5, 5, 2),
                                                      max_total_rhyme_tests = 1000)))

IndexError: list index out of range

In [15]:
# Model internal translation function:
lines = list(map(get_reversed,
                 ['Let this wind blow through the night',
                  'And we are going away',
                  'The animal hasn\'t crossed the street',
                  'Because it was too tired']))
translated = model_loaded.translate_lines(lines)[0]
print('\n'.join([get_reversed(line) for line in translated]).replace(' @@', ''))

Он говорит о том, что этот ветер прорывается сквозь ночь,
Ладно, мы уходим прочь.
Крупное животное проходил через улицу,
Нет, они так устали.


In [16]:
RHYME_DEBUG_PRINT = 1

lines = '''Load up on guns and bring your friends
It's fun to lose and to pretend
She's over bored and self assured
Oh no, I know a dirty word'''.split('\n')
print('----ARGMAX----')
print('\n'.join(translator.translate_lines(lines)))
print()
print('----RHYME_WORD/ARGMAX----')
print('\n'.join(translator.translate_lines_with_rhyme(lines,
                                                      rhyme_type = RhymeType.WORD,
                                                      rhyme_test_counts=(3, 10, 3),
                                                      max_total_rhyme_tests = 0)))
print()
print('----RHYME_SUFFIX/ARGMAX----')
print('\n'.join(translator.translate_lines_with_rhyme(lines,
                                                      rhyme_type = RhymeType.SUFFIX,
                                                      rhyme_test_counts=(3, 10, 3),
                                                      max_total_rhyme_tests = 0)))

----ARGMAX----
Приведи своих друзей
Это не так, весело, чтобы проиграть и притворяться.
Мне стало скучно и быть уверенным в себе.
"Ну-ка", я знаю непристойное слово,

----RHYME_WORD/ARGMAX----
*** DEBUG: Failed to find rhyme. ***
*** DEBUG: Rhyme found! ***
Приведи своих друзей
Это не так, весело, чтобы проиграть и притворяться.
Мне стало скучно и был уверен.
"Человек", я знаю. Грязное словечко.

----RHYME_SUFFIX/ARGMAX----
*** DEBUG: Failed to find rhyme. ***
*** DEBUG: Failed to find rhyme. ***
Приведи своих друзей
Это не так, весело, чтобы проиграть и притворяться.
Мне стало скучно и быть уверенным в себе.
"Ну-ка", я знаю непристойное слово,


In [17]:
RHYME_DEBUG_PRINT = 1

lines_ru = '''Крошка сын к отцу пришел,
и спросила кроха:
— Что такое хорошо
и что такое плохо?'''

# Translated to English with YandexTranslate
# And punctuation manually removed

lines = '''Baby son to his father came
and asked crumbs
What is good
what's wrong'''.split('\n')
print('----ARGMAX----')
print('\n'.join(translator.translate_lines(lines)))
print()
print('----RHYME_WORD/ARGMAX----')
print('\n'.join(translator.translate_lines_with_rhyme(lines,
                                                      rhyme_type = RhymeType.WORD,
                                                      # 1 variants of token at last position
                                                      # 20 variants at 'last minus 1' position
                                                      # 2 variants at 'last minus 2' position
                                                      rhyme_test_counts=(1, 20, 2),
                                                      max_total_rhyme_tests = 2500)))
print()
print('----RHYME_SUFFIX/ARGMAX----')
print('\n'.join(translator.translate_lines_with_rhyme(lines,
                                                      rhyme_type = RhymeType.SUFFIX,
                                                      # 1 variants of token at last position
                                                      # 20 variants at 'last minus 1' position
                                                      # 2 variants at 'last minus 2' position
                                                      rhyme_test_counts=(1, 20, 2),
                                                      max_total_rhyme_tests = 2500)))

----ARGMAX----
Ваш сын вернулся к отцу.
Не спрашивай меня, и все крошки.
- это хорошо.
Это неправильно.

----RHYME_WORD/ARGMAX----
*** DEBUG: Rhyme found! ***
*** DEBUG: Rhyme found! ***
Он в том, что с сыном пришёл его отец.
и спросила у детей.
- это плохо.
Это не плохо.

----RHYME_SUFFIX/ARGMAX----
*** DEBUG: Rhyme found! ***
*** DEBUG: Rhyme found! ***
Он вернулся с сыном с отцом.
Попросила меня с ребёнком.
- прекрасно.
Это неправильно.


In [18]:
RHYME_DEBUG_PRINT = 1

lines_ru = '''У меня секретов нет,
слушайте, детишки,
папы этого ответ
помещаю в книжке.'''

# Translated to English with YandexTranslate
# And punctuation manually removed

lines = '''I have no secrets
listen kids
dads this answer
I put it in the book'''.split('\n')

print('----ARGMAX----')
print('\n'.join(translator.translate_lines(lines)))
print()
print('----RHYME_WORD/ARGMAX----')
print('\n'.join(translator.translate_lines_with_rhyme(lines,
                                                      rhyme_type = RhymeType.WORD,
                                                      # 5 variants of token at last position
                                                      # 5 variants at 'last minus 1' position
                                                      # 2 variants at 'last minus 2' position
                                                      rhyme_test_counts=(1, 5, 3, 5),
                                                      max_total_rhyme_tests = 0)))
print()
print('----RHYME_SUFFIX/ARGMAX----')
print('\n'.join(translator.translate_lines_with_rhyme(lines,
                                                      rhyme_type = RhymeType.SUFFIX,
                                                      # 5 variants of token at last position
                                                      # 5 variants at 'last minus 1' position
                                                      # 2 variants at 'last minus 2' position
                                                      rhyme_test_counts=(1, 5, 3, 5),
                                                      max_total_rhyme_tests = 0)))

----ARGMAX----
У меня нет секретов,
Ох, слушать, детей,
Приказ от отцов на этот ответ.
Положила её в книге,

----RHYME_WORD/ARGMAX----
*** DEBUG: Failed to find rhyme. ***
*** DEBUG: Rhyme found! ***
У меня нет секретов,
Ох, слушать, детей,
Ответ от этих отцов, вот это.
Я внесла всю книгу в этом,

----RHYME_SUFFIX/ARGMAX----
*** DEBUG: Failed to find rhyme. ***
*** DEBUG: Rhyme found! ***
У меня нет секретов,
Ох, слушать, детей,
Редента с этим ответом.
Я внесла всю книгу в этом,


In [19]:
RHYME_DEBUG_PRINT = 1

lines_ru = '''У меня секретов нет,
слушайте, детишки,
папы этого ответ
помещаю в книжке.'''

# Translated to English with YandexTranslate
# And punctuation manually removed

lines = '''I have no secrets
listen kids
dads this answer
I put it in the book'''.split('\n')

print('----ARGMAX----')
print('\n'.join(translator.translate_lines(lines)))
print()
print('----RHYME_WORD/ARGMAX----')
print('\n'.join(translator.translate_lines_with_rhyme(lines,
                                                      rhyme_type = RhymeType.WORD,
                                                      # 1 variants of token at last position
                                                      # 20 variants at 'last minus 1' position
                                                      # 2 variants at 'last minus 2' position
                                                      rhyme_test_counts=(1, 20, 2),
                                                      max_total_rhyme_tests = 0)))
print()
print('----RHYME_SUFFIX/ARGMAX----')
print('\n'.join(translator.translate_lines_with_rhyme(lines,
                                                      rhyme_type = RhymeType.SUFFIX,
                                                      # 1 variants of token at last position
                                                      # 20 variants at 'last minus 1' position
                                                      # 2 variants at 'last minus 2' position
                                                      rhyme_test_counts=(1, 20, 2),
                                                      max_total_rhyme_tests = 0)))

----ARGMAX----
У меня нет секретов,
Ох, слушать, детей,
Приказ от отцов на этот ответ.
Положила её в книге,

----RHYME_WORD/ARGMAX----
*** DEBUG: Rhyme found! ***
*** DEBUG: Rhyme found! ***
У меня нет друзей,
Ох, слушать, детей,
Они смотрят на это.
Я внесла всю книгу в этом,

----RHYME_SUFFIX/ARGMAX----
*** DEBUG: Rhyme found! ***
*** DEBUG: Rhyme found! ***
У меня нет секретов,
Послушай, мальчиков,
Редента с этим ответом.
Я внесла всю книгу в этом,


In [20]:
RHYME_DEBUG_PRINT = 1

lines_ru = '''Этот в грязь полез и рад.
что грязна рубаха.
Про такого говорят:
он плохой, неряха.'''

# Translated to English with YandexTranslate
# And punctuation manually removed

lines = '''This in the dirt and got excited
that dirty shirt
About this say
he's bad sloppy'''.split('\n')

print('----ARGMAX----')
print('\n'.join(translator.translate_lines(lines)))
print()
print('----RHYME_WORD/ARGMAX----')
print('\n'.join(translator.translate_lines_with_rhyme(lines,
                                                      rhyme_type = RhymeType.WORD,
                                                      # 1 variants of token at last position
                                                      # 20 variants at 'last minus 1' position
                                                      # 2 variants at 'last minus 2' position
                                                      rhyme_test_counts=(1, 20, 2),
                                                      max_total_rhyme_tests = 0)))
print()
print('----RHYME_SUFFIX/ARGMAX----')
print('\n'.join(translator.translate_lines_with_rhyme(lines,
                                                      rhyme_type = RhymeType.SUFFIX,
                                                      # 1 variants of token at last position
                                                      # 20 variants at 'last minus 1' position
                                                      # 2 variants at 'last minus 2' position
                                                      rhyme_test_counts=(1, 20, 2),
                                                      max_total_rhyme_tests = 0)))

----ARGMAX----
Он врывается в грязь и взволнован.
Это эта коварная рубашка,
так говорят:
Ладно, это плохо.

----RHYME_WORD/ARGMAX----
*** DEBUG: Rhyme found! ***
*** DEBUG: Failed to find rhyme. ***
Он врывается в грязь и взволнован.
Это такая грязная футболка,
так говорят:
Ладно, это плохо.

----RHYME_SUFFIX/ARGMAX----
*** DEBUG: Rhyme found! ***
*** DEBUG: Rhyme found! ***
Наверное, в грязи, и я была взволнована.
Это кожаная рубашкана,
Это то, что здесь сказано:
Очевидно.
