# Generating thematic sentences from the grammar

In [107]:
import numpy as np
import random
import re
import os
import warnings
from operator import itemgetter
from sklearn.metrics import accuracy_score
from typing import List, Tuple, Dict, Callable
from gensim.models import Word2Vec

In [108]:
# Disable annoying warnings from gensim
warnings.filterwarnings("ignore")

## Define nontrivial polish language grammar

In [109]:
NONTERMINALS = {
    'S': [
        ('VERB_PHRASE_IMPS',),
        ('VERB_PHRASE_IMPS', 'CONJ', 'VERB_PHRASE_IMPS'),
        ('NOM_PHRASE_SG_M', 'VERB_PHRASE_SG_FIN_M_TER'),
        ('NOM_PHRASE_SG_M', 'VERB_PHRASE_SG_FIN_M_TER',
         'CONJ', 'VERB_PHRASE_SG_FIN_M_TER'),
        ('NOM_PHRASE_SG_M', 'VERB_PHRASE_SG_FIN_M_TER',
         'CONJ', 'NOM_PHRASE_SG_M', 'VERB_PHRASE_SG_FIN_M_TER'),
    ],
    'VERB_PHRASE_IMPS': [
        ('VERB_IMPS',),
        ('ADV', 'VERB_IMPS'),
        ('VERB_IMPS', 'ACC_PHRASE_SG_M1'),
        ('VERB_IMPS', 'ACC_PHRASE_PL_F'),
        ('VERB_IMPS', 'ACC_PHRASE_PL_N2'),
        ('ADV', 'VERB_IMPS', 'ACC_PHRASE_SG_M1'),
        ('ADV', 'VERB_IMPS', 'ACC_PHRASE_PL_F'),
        ('ADV', 'VERB_IMPS', 'ACC_PHRASE_PL_N2'),
        
    ],
    'VERB_PHRASE_SG_FIN_M_TER': [
        ('VERB_SG_FIN_M_TER',),
        ('ADV', 'VERB_SG_FIN_M_TER'),
        ('VERB_SG_FIN_M_TER', 'ACC_PHRASE_SG_M1'),
        ('VERB_SG_FIN_M_TER', 'ACC_PHRASE_PL_F'),
        ('VERB_SG_FIN_M_TER', 'ACC_PHRASE_PL_N2'),
        ('ADV', 'VERB_SG_FIN_M_TER', 'ACC_PHRASE_SG_M1'),
        ('ADV', 'VERB_SG_FIN_M_TER', 'ACC_PHRASE_PL_F'),
        ('ADV', 'VERB_SG_FIN_M_TER', 'ACC_PHRASE_PL_N2'),
    ],
    'NOM_PHRASE_SG_M': [
        ('SUBST_SG_NOM_M',),
        ('SUBST_SG_NOM_M', 'PREP_ACC_PHRASE'),
        ('ADJ_PHRASE_SG_NOM_M', 'SUBST_SG_NOM_M'),
        ('ADJ_PHRASE_SG_NOM_M', 'SUBST_SG_NOM_M', 'PREP_ACC_PHRASE'),
    ],
    'ACC_PHRASE_SG_M1': [
        ('SUBST_SG_ACC_M1',),
        ('ADJ_PHRASE_SG_ACC_M1', 'SUBST_SG_ACC_M1',),
    ],
    'ACC_PHRASE_PL_F': [
        ('SUBST_PL_ACC_F',),
        ('ADJ_PHRASE_PL_ACC_F_N2', 'SUBST_PL_ACC_F'),
    ],
    'ACC_PHRASE_PL_N2': [
        ('SUBST_PL_ACC_N2',),
        ('ADJ_PHRASE_PL_ACC_F_N2', 'SUBST_PL_ACC_N2'),
    ],
    'ADJ_PHRASE_SG_NOM_M': [
        ('ADJ_SG_NOM_M',),
        ('ADJ_SG_NOM_M', 'ADJ_SG_NOM_M'),
    ],
    'ADJ_PHRASE_SG_ACC_M1': [
        ('ADJ_SG_ACC_M1',),
        ('ADJ_SG_ACC_M1', 'ADJ_SG_ACC_M1'),
    ],
    'ADJ_PHRASE_PL_ACC_F_N2': [
        ('ADJ_PL_ACC_F_N2',),
        ('ADJ_PL_ACC_F_N2', 'ADJ_PL_ACC_F_N2'),
    ],
    'PREP_ACC_PHRASE': [
        ('PREP_ACC', 'ACC_PHRASE_SG_M1'),
        ('PREP_ACC', 'ACC_PHRASE_PL_F'),
        ('PREP_ACC', 'ACC_PHRASE_PL_N2'),
    ],
    
    # Productions with terminals
    'VERB_SG_FIN_M_TER': [('verb:fin:sg:ter.*:refl',)],
    'VERB_IMPS': [('verb:imps',)],
    'SUBST_SG_NOM_M': [('subst:sg:nom:m',)],
    'SUBST_SG_ACC_M1': [('subst:sg:acc:m1',)],
    'SUBST_PL_ACC_F': [('subst:pl:acc:f',)],
    'SUBST_PL_ACC_N2': [('subst:pl:acc:n2',)],
    'ADJ_SG_NOM_M': [('adj:sg:nom.voc:m1.m2.m3',)],
    'ADJ_SG_ACC_M1': [('adj:sg:acc:m1',)],
    'ADJ_PL_ACC_F_N2': [('adj:pl:acc:m2.m3.f.n1.n2.p2.p3',)],
    'ADV': [('adv:',)],
    'PREP_ACC': [('prep:acc',)],
    'CONJ': [('^conj$',)],
}  

In [110]:
TERMINALS = (
    'verb:fin:sg:ter.*:refl',
    'verb:imps',
    'subst:sg:nom:m',
    'subst:sg:acc:m1',
    'subst:pl:acc:f',
    'subst:pl:acc:n2',
    'adj:sg:nom.voc:m1.m2.m3',
    'adj:sg:acc:m1',
    'adj:pl:acc:m2.m3.f.n1.n2.p2.p3',
    'adv:',
    'prep:acc',
    '^conj$',
)

In [111]:
class NoNonterminal(Exception):
    pass

In [112]:
class TooLongSent(Exception):
    pass

In [113]:
class Symbol:
    def __init__(self, symbol: str):
        self.symbol = symbol

In [114]:
class Terminal(Symbol):
    pass

In [115]:
class Nonterminal(Symbol):
    def __init__(self, symbol: str, productions: List[Symbol]):
        super().__init__(symbol)
        self.productions = productions
        
    def production(self) -> Tuple[Symbol]:

        def create_new_symbol(symbol) -> Symbol:
           if symbol in NONTERMINALS:
               return Nonterminal(symbol, NONTERMINALS[symbol])
           else:
               return Terminal(symbol)
        
        # Draw the production
        rand_prod_ind = np.random.choice(len(self.productions))
        rand_prod = self.productions[rand_prod_ind]
            
        return list(map(create_new_symbol, rand_prod))

In [116]:
class Generator:
    
    def expand_terminal(self, symbols: List[Symbol]) -> List:        

        # Extract nonterminals
        nonterminals = [symbol for symbol in symbols
                        if isinstance(symbol, Nonterminal)]
            
        if not nonterminals:
            raise NoNonterminal
            
        # Expand random nonterminal
        expand_ind = np.random.choice(len(nonterminals))
        nonterminal = nonterminals[expand_ind]
        new_symbols = nonterminal.production()
            
        # Swap nonterminal with new symbols
        nonterminals_processed = 0
        for ind in range(len(symbols)):
            if isinstance(symbols[ind], Nonterminal):
                nonterminals_processed += 1
                    
                if nonterminals_processed-1 == expand_ind:
                        
                    # Delete the old nonterminal
                    symbols.pop(ind)
                                
                    # Insert new ones
                    symbols = symbols[:ind] + new_symbols + symbols[ind:]

        return symbols
    
    def symbols_to_strings(self, symbols: List[Symbol]) -> List:
        return [symbol.symbol for symbol in symbols]
        
    def gen_terminals(self, start_symbol: Symbol) -> List:
        symbols = [start_symbol]
        
        # Expand until there is any nonterminal in the symbols
        while True:
            try:
                symbols = self.expand_terminal(symbols)
            except NoNonterminal:
                return self.symbols_to_strings(symbols)
            

## Generate some sentence schemas and group them by the number of tokens

In [117]:
gen = Generator()

In [118]:
def create_schemas(n_iter: int = 10000, schemas: Dict = {}) -> Dict:
    for i in range(n_iter):
        
        start_symbol = Nonterminal('S', NONTERMINALS['S'])
        
        schema = tuple(gen.gen_terminals(start_symbol))
        schema_len = len(schema)
        
        # Update schemas
        if schema_len in schemas:
            schemas[schema_len].add(schema)
        else:
            schemas[schema_len] = {schema}
            
    # Map sets to tuples to enable drawing
    schemas = {key: tuple(val) for key, val in schemas.items()}
            
    return schemas

In [119]:
schemas = create_schemas()

# Show some schemas
schemas[3]

(('verb:imps', 'adj:pl:acc:m2.m3.f.n1.n2.p2.p3', 'subst:pl:acc:n2'),
 ('subst:sg:nom:m', 'adv:', 'verb:fin:sg:ter.*:refl'),
 ('adv:', 'verb:imps', 'subst:pl:acc:f'),
 ('subst:sg:nom:m', 'verb:fin:sg:ter.*:refl', 'subst:sg:acc:m1'),
 ('verb:imps', 'adj:pl:acc:m2.m3.f.n1.n2.p2.p3', 'subst:pl:acc:f'),
 ('verb:imps', '^conj$', 'verb:imps'),
 ('subst:sg:nom:m', 'verb:fin:sg:ter.*:refl', 'subst:pl:acc:f'),
 ('adv:', 'verb:imps', 'subst:pl:acc:n2'),
 ('adj:sg:nom.voc:m1.m2.m3', 'subst:sg:nom:m', 'verb:fin:sg:ter.*:refl'),
 ('verb:imps', 'adj:sg:acc:m1', 'subst:sg:acc:m1'),
 ('subst:sg:nom:m', 'verb:fin:sg:ter.*:refl', 'subst:pl:acc:n2'),
 ('adv:', 'verb:imps', 'subst:sg:acc:m1'))

## Extract the grammar categories found in the grammar schemas

In [120]:
class PolimorfGen:
    POLIMORF_PATH = './data/polimorfologik-2.1.txt'
    
    def __init__(self):
        self.grammar_cats = dict((terminal, [])
                                  for terminal in TERMINALS)
    
    def __iter__(self):
        with open(self.POLIMORF_PATH) as f:
            yield from f
            
    def find_terminal_occ(self, line: str):
        """
        Search for each pattern (terminal)
        in the line of the polimorfologik file
        """
        
        base, token, grammar_cats = line.split(';')
        
        for terminal in self.grammar_cats:
            pattern = re.compile(terminal)
        
            if pattern.search(grammar_cats):
                self.grammar_cats[terminal].append((base, token))


In [121]:
polimorf = PolimorfGen()

# Extract the categories
for line in polimorf:
    polimorf.find_terminal_occ(line)

## Generate some sentences of length n without using the embeddings 

In [122]:
class SentGen:
    def rand_schema(self, n: int) -> Tuple:

        # Draw the sentence schema
        try:
            schemas_n_len = schemas[n]
            return random.choice(schemas_n_len)
        except KeyError:
            raise TooLongSent

    def core_sent_gen(self, n: int) -> Tuple:
        try:
            schema = self.rand_schema(n)
            
            # Draw the tokens
            tokens_with_bases = [random.choice(polimorf.grammar_cats[category])
                                 for category in schema]

            bases, tokens = list(zip(*tokens_with_bases))

            return ' '.join(tokens)
        except TooLongSent:
            print('Sentence too long')
    

In [123]:
sent_gen = SentGen()

In [124]:
sent_gen.core_sent_gen(5)

'pozaracjonalny kresowianin przegęści wysmuklałego alowca'

In [125]:
sent_gen.core_sent_gen(6)

'dwuramiennik upija lecz kirsch pogrubia kanapska'

In [126]:
sent_gen.core_sent_gen(7)

'margiel poprzeze konkatenacje narzyna wszakżeż zaszachuje sprawstwa'

In [127]:
sent_gen.core_sent_gen(9)

'kunowaty niegorzkokwaśny wierszorób za afroazjatyckiego Gałęzowskiego niemasowo stymuluje kompozytorstwa'

In [128]:
sent_gen.core_sent_gen(20)

'intrateluryczny trzonowiec poprzez nansenowskiego śmiechulskiego Bileckiego nielibrewilsko remonstruje antywęgierskiego wiertnika póty niekarkonoski Kunowski ponad przełącznikowe niesubarktyczne potomstwa szacunkowo uchowa miłosierdzia'

In [129]:
sent_gen.core_sent_gen(30)

Sentence too long


## Prepare the Word2Vec struct

In [130]:
class CorpusGen:
    CORPUS_PATH = './data/task3_train_segmented.txt'
    
    def __init__(self, n_sent):
        self.n_sent = n_sent
    
    def __iter__(self):
        with open(self.CORPUS_PATH) as f:
            for line, _ in zip(f, range(self.n_sent)):
                yield line.split()

In [131]:
if not os.path.isfile('./data/word2vec.model'):
    # Perform the embeddings only during the first session 
    
    sentences = CorpusGen(10_000_000)
    model = Word2Vec(sentences, min_count=1)
    model.save('./data/word2vec.model')
else:
    # The model exists
    
    # Gensim fails in case of loading the model for the second time
    try:
        model
    except NameError:
        model = Word2Vec.load('./data/word2vec.model')

In [132]:
len(model.wv.vocab)

2640650

## Generate thematical sentences from the grammar

In [133]:
TOPICS = (
    ('malina', 'koszyk', 'zazdrość', 'morderstwo'),
    ('programowanie', 'błąd', 'zmienna', 'deklaracja'),
    ('lotniskowiec', 'łódź', 'podwodny',
     'tonąć', 'atak', 'torpeda', 'ocean'),
)

In [134]:
class TopicSentGen(SentGen):
    def __init__(self, model):
        self.model = model
        
    def choose_best_token(self, tokens: List, topic: Tuple) -> str:
        
        # Draw the topic token
        topic_token = random.choice(topic)
        
        base_token_similarities = {pair: 0 for pair in tokens}
    
        def update_sims(pair: Tuple, token: str) -> Dict:
            if token in self.model.wv.vocab:
                base_token_similarities[pair] +=\
                    model.wv.similarity(token, topic_token)
    
        # For each pair similarity is a sum of
        # similarity(base, topic_token) and similarity(token, topic_word)
        for base, token in base_token_similarities:
            update_sims((base, token), base)
            update_sims((base, token), token)
            
        (best_base, best_token), sim = max(base_token_similarities.items(),
                                           key=itemgetter(1))
                    
        return best_token, sim
        
    def gen(self, n: int, topic: Tuple,
            n_to_choose: int = 1000) -> str:
        try:
            schema = self.rand_schema(n)
            
            categories = [random.choices(polimorf.grammar_cats[category],
                                         k=min(n_to_choose,
                                               len(polimorf.grammar_cats[category])))
                          for category in schema]
            
            topic_sent_sims = [self.choose_best_token(category, topic)
                               for category in categories]
            
            topic_sent, sims = list(zip(*topic_sent_sims))
            
            return list(topic_sent), np.mean(sims)
            
        except TooLongSent:
            print('Sentence too long')
            
    def gen_n_times(self, n: int, topic: Tuple, n_to_choose: int = 100,
                    n_times: int = 1000) -> List:
        
        sents_sims = [self.gen(n, topic, n_to_choose)
                      for _ in range(n_times)]
        
        # Return also sims for further processing
        return sents_sims

In [135]:
topic_sent_gen = TopicSentGen(model)

### Check some examples

In [136]:
topic_sent_gen.gen(5, TOPICS[0])

(['prześladowano', 'ergo', 'wlano', 'dyniowe', 'bagietki'], 0.8558836877346039)

In [137]:
topic_sent_gen.gen(15, TOPICS[2])

(['magnetohydrodynamiczny',
  'trawers',
  'popod',
  'truskawkowe',
  'mokra',
  'czytelniczo',
  'wodzi',
  'jajcarskie',
  'torpeda',
  'jakoż',
  'kamikaze',
  'popod',
  'kanapy',
  'bezapelacyjnie',
  'oblatuje'],
 0.9044725775718689)

In [138]:
topic_sent_gen.gen_n_times(8, TOPICS[1], n_times=2)

[(['czerwonobrązowy',
   'filc',
   'oczernia',
   'ustawienia',
   'oraz',
   'kapsyd',
   'zadeklaruje',
   'utrudnienia'],
  0.594270022585988),
 (['tępy',
   'pekari',
   'co',
   'kompatybilne',
   'ćwiczenia',
   'teoretycznie',
   'pociupcia',
   'obrażenia'],
  0.5940124187618494)]

## Choose the best topic sent with Positive Pointwise Mutual Information (PPMI)

### Create unigrams and bigrams structures

In [139]:
class NGrams:

    DATA_PATH = './data/poleval_2grams.txt'

    def create_bigrams_unigrams(self, k: int = 10) -> Tuple:
        
        unigrams, bigrams = {}, {}
        
        def update_unigrams(token: str, freq: str) -> None:
            if token in unigrams:
                unigrams[token] += int(freq)
            else:
                unigrams[token] = int(freq)
                
        def update_bigrams(predecesor: str, successor: str,
                           freq: str) -> None:
            bigrams[(predecesor, successor)] = int(freq)

        with open(self.DATA_PATH) as poleval:
            for line in poleval:
                freq, predecesor, successor = line.split()

                # Update bigrams ans unigrams
                if int(freq) >= k:
                    update_bigrams(predecesor, successor, freq)
                    update_unigrams(predecesor, freq)
                    update_unigrams(successor, freq)

        return unigrams, bigrams


In [140]:
ngrams = NGrams()

In [141]:
unigrams, bigrams = ngrams.create_bigrams_unigrams()

In [142]:
# Part of unigrams
dict(list(unigrams.items())[:5])

{',': 52290593,
 '.': 40527277,
 'rozdrobniona': 151,
 'sieć': 23967,
 'świadectwem': 1850}

In [143]:
# Part of bigrams
dict(list(bigrams.items())[:5])

{('nastąpiło', 'przedawnienie'): 32,
 ('podzielają', 'pogląd'): 32,
 ('rozdrobniona', 'sieć'): 11,
 ('świadectwem', ','): 87,
 ('świadectwem', '.'): 41}

In [144]:
class PPMI:
    def __init__(self, model, unigrams: Dict, bigrams: Dict):
        self.model = model
        self.unigrams = unigrams
        self.bigrams = bigrams
        self.all_unigrams = sum(unigrams.values())
    
    def measure_ppmi(self, sentence: List) -> float:
        predecesors = sentence.copy()
        successors = sentence.copy()
        
        predecesors.insert(0, '<BOS>')
        successors.append('<EOS>')
        
        def ppmi(predecesor: str, successor: str) -> float:
            numerator = bigrams.get((predecesor, successor),
                                    1.) * self.all_unigrams / 2
            denominator = unigrams.get(predecesor, 1.) *\
                          unigrams.get(successor, 1.)
            
            # POSITIVE pointwise mutual information
            return max(np.log(numerator / denominator), 0)
        
        sent_bigrams = list(zip(predecesors, successors))
        
        # Sum of PPMI of each bigram in the sent
        ppmi = sum([ppmi(predecesor, successor)
                   for predecesor, successor in sent_bigrams])
        
        return ppmi
    
    def choose_highest_ppmi(self, sents_sims: List) -> str:
        ppmi_sents = [(self.measure_ppmi(sent), sent)
                      for sent, sim in sents_sims]
        
        _, best_sent = max(ppmi_sents)
        
        return ' '.join(best_sent).capitalize()
    
    def choose_highest_sim(self, sents_sims: List) -> str:
        best_sent = max(sents_sims, key=itemgetter(1))[0]
        
        return ' '.join(best_sent).capitalize()
    
    def choose_highest_ppmi_sim(self, sents_sims: List) -> str:
        
        # Choose the sent with the highest geometric mean
        # of ppmi and mean token similarity
        ppmi_sim_sents = [(np.sqrt(self.measure_ppmi(sent) * sim), sent)
                          for sent, sim in sents_sims]
        
        _, best_sent = max(ppmi_sim_sents)
        
        return ' '.join(best_sent).capitalize()
    

## Final tests

### Generate random sequence of topics and coresponding sentences

In [145]:
ppmi = PPMI(model, unigrams, bigrams)

In [146]:
def gen_best_sent(optimizer: Callable[[List], str],
                  topic: str, sent_len: int) -> str:
    
    return optimizer(topic_sent_gen.gen_n_times(sent_len, topic))

In [147]:
def make_test(n_sents: int, sent_len: int,
              optimizer: Callable[[List], str]):
    
    # Rand topics
    topics = random.choices(range(3), k=n_sents)
    
    # Gen sentences
    for topic_ind in topics:
        print(gen_best_sent(optimizer, TOPICS[topic_ind], sent_len))
        
    # Return topics to allow checking predictions
    return topics

In [148]:
# You may check the predictions
def check_predictions(real: List, preds: List) -> float:
    try:
        print(accuracy_score(real, preds))
    except ValueError:
        print("Preds and list dimensions must match")

In [149]:
# Remind the topics
TOPICS

(('malina', 'koszyk', 'zazdrość', 'morderstwo'),
 ('programowanie', 'błąd', 'zmienna', 'deklaracja'),
 ('lotniskowiec', 'łódź', 'podwodny', 'tonąć', 'atak', 'torpeda', 'ocean'))

#### Only PPMI

In [150]:
real_short_ppmi = make_test(5, 7, ppmi.choose_highest_ppmi)

Nieulotny przywęglowy diploid prawostronnie zdezorientuje póty łaje
Konwerter popod niegonokokowe spodnium dalekosiężnie wyżyłuje rozwarcia
Dozorowano dyfuzyjne einsteinowskie sędzie póty zestalono tępiciela
Żbik popod dalekonośnego czapkę podstępniej zaklei tatarzyna
Tożsamościowo opuszczano owacyjne bahamy alić miłowano beri-beri


In [151]:
check_predictions([], real_short_ppmi)

Preds and list dimensions must match


In [152]:
real_long_ppmi = make_test(5, 12, ppmi.choose_highest_ppmi)

Świdwinianin popod mantuańskie zadry niehumanitarnie buforuje logiczniejszego poszczepiennego załadowcę póty wyceni fabuły
Elektrozawór skroś dopinki kardynalnie ocala mściwe różnobarwne homofobie jakoż użądli wybielające samobójstwa
Nieustraszony ustaszowski anorak popod diagonalne benity krztusi terazzo alić nieustępliwie pasteryzuje drakulę
Pancernik popod półgąsienicowego kiereńskiego złociście odkręci alić suprematystyczny kilkukilogramowy firmament odreaguje hipotrepsje
Lamaistyczny akceptacyjny porada obezwładniająco wypruje siłki oraz organicznie uwarunkuje odczasownikowe jednowymiarowe kooperacje


In [153]:
check_predictions([], real_long_ppmi)

Preds and list dimensions must match


#### Only similarity

In [154]:
real_short_sim = make_test(5, 7, ppmi.choose_highest_sim)

Gruczołowaty rytualny rozmaryn sennie dopcha duszące sitowia
Kauczukowy sandałowiec popod dorsza wyparowuje chłodnicze morderstwa
Kolczasty sezamowy miot akustycznie kłuje wróble mrowienia
Pomiarowy odcinkowy niejadek jonizuje heterozygotycznego atmosferycznego mausera
Dwukwiatowy koci dziewięciornik złociście obnaży skóropodobne naturalności


In [155]:
check_predictions([], real_short_sim)

Preds and list dimensions must match


In [156]:
real_long_sim = make_test(5, 12, ppmi.choose_highest_sim)

Kompozytowy filipiński maszt trojako posili przegubowe igiełki tedy zagon rozwala ascetyczne plazmy
Prosty podsłuch w jonowe złącza przeliczalnie alergizuje spawania póty stretch naskrobie kiepskości
Liliowy ślimak melodyjnie wymoczy palmowego wisusa ali pięciornik niebiańsko gryzie lejkowate dżdżownice
Zoom temu rejestratora aktywuje optyczne naprężenia atoli boa poprzez pojemniejsze serwa skala
Unix w domniemania zrezygnowanie wymiata elektrochemiczne naświetlania ergo shareware poprzez jądra rozciąga


In [157]:
check_predictions([], real_long_sim)

Preds and list dimensions must match


#### Combined PPMI and similarity

In [158]:
real_short_ppmi_sim = make_test(5, 7, ppmi.choose_highest_ppmi_sim)

Oktawowy tyciusieńki kompres popod pojmanego urodziwie wysmaruje
Vhs schłodzi korekcje póty dokupuje płowoszare ai
Pianka odkurzy fondue alić szkot skuma pahlawiego
Gidelski jednoręczny smętek nagradza sytego fluorowodorowego kapustę
Hełmiasty bezbarwny melon popod konkubenta zwymyśla ambicje


In [159]:
check_predictions([], real_short_ppmi_sim)

Preds and list dimensions must match


In [160]:
real_long_ppmi_sim = make_test(5, 12, ppmi.choose_highest_ppmi_sim)

Odłamkowy uraz dostojnie zmagazynuje bateryjne siksy alić hakowato skonsoliduje dostawcze magnetohydrodynamiczne hamilton
Amunicyjny legitymizm niespodzianie dokuje cicero tedy ostrzew nieprzewidzianie pompuje czułkowe tęższe haiti
Ubój nabłyszczy orzechodajnego biathlonistę ali łososiowy capote mongolsko sklei listkowe lojalne utarczki
Egoizm bateryjnie wytrze smutnego kurczaka vel jednobarwny podwładny natarczywie pasteryzuje niezniszczonego rekina
Zmizerniały oplot legnicko potroi estymatora póty lśniący przyzywający duran certyfikuje oświeceńsze zalecenia


In [161]:
check_predictions([], real_long_ppmi_sim)

Preds and list dimensions must match
