# In this notebook we can experiment with the final outputs of the generator. It's a copy of the demo file in which we try our results.


I started testing some things out just in case it'd prove difficult for you to run any of this cells :)

## Setting up

In [10]:
import numpy as np
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import random
import string
import torch
import torch.nn as nn
from torch.autograd import Variable
import math
import os
import pickle
import time
import gs_probdist as gspd
import semrel as sr
import gensim
import cardgen as cg

card_model = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary=True)

#opening and reading the corpus
#we will be using the full version of the descriptive corpus we made ~115k sentences
f = open('description-corpus-115k.txt', 'r', encoding='utf-8')
text = f.readlines() # List with sentences as elements
f.close()

# getting lower case and splitting it
sentences = [text[i].lower().split() for i in range(len(text))]

#getting the avg length of a sentence
lengths = [len(sent) for sent in sentences]
avg_sent_length = sum(lengths)/len(lengths) # ~27

class GRU(nn.Module):
    #init for input size, hidden size, output size and number of hidden layers.
    def __init__(self, input_s, hidden_s, output_s,n_layers = 1):
        super(GRU, self).__init__()
        self.input_s = input_s
        self.hidden_s = hidden_s
        self.output_s = output_s
        self.n_layers = n_layers
        # our encoder will be nn.Embedding
        # reminder: the encoder takes the input and outputs a feature tensor holding the information representing the input.
        self.encoder = nn.Embedding(input_s, hidden_s)
        #defining the GRU cell, still have to determine which parameters work best
        self.gru = nn.GRU(2*hidden_s, hidden_s, n_layers, batch_first=True, bidirectional=False)
        # defining linear decoder
        self.decoder = nn.Linear(hidden_s, output_s)

    def forward(self, input, hidden):
        #making sure that the input is a row vector
        input = self.encoder(input.view(1, -1))
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output = self.decoder(output.view(1,-1))
        return output, hidden

    def init_hidden(self):
        return Variable(torch.zeros(self.n_layers, 1, self.hidden_s))


def next_token_generator(seed, generation_length=100):
    hidden = decoder.init_hidden()

    for p in range(generation_length):
        
        prime_input = torch.tensor([word_to_freq[w] for w in seed.split()], dtype=torch.long)
        cont = prime_input[-2:] #last two words as input
        output, hidden = decoder(cont, hidden)
        
        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).exp()
        top_choice = torch.multinomial(output_dist, 1)[0]
        
        # Add predicted word to string and use as next input
        predicted_word = list(word_to_freq.keys())[list(word_to_freq.values()).index(top_choice)]
        seed += " " + predicted_word
#         inp = torch.tensor(word_to_ix[predicted_word], dtype=torch.long)

    return seed

def gen_input_words(mw, model):
    #mw = main word
    #model = embeddings used to generate the cards

    #generating the corresponding taboo card
    card_words = cg.card_generator(mw, cg.get_gold_probdist(), model)
    #set of words that we hope will appear in the description
    input_words = card_words[mw] + [mw]

    # extending the input_words set using semantic relations. Bigger set --> better chances of generating an approved word!
    # we will use the make_semrel_dict function to get synonyms, hyponyms and hypernyms of the MW.
    # we considered adding also semrel words from the tw, but the loose connection to the MW very fast
    # we will leave out antonyms as they might make they are "riskier" to use in a description.

    adds = []
    temp = sr.make_semrel_dict(mw)
    for k in temp.keys():
        if k != 'semrel_antonym':
            new = list(temp[k])
            adds += new
    adds = np.unique(adds)
    adds = [x.lower() for x in adds]
    input_words = np.unique(input_words + adds)

    # filtering out the input words that are not in our vocab. Shouldn't be a thing when using larger corpus
    input_words = [word for word in input_words if word in voc]
    return input_words

def description_generator(mw, model, n_seeds = 3, n_iterations = 10, debugging = False, printing = False):
    #mw = main word
    #model = embeddings used to generate the cards
    #n_seeds = if we are using 2 or 3 seeds during the sentence generation step
    #n_iterations = how many iterations we will do in the generation step
    #debugging = True if we want to print some statistics about the process. False if we only want the last 5 generated sentences.
    #printing = True will print something, based on debugging. If false, it will only return the final sentence
    
    #generating the input_words we are aiming to include in our description
    input_words = gen_input_words(mw, model)    
    #on average a descriptive sentence had 27 words/symbols.
    # we will equally divide them between our seeds
    
    
    # iterate until nice sentence comes up
    # we will add safety measure to not break everything
    i = 0
    index_in_sentence = -1
    
    
    #if we are using 3 seeds
    #the 3 most frequent ones in our corpus were "x is", 'x means' and "x can be found"
    if n_seeds == 3:
        #create the first sentence, dividing the whole sequence into equally long sub_sequences
        sentence_parts = np.array([next_token_generator(mw+' means', 7), next_token_generator(mw+' is', 7), next_token_generator(mw+' can be found', 5)])
        sentence =  " ".join(sentence_parts)
        eval_sentence = sentence.split()   
    
        # to keep track of scores
        scores = np.zeros(n_iterations)
        #first score vector and score
        #and accounting for the 3 times the MW appears already in the seeds
        score_vector = np.array([eval_sentence.count(word) for word in input_words])
        score_vector[input_words.index(mw)] -= 3 
        score = np.sum(score_vector)  

        # the covered vector will take care that we don't replace a segment that already contains an input word.
        covered = np.array([0,0,0])
        changes = np.zeros(len(score_vector))

        #known positions of input words in our sentence to know where input words are located and to which sub_sequence they belong.
        positions = np.zeros(len(eval_sentence))

        #we know the positions of the seeds
        positions[0] = 1
        positions[9] = 1
        positions[18] = 1
        
        #for practical purposes we stop generating after some fixed number of iterations in case the score was not reached.
        while i < n_iterations and score <2 :
            #aware that with this flow we are doing one iteration after reaching the desired score, but it's no big deal because score is designed to only go up.

            #checking if score improved
            new_score_vector = np.array([eval_sentence.count(word) for word in input_words])
            new_score_vector[input_words.index(mw)] -= 3 
            changes = new_score_vector - score_vector

            if True in (changes>0): #there was a change in the score. Assuming there is max 1 change per iteration from now on
                index = np.where(changes == 1)[0][0] #looking for the position in which an input_word was added
                word_that_was_added = input_words[index]
                
                #finding in which segment that new added word is in order to leave the segment untouched

                #this detects the index of the word that just came up in case that word was already in our sentence
                indices_in_sentence = np.where(np.array(eval_sentence) == word_that_was_added)[0]
                if len(indices_in_sentence) >1: #word appears at least twice
                    for d in indices_in_sentence:
                        if positions[d] != 1:
                            index_in_sentence = d
                            positions[d] = 1
                else:
                    index_in_sentence = indices_in_sentence[0]
                    positions[index_in_sentence] = 1
                    
                #keeping the segment in which the improvement took place, blocking it and continue the generating process
                if index_in_sentence in range(9) & covered[0]!=1:
                    sentence_parts[1] = next_token_generator(mw+' is', 7)
                    sentence_parts[2] = next_token_generator(mw+' can be found', 5)
                    sentence = ' '.join(sentence_parts)
                    covered[0] = 1
                elif index_in_sentence in range(9, 18) & covered[1] !=1:
                    sentence_parts[0] = next_token_generator(mw+' means', 7)
                    sentence_parts[2] = next_token_generator(mw+' can be found', 5)
                    sentence = ' '.join(sentence_parts)
                    covered[1] = 1
                elif index_in_sentence in range(18, 27) & covered[2] != 1:
                    sentence_parts[1] = next_token_generator(mw+' is', 7)
                    sentence_parts[0] = next_token_generator(mw+' means', 7)
                    sentence = ' '.join(sentence_parts)
                    covered[2] = 1
                eval_sentence = sentence.split()
                changes = np.zeros(len(score_vector))
                index_in_sentence = 0
                score_vector = new_score_vector
                score = np.sum(score_vector)

            #if there was no change
            else: #based on what is already covered
                if covered[0] ==0:
                    sentence_parts[0] = next_token_generator(mw+' means', 7) +' '
                #if the first part is already covered we can add it as input to generate the second
                if covered[1] ==0:
                    if covered[0]==1:
                        temp =  next_token_generator(sentence_parts[0]+' '+ mw+' is', 7) 
                        #taking off the first part from it
                        temp = temp.split()
                        sentence_parts[1] = " ".join(temp[9:])   
                    else:
                        sentence_parts[1] = next_token_generator(mw+' is', 7) 
                # same logic for the third part.
                if covered[2] == 0:
                    if covered[1] == 0:
                        sentence_parts[2] = next_token_generator(mw+' can be found', 5)
                    else:
                        temp =  next_token_generator(sentence_parts[1]+' '+ mw+' can be found', 5) 
                        #taking off the second part from it
                        temp = temp.split()
                        sentence_parts[2] = " ".join(temp[9:])
                sentence = ' '.join(sentence_parts)
                eval_sentence = sentence.split()
                score_vector = new_score_vector
                score = np.sum(score_vector)
            
            #choosing what to print
            if i == 0:
                print('The set of input words we are trying to introduce into our sequence is: '+str(input_words))
            if printing == True:
                if debugging ==True:
                    print("Sentence number: " + str(i+1))
                    print(sentence)
                    if True in (changes>0):
                        print("Changes vector: ")
                        print(changes)
                    print("Covered vector: ")
                    print(covered)
                    print("Positions vector: ")
                    print(positions)
                    if i == n_iterations-1:
                            print('The final sentence got a score of: '+str(score))
                else:
                    if i in range(n_iterations-5, n_iterations):
                        print("Sentence number: " + str(i+1))
                        print(sentence)
                        if i == n_iterations-1:
                            print('The final sentence got a score of: '+str(score))
            scores[i] = score
            i +=1
            
    #if we are using 2 seeds
    #the 2 most frequent ones in our corpus were "x is" and 'x means'
    if n_seeds == 2:
        #create the first sentence
        sentence_parts = np.array([next_token_generator(mw+' means', 11), next_token_generator(mw+' is', 12)])
        sentence =  " ".join(sentence_parts)
        eval_sentence = sentence.split()   
    
        # to keep track of scores
        scores = np.zeros(n_iterations)
        #first score vector and score
        #and accounting for the 3 times the MW appears already in the seeds
        score_vector = np.array([eval_sentence.count(word) for word in input_words])
        score_vector[input_words.index(mw)] -= 2
        score = np.sum(score_vector)  

        # the covered vector will take care that we don't replace a segment that we already "like"
        covered = np.array([0,0])
        changes = np.zeros(len(score_vector))

        #known positions of input words in our sentence
        positions = np.zeros(len(eval_sentence))

        #we know the positions of the seeds
        positions[0] = 1
        positions[14] = 1
        
        while i < n_iterations and score <2:
            #aware that with this flow we are doing one iteration after reaching the desired score, but it's no big deal because score is designed to only go up.

            #checking if score improved
            new_score_vector = np.array([eval_sentence.count(word) for word in input_words])
            new_score_vector[input_words.index(mw)] -= 2
            changes = new_score_vector - score_vector

            if True in (changes>0): #there was a change. Assuming there is max 1 change per iteration from now on
                index = np.where(changes == 1)[0][0] #looking for the position in which an input_word was added
                word_that_was_added = input_words[index] #if we stop assuming that, here we have to keep track of location and magnitude of changes
                
                #finding in which segment that new added word is in order to leave the segment untouched

                #this detects the index of the word that just came up in case that word was already in our sentence
                indices_in_sentence = np.where(np.array(eval_sentence) == word_that_was_added)[0]
                if len(indices_in_sentence) >1: #word appears at least twice
                    for d in indices_in_sentence:
                        if positions[d] != 1:
                            index_in_sentence = d
                            positions[d] = 1
                else:
                    index_in_sentence = indices_in_sentence[0]
                    positions[index_in_sentence] = 1
                #keeping the segment in which the improvement took place
                if index_in_sentence in range(14):
                    sentence_parts[1] = next_token_generator(mw+' is', 12)
                    sentence = ' '.join(sentence_parts)
                    covered[0] = 1
                elif index_in_sentence in range(14, 27):
                    sentence_parts[0] = next_token_generator(mw+' means', 11)
                    sentence = ' '.join(sentence_parts)
                    covered[1] = 1
                eval_sentence = sentence.split()
                changes = np.zeros(len(score_vector))
                index_in_sentence = 0
                score_vector = new_score_vector
                score = np.sum(score_vector)

            #if there was no change
            else: #based on what is already covered
                if covered[0] ==0:
                    sentence_parts[0] = next_token_generator(mw+' means', 11) 
                #if the first part is already covered we can add it as input to generate the second
                if covered[1] ==0:
                    if covered[0]==1:
                        temp =  next_token_generator(sentence_parts[0]+' '+ mw+' is', 12)
                        #taking off the first part from it
                        temp = temp.split()
                        sentence_parts[1] = " ".join(temp[12:])   
                    else:
                        sentence_parts[1] = next_token_generator(mw+' is', 7)
                sentence = ' '.join(sentence_parts)
                eval_sentence = sentence.split()
                score_vector = new_score_vector
                score = np.sum(score_vector)
            if i == 0:
                print('The set of input words we are trying to introduce into our sequence is: '+str(input_words))
            if printing == True:
                if debugging ==True:
                    print("Sentence number: " + str(i+1))
                    print(sentence)
                    if True in (changes>0):
                        print("Changes vector: ")
                        print(changes)
                    print("Covered vector: ")
                    print(covered)
                    print("Positions vector: ")
                    print(positions)
                    if i == n_iterations-1:
                            print('The final sentence got a score of: '+str(score))
                else:
                    if i in range(n_iterations-5, n_iterations):
                        print("Sentence number: " + str(i+1))
                        print(sentence)
                        if i == n_iterations-1:
                            print('The final sentence got a score of: '+str(score))
            scores[i] = score
            i +=1
    return sentence


def sentence_cleaner(sentence, mw, model):
    #replacing MW with "the main word" and TWs appearing in the sentence with one of their synonyms
    sentence = sentence.replace(mw, '.The main word')
    
    #replacing any TWs appearing in our sentence with some allowed synonym
    taboo_words = cg.card_generator(mw, cg.get_gold_probdist(), model)[mw]

    spl = np.array(sentence.split())
    for tw in taboo_words:
        if tw in spl:
           #getting synonyms of detected tw
            syns = sr.get_synonyms(tw)
            #if we have at least one
            if len(syns) > 0:
                syns = list(syns)
                #choose one randomly
                choice = np.random.choice(syns)
                #checking that the choosen one it not a taboo word either, or the main word + making sure that it doesn't loop
                while (choice in taboo_words or choice != mw) and len(syns) > 1:
                    syns = syns.pop(syns.index(choice))
                    choice = np.random.choice(syns)
                sentence = sentence.replace(tw, choice)
                #if all synonyms where taboo words or the mw
                if choice in taboo_words or choice == mw:
                    hypers = sr.get_hypernyms(tw)
                    #if we have at least one
                    if len(hypers) > 0:
                        hypers = list(hypers)
                        #choose one randomly
                        choice = np.random.choice(hypers)
                        #checking that the choosen one it not a taboo word either, or the main word + making sure that it doesn't loop
                        while (choice in taboo_words or choice != mw) and len(hypers) > 1:
                            syns = syns.pop(syns.index(choice))
                            choice = np.random.choice(syns)
                        #replacing in order to point the reader to think of this word as a hypernym 
                        sentence = sentence.replace(tw, choice)
                        #if all synonyms where taboo words or the mw
                        if choice in taboo_words or choice == mw:
                            sentence = sentence.replace(choice, "ERROR, NO IDEA!")  #panicking as a real player would.
                        else:
                            sentence = sentence.replace(choice,'Is a type of '+choice)
            #in case no synonyms were found
            else:
                hypers = sr.get_hypernyms(tw)
                #if we have at least one
                if len(hypers) > 0:
                    hypers = list(hypers)
                    #choose one randomly
                    choice = np.random.choice(hypers)
                    #checking that the choosen one it not a taboo word either, or the main word + making sure that it doesn't loop
                    while (choice in taboo_words or choice != mw) and len(hypers) > 1:
                        syns = syns.pop(syns.index(choice))
                        choice = np.random.choice(syns)
                        #replacing in order to point the reader to think of this word as a hypernym 
                        sentence = sentence.replace(tw, choice)
                        #if all synonyms where taboo words or the mw
                        if choice in taboo_words or choice == mw:
                            sentence = sentence.replace(choice, "ERROR, NO IDEA!")  #panicking as a real player would.
                        else:
                            sentence = sentence.replace(choice,'Is a type of '+choice)
                else: 
                    sentence = sentence.replace(tw, "NO IDEA!")
    sentence = sentence[1:]
    return sentence

def final_output(mw, card_model, n_seeds = 3, n_iterations = 10, debugging = False, printing = False):
    sentence = description_generator(mw, card_model, n_seeds, n_iterations, debugging, printing)
    output = sentence_cleaner(sentence, mw, card_model)
    return output

def load_model(x):
    if x ==1:
        with open("trigrams_model1.txt", "rb") as fp:
            trigrams = pickle.load(fp)
            
        voc = set()
        for tri in trigrams:
            voc = voc.union(set(np.union1d(np.array(tri[0]), np.asarray(tri[1]))))
        voc_length = len(voc) 
        word_to_freq = {word: i for i, word in enumerate(voc)}
            
        cont = []
        tar = []
        for context, target in trigrams:
            context_freqs = torch.tensor([word_to_freq[word] for word in context], dtype = torch.long)
            cont.append(context_freqs)
            target_freq = torch.tensor([word_to_freq[target]], dtype = torch.long)
            tar.append(target_freq)
        path = os.getcwd()+'/model1_trained.pt'
        hidden_s = 150
        n_layers = 1
        lr = 0.015
        decoder = GRU(voc_length, hidden_s, voc_length, n_layers)
        decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()
        decoder = torch.load(path)
        decoder.eval()
    elif x ==3:
        with open("trigrams_model3.txt", "rb") as fp:
            trigrams = pickle.load(fp)
            
        voc = set()
        for tri in trigrams:
            voc = voc.union(set(np.union1d(np.array(tri[0]), np.asarray(tri[1]))))
        voc_length = len(voc) 
        word_to_freq = {word: i for i, word in enumerate(voc)}
            
        cont = []
        tar = []
        for context, target in trigrams:
            context_freqs = torch.tensor([word_to_freq[word] for word in context], dtype = torch.long)
            cont.append(context_freqs)
            target_freq = torch.tensor([word_to_freq[target]], dtype = torch.long)
            tar.append(target_freq)
            
        path = os.getcwd()+'/model3_trained.pt'
        
        hidden_s = 50
        n_layers = 3
        lr = 0.015
        decoder = GRU(voc_length, hidden_s, voc_length, n_layers)
        decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()
        decoder.load_state_dict(torch.load(path))
        decoder.eval()
    elif x == 2:
        print('It is not safe to use this model. Please choose either model 1 or 3.')
    else:
        print('Please enter 1 or 3 to choose the model to be used.')
        
    return voc, voc_length, word_to_freq, decoder

## Loading trained model
Choosing which trained model to load. They are all GPU-based RNN models, trained on CPU over 100 epochs.
* 1:
    * Model with 1 hidden layer consisting of 150 nodes. Trained with a sample of 50,000 non-filtered trigrams containing ~16k tokens from our corpus' vocabulary (with a total of ~80k tokens, from which more than half only appeared once). The filtering step consisted of only sampling trigrams containing tokens that appear at least twice in our corpus, and was only implemented on model 3. Trained in about 12h.
* 2: 
    * Model with 2 hidden layers consisting of 75 nodes each. Also trained with a sample of 50,000 non-filtered trigrams containing ~16k tokens from our corpus' vocabulary. Unfortunately we didn't include a random seed for this trial either, and we did not save the corresponding set of trigrams. Although the generation step might work, it is not advised to use this model. Trained in about 5h
* 3:
    * Model with 3 hidden layers consisting of 50 nodes each. Trained with a sample of ~86k filtered trigrams containing ~19k tokens from our corpus' vocabulary. Although a random seed was now included, for efficiency reasons we also decided to save the trigrams in order to load them faster and make reproducibility easier. Trained in about 11h.

In [7]:
#To load model 1:
#voc, voc_length, word_to_freq, decoder = load_model(1)

#To load model 3:
voc, voc_length, word_to_freq, decoder = load_model(3)

## Examples from the demo using model 1
Pencil instead of airplane

### Example with 'cake' as main word, 3 seeds, debugging mode on to show covered and position vectors

In [3]:
final_output(mw = 'cake', card_model = card_model, n_seeds=3, n_iterations = 20, debugging = True, printing = True)

The set of input words we are trying to introduce into our sequence is: ['block', 'cake', 'coat', 'cookie', 'cover', 'dessert', 'dish', 'patty', 'tablet']
Sentence number: 1
cake means interrogation litre know aspirations basalt ventures them cake is indigo cells perpetuate pilotage immense visualization first cake can be found die contract ethnic repairs prescribes
Covered vector: 
[0 0 0]
Positions vector: 
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 0. 0. 0.]
Sentence number: 2
cake means secs joined first-of-a-kind doctor-material contract subs cake is danish binding starting undischarged citizens first-of-a-kin cake can be found nintendogs first-of-a-kind subsystems cushion vat
Covered vector: 
[0 0 0]
Positions vector: 
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 0. 0. 0.]
Sentence number: 3
cake means sixty shiva readers isosurface tick subroutine 250  cake is measuring atm consume wl hydrogen railroad oft cake can be f

'The main word means agnew disembark perennial unconditionally prioritize wors .The main word is lens perror gaunt rights-in dakarand mitzvah handles .The main word can be found wished carefully nav towel vettel'

### Example with 'cake' as main word, 2 seeds, debugging mode on to show covered and position vectors

In [4]:
final_output(mw = 'cake', card_model = card_model, n_seeds=2, n_iterations = 20, debugging = True, printing = True)

The set of input words we are trying to introduce into our sequence is: ['block', 'cake', 'coat', 'cookie', 'cover', 'dessert', 'dish', 'patty', 'tablet']
Sentence number: 1
cake means breaches take-in cakewalk owls camberwell deal divers dream associations exclusion hmg-coa cake is castle workloads beneficiary possession physiotherapy importantly mode
Covered vector: 
[0 0]
Positions vector: 
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
Sentence number: 2
cake means ethiopian sediment expense china-oxford-cornell mag assault speedking conceptus surprised : cake is limbo entities inspiring violating tide point-of sub-group
Covered vector: 
[0 0]
Positions vector: 
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
Sentence number: 3
cake means disgusting ringing structured contoso selectable heads seriously-reveals adjudicator oft sa cake is eduation unrelated forums servicer statute av designing
Covered vector: 
[

'The main word means cigar elevate solids jagi pedantic postmark civil counter-appeal final riaa alight .The main word is rasputin bedding instruments fundamentally diocese tricky consolidate'

### Example with 'airplane' as main word, 3 seeds, simple printing mode

In [5]:
final_output(mw = 'pencil', card_model = card_model, n_seeds=3, n_iterations = 150, debugging = False, printing = True)

The set of input words we are trying to introduce into our sequence is: ['cosmetic', 'draw', 'figure', 'notepad', 'pencil']
Sentence number: 146
pencil means pursued highly span kai lay pharmacists fees  pencil is baggage orders happy dental columbia contingency standa pencil can be found carefully setting ohmygosh false underwear
Sentence number: 147
pencil means axioms albino novelty background deviation dvlc roof pencil is humanity nano-machines economical scouted sofa holocaus pencil can be found itot etc mos homily polyp
Sentence number: 148
pencil means axioms sign baggage belonging banquet sony-made stri pencil is warsaw voicethread endures imparted doctor-material han pencil can be found allowing window britain three slavery
Sentence number: 149
pencil means axioms archive bless physiotherapy dressmaking categ pencil is appeals didnt hoped guidance tax matured dimensions pencil can be found install since false fulfilling first-of-a-kin
Sentence number: 150
pencil means injuries

'The main word means injuries ncda bulldozer start offices pilotage gathe .The main word is humanity shall 4.94 cosmic strings gimp isolated .The main word can be found encyclopedia heartbeat pilotage structurally '

### Example with 'airplane' as main word, 2 seeds, only final output is shown

In [6]:
final_output(mw = 'pencil', card_model = card_model, n_seeds=2, n_iterations = 10, debugging = False, printing = False)

The set of input words we are trying to introduce into our sequence is: ['cosmetic', 'crayon', 'draw', 'figure', 'notepad', 'pencil']


'The main word means albino majoring sign endures cartoon nav mild harmless doctor-material · term .The main word is gained unfallia appear continually abroad modularization remains'

## Examples from the demo using model 3

### Example with 'cake' as main word, 3 seeds, debugging mode on to show covered and position vectors

In [8]:
final_output(mw = 'cake', card_model = card_model, n_seeds=3, n_iterations = 10, debugging = True, printing = True)

The set of input words we are trying to introduce into our sequence is: ['baba', 'block', 'cake', 'coat', 'cookie', 'cover', 'crumpet', 'dessert', 'dish', 'pastry', 'tablet']
Sentence number: 1
cake means massumi ballad identifier superhuman facial millennia gay  cake is shadow gestalt facial re-writing sweets bravery coincide cake can be found carlyle freshly re-writing .36 goo
Covered vector: 
[0 0 0]
Positions vector: 
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 0. 0. 0.]
Sentence number: 2
cake means fragments catches shaming gay boyfriend ballad identifiable  cake is abandoning declassified pubic creator hackney sweets axiom cake can be found insecticide touch-screen grams elms school
Covered vector: 
[0 0 0]
Positions vector: 
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 0. 0. 0.]
Sentence number: 3
cake means watching zoo creator ballad chairs ballad tilting  cake is methodology sniff uniquely validity pubic annoyed flow

'The main word means covered touch-screen humbug younger cosmology relying recommended  .The main word is school accountant pre-ordering touch-screen superhero musick radar .The main word can be found diagnoses re-writing casino abomination inthe'

### Example with 'cake' as main word, 2 seeds, debugging mode on to show covered and position vectors

In [9]:
final_output(mw = 'cake', card_model = card_model, n_seeds=2, n_iterations = 20, debugging = True, printing = True)

The set of input words we are trying to introduce into our sequence is: ['baba', 'block', 'cake', 'coat', 'cookie', 'cover', 'crumpet', 'dessert', 'dish', 'tablet']
Sentence number: 1
cake means airplane asia thousands 1839 rebalance read author announcement orca brave re-writing cake is imperial final woman descent boundaries gay nash
Covered vector: 
[0 0]
Positions vector: 
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
Sentence number: 2
cake means simon boundaries notepad sudan re-writing hackney hexadecimal delicate entirely accessibility re-wr cake is reactor aspirations anesthesia sailors great barbeque ibn
Covered vector: 
[0 0]
Positions vector: 
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
Sentence number: 3
cake means oak vulgar engine mechanisms pitchfork year-round re-writing lightness commonly continuously creato cake is solicitors everyone post-war smartly patent humans asian
Covered vector: 
[0

'The main word means tie asian serial damned 101 malnutrition constructively celebrating arab fork riposte .The main word is earthly co-operative chicago passively awakened sensors towns'

### Example with 'airplane' as main word, 3 seeds, simple printing mode

In [11]:
final_output(mw = 'airplane', card_model = card_model, n_seeds=3, n_iterations = 150, debugging = False, printing = True)

The set of input words we are trying to introduce into our sequence is: ['airliner', 'airplane', 'biplane', 'fighter', 'flight', 'jet']
Sentence number: 146
airplane means irish touch-screen tasks 7 summit re-writing torsten  airplane is realised disincentive online contribute theft hui rambling airplane can be found boundaries touch-screen doctrine permission touch
Sentence number: 147
airplane means plastcine survivor sphinx millennia ballad ballad corksc airplane is baxter suarez annual enemies tries ballad lose airplane can be found assures liars tries desire verbs
Sentence number: 148
airplane means snow hold trigger blower 64 commonly mindful  airplane is westerners inserting hentai sandy sailors hackney hysterect airplane can be found catalytic ka schlimazel nonperson --
Sentence number: 149
airplane means amc wembley indices ploy digger loop fed  airplane is simulator surrogates stanley worksheets parameter debt x-fa airplane can be found dispense universalism hackney headlight

'The main word means submarine wallet directly re-writing creator france blow .The main word is wrapped maturity mechanisms interlinear guadagnin depressed .The main word can be found perverse weighing errors shaming observes'

### Example with 'airplane' as main word, 2 seeds, only final output is shown

In [12]:
final_output(mw = 'airplane', card_model = card_model, n_seeds=2, n_iterations = 10, debugging = False, printing = False)

The set of input words we are trying to introduce into our sequence is: ['airliner', 'airplane', 'biplane', 'fighter', 'flight', 'jet']


'The main word means levels karl graph billings ploy locality fingerprint creator fruits sinusitis troops .The main word is dante badges tether professor beyond signpost spells'