In [6]:
import random
import pyperclip

In [7]:
class Trie(object):
    """A simple character trie stored as nested dictionaries.

    Each node is a dict mapping one character to its child node. A node at
    which an inserted string ends additionally carries the ``_END`` sentinel
    key, so a stored string that is a prefix of a longer stored string
    (e.g. "hell" and "hello") can still be returned by ``query``.
    """

    # Sentinel key meaning "an inserted string ends at this node". It is a
    # unique object, so it can never collide with a character key.
    _END = object()

    def __init__(self, strings):
        """Build a trie containing every string in the iterable ``strings``."""
        self.root = {}
        for string in strings:
            self.add(string)

    def add(self, string):
        """Insert ``string`` into the trie, creating missing nodes on the way."""
        curr = self.root
        for c in string:
            curr = curr.setdefault(c, {})
        # Without this marker, words that are prefixes of longer words would
        # be indistinguishable from plain interior nodes.
        curr[self._END] = True

    def query(self, pref):
        """Return a random stored string starting with ``pref``.

        Returns ``None`` when no stored string has that prefix. The result is
        produced by a random walk: at every node one option is picked
        uniformly among the child characters plus, when a stored string ends
        at that node, "stop here". The choice is therefore not uniform over
        the matching strings.
        """
        curr = self.root
        for c in pref:
            if c not in curr:
                return None
            curr = curr[c]
        result = pref
        while curr:
            c = random.choice(list(curr))
            if c is self._END:
                return result
            result += c
            curr = curr[c]
        # Only reachable for an empty trie queried with an empty prefix:
        # every node created by add() has a child or the end marker.
        return None

In [8]:
# Small hand-made trie used to sanity-check query() in the next cell.
t = Trie(["hello", "hell", "helm", "helmet", "helk"])

In [9]:
# Ask the demo trie for a random stored completion of the prefix "hel".
t.query("hel")

'hello'

In [10]:
#Initialise the full dictionary
with open('dict.txt') as f:
    # Strip only the line terminator. Slicing with [:-1] would chop the final
    # letter of the last word whenever the file does not end with a newline.
    dictionary = [line.rstrip('\n') for line in f]

print(len(dictionary), "words loaded")

41242 words loaded


In [11]:
# Index every loaded word in a trie so prefix queries avoid scanning the list.
fullTrie = Trie(dictionary)

Below I compare a query against the trie with a naive scan of the word list. The trie does seem to be faster on average; however, a double binary search might be faster than both.

In [62]:
# Trie lookup: random completion of the prefix 'cas'.
fullTrie.query('cas')

'casket'

In [28]:
# Naive baseline: scan the whole list for words starting with 'cas', then pick
# one at random (random.choice raises IndexError if nothing matches).
random.choice([el for el in dictionary if el.startswith('cas')])

'casuist'

In [48]:
def binarySearch(arr, check):
    """Find the boundary of a predicate over ``arr`` by bisection.

    ``check`` is expected to be False for a (possibly empty) leading run of
    elements and True for everything after it.

    Returns the index of the last element for which ``check`` is False, or
    -1 when ``check`` is True for every element or ``arr`` is empty.
    """
    lo, hi = -1, len(arr)
    # Invariant: check is False at lo (or lo == -1) and True at hi
    # (or hi == len(arr)); stop once the two bounds are adjacent.
    while hi - lo > 1:
        mid = (lo + hi) // 2
        lo, hi = (lo, mid) if check(arr[mid]) else (mid, hi)
    return lo

In [57]:
# Double binary search for words starting with 'cas'. Both searches compare
# only the first three characters, so this relies on `dictionary` being in
# sorted order -- TODO confirm dict.txt is sorted.
l = binarySearch(dictionary, lambda x: x[:3] >= 'cas')  # last index whose prefix sorts before 'cas'
r = binarySearch(dictionary, lambda x: x[:3] > 'cas')   # last index whose prefix is <= 'cas'
# Matches occupy indices l+1 .. r inclusive. The slice end must be r + 1,
# otherwise the last match is dropped and a single match yields an empty list.
matches = dictionary[l + 1:r + 1]
# Evaluate to None instead of raising IndexError when nothing matches.
random.choice(matches) if matches else None

'cascade'

In [13]:
#Now for the code to generate a frescorer

def _frescorer(minLength, maxLength):
    valid = False
    while not valid:
        a = random.choice(dictionary)
        if len(a) < minLength or len(a) > maxLength: continue
        b = fullTrie.query(a[-3:])
        if b is None or len(b) < minLength or len(b) > maxLength: continue
        valid = True
    return (a, b)

def generateFrescorers(n, minLength=5, maxLength=7):
    """Return a list of ``n`` word pairs, each produced by one call to
    ``_frescorer(minLength, maxLength)``."""
    return [_frescorer(minLength, maxLength) for _ in range(n)]

#Make them into question format
def formatFrescorers(frescorers):
    """Render word pairs as a puzzle sheet.

    For each pair, the question line is the first word without its last
    three letters, three underscores, then the second word without its first
    three letters. The answer line is the last three letters of the first
    word. Every line is newline-terminated.

    Returns a dict with the keys 'questions' and 'answers', each holding the
    concatenated lines as one string.
    """
    blank = "_" * 3
    question_lines = []
    answer_lines = []
    # Single pass so that one-shot iterables (generators) are handled too.
    for pair in frescorers:
        first, second = pair[0], pair[1]
        question_lines.append(first[:-3] + blank + second[3:] + "\n")
        answer_lines.append(first[-3:] + "\n")
    return {
        'questions': "".join(question_lines),
        'answers': "".join(answer_lines)
    }

In [14]:
# Generate five frescorers with word lengths between 5 and 7, then print the
# question sheet followed by the answer sheet.
f = formatFrescorers(generateFrescorers(5,5,7))
print(f['questions'])
print(f['answers'])

samo___mint
al___ms
toc___less
ne___kin
chan___son

var
ter
sin
wel
nel

