## Generate all viable suffixes through a dictionary search

Load the dictionary and generate all prefixes and suffixes of its words.

In [1]:
import requests

# Download the word list. A timeout keeps the cell from hanging forever if the
# server never answers.
req = requests.get(
    "https://github.com/dwyl/english-words/raw/master/words_alpha.txt",
    timeout=60,
)
# Fail loudly on anything but HTTP 200. An explicit raise is used instead of
# `assert`, because assertions are skipped when Python runs with -O.
if req.status_code != 200:
    raise RuntimeError(
        f"word list download failed with HTTP status {req.status_code}"
    )

# One word per line; drop carriage returns so CRLF line endings do not leak
# into the words.
words = req.content.decode('utf-8').strip().replace('\r', '').split('\n')

print(len(words))
print(words[:10])

370105
['a', 'aa', 'aaa', 'aah', 'aahed', 'aahing', 'aahs', 'aal', 'aalii', 'aaliis']


In [2]:
# Collect every leading slice (prefix) and trailing slice (suffix) of every
# dictionary word, including the whole word itself.
loaded_prefices = set()
loaded_suffices = set()
for word in words:
    cut_points = range(len(word) + 1)
    loaded_prefices.update(word[:cut] for cut in cut_points)
    loaded_suffices.update(word[cut:] for cut in cut_points)
# Cutting at position 0 / len(word) yields the empty string; it is not a
# usable affix, so drop it from both sets.
loaded_prefices.remove('')
loaded_suffices.remove('')
print(len(loaded_prefices), len(loaded_suffices))

1027816 1090688


Generate a list of combined suffixes.

In [3]:
def dedup(s: str):
    """Lower-case ``s`` and collapse runs of identical adjacent characters.

    For example ``"Mississippi"`` becomes ``"misisipi"``.
    """
    kept = []
    for ch in s.lower():
        # keep a character only when it differs from the one kept just before
        if not kept or kept[-1] != ch:
            kept.append(ch)
    return "".join(kept)

def save_file(filename, fices):
    """Write the given affixes to ``filename`` as one space-separated line.

    Every entry is first normalised with ``dedup``; the normalised entries
    are then made unique and sorted before being written.

    Args:
        filename: Path of the file to create or overwrite.
        fices: Iterable of affix strings.
    """
    unique_fices = sorted({dedup(word) for word in fices})
    # The context manager closes (and so flushes) the file deterministically
    # instead of leaving that to garbage collection.
    with open(filename, 'w', encoding='utf-8') as out:
        out.write(' '.join(unique_fices))

In [4]:
# An innocent 3-letter word that the suffixes get attached to
base_word = "ash"

# Seed suffixes, taken from
# https://www.thoughtco.com/common-suffixes-in-english-1692725
# NOTE(review): "nes" and "les" presumably stand for "ness"/"less" with the
# doubled letter collapsed - confirm.
primary_suffices = (
    "s,d,es,ed,in,ing,acy,al,ance,ence,dom,er,or,ism,ist,ity,ty,ment,nes,ship,sion,tion,ate,en,ify,fy,ize,ise,able,ability,ible,ibility,al,esque,ful,ic,ical,icate,ion,ious,ous,ish,ive,les,y"
).split(',')

def expand_suffices(word: str, iterations=3):
    """Grow ``word`` by appending entries of ``primary_suffices``.

    A candidate is kept only when the part after the first
    ``len(base_word)`` characters is a member of ``loaded_suffices``.

    With ``iterations`` of 1 or less a single suffix is appended. Besides
    plain concatenation, when ``word`` is longer than ``base_word`` and ends
    in one of ``aeiouy`` the last letter may be dropped, and a final ``y``
    may additionally be replaced by ``i`` or ``ie``.

    With ``iterations`` greater than 1, ``iterations`` rounds are run. In
    each round every word found in the previous round is expanded by a
    recursive call with ``iterations - 1``, and the accepted words not seen
    before feed the next round.

    Returns:
        A set of words. In the multi-round case it also contains ``word``.
    """
    stem_len = len(base_word)

    if iterations > 1:
        # breadth-first growth: `frontier` holds the words found last round
        seen = {word}
        frontier = [word]
        for _ in range(iterations):
            grown = set()
            for item in frontier:
                grown |= expand_suffices(item, iterations - 1)
            accepted = {
                cand for cand in grown
                if cand[stem_len:] in loaded_suffices
            }
            frontier = list(accepted - seen)
            seen |= accepted
        return seen

    # single step: attach exactly one suffix
    assert len(word) > 2
    found = set()
    for suffix in primary_suffices:
        variants = [word + suffix]
        if len(word) > stem_len:
            trunk, last = word[:-1], word[-1]
            if last in "aeiouy":
                # drop the trailing vowel before attaching the suffix
                variants.append(trunk + suffix)
            if last == 'y':
                # turn the trailing y into i / ie
                variants.append(trunk + 'i' + suffix)
                variants.append(trunk + 'ie' + suffix)
        found.update(v for v in variants if v[stem_len:] in loaded_suffices)
    return found

# Grow the base word, then keep only what follows it in each result.
suffices = expand_suffices(base_word)
suffices.discard(base_word)
suffices = sorted(grown[len(base_word):] for grown in suffices)
save_file("../trigger-suffices.txt", suffices)
print(len(suffices), "suffices generated")

# Preview: the first 100 in sorted order, then 100 after shuffling.
print(suffices[:100])
__import__('random').shuffle(suffices)
print(suffices[:100])

14113 suffices generated
['abilitable', 'abilitate', 'abilitated', 'abilitates', 'abilitating', 'abilitation', 'abilitationist', 'abilitations', 'abilitative', 'abilitator', 'abilities', 'ability', 'ablate', 'ablated', 'ablates', 'ablating', 'ablation', 'ablations', 'ablatival', 'ablative', 'ablatives', 'ablator', 'ablatores', 'able', 'abled', 'abledom', 'ableful', 'ablefuls', 'ableist', 'ableity', 'ableize', 'ableized', 'ableizing', 'ablement', 'ablements', 'abler', 'ableries', 'ablers', 'ablery', 'ables', 'ablesful', 'ableship', 'abless', 'ableted', 'abletic', 'ableting', 'ablets', 'ablety', 'ablier', 'ablies', 'ablin', 'abling', 'ablings', 'ablins', 'ablis', 'ablish', 'ablishable', 'ablished', 'ablisher', 'ablishes', 'ablishing', 'ablishment', 'ablishmentism', 'ablishments', 'ablize', 'ablized', 'ablizing', 'ably', 'acability', 'acable', 'acably', 'acal', 'acales', 'acals', 'acance', 'acancies', 'acancy', 'acatable', 'acate', 'acated', 'acater', 'acaters', 'acatery', 'acates', 'acat

Do the same thing for prefixes.

In [5]:
# An innocent 3-letter word that the prefixes get attached to
base_word = "hat"

# Seed prefixes, taken from
# https://www.englishhints.com/list-of-prefixes.html
primary_prefices = (
    "a,an,ab,ad,ac,as,ante,anti,auto,ben,bi,circum,co,com,con,contra,counter,de,di,dis,eu,ex,exo,ecto,extra,extro,fore,hemi,hyper,hypo,il,im,in,ir,inter,intra,macro,mal,micro,mis,mono,multi,non,ob,oc,op,omni,over,peri,poly,post,pre,pro,quad,re,semi,sub,sup,super,supra,sym,syn,trans,tri,ultra,un,uni"
).split(',')

def expand_prefices(word: str, iterations=2):
    """Grow ``word`` by prepending entries of ``primary_prefices``.

    A candidate is kept only when the part before the last
    ``len(base_word)`` characters is a member of ``loaded_prefices``.

    With ``iterations`` of 1 or less a single prefix is prepended. Besides
    plain concatenation, a prefix longer than one letter that ends in one of
    ``aeiouy`` may lose its last letter, and a final ``y`` of the prefix may
    additionally be replaced by ``i`` or ``ie``.

    With ``iterations`` greater than 1, ``iterations`` rounds are run. In
    each round every word found in the previous round is expanded by a
    recursive call with ``iterations - 1``, and the accepted words not seen
    before feed the next round.

    Returns:
        A set of words. In the multi-round case it also contains ``word``.
    """
    stem_len = len(base_word)

    if iterations > 1:
        # breadth-first growth: `frontier` holds the words found last round
        seen = {word}
        frontier = [word]
        for _ in range(iterations):
            grown = set()
            for item in frontier:
                grown |= expand_prefices(item, iterations - 1)
            accepted = {
                cand for cand in grown
                if cand[:-stem_len] in loaded_prefices
            }
            frontier = list(accepted - seen)
            seen |= accepted
        return seen

    # single step: attach exactly one prefix
    assert len(word) > 2
    found = set()
    for prefix in primary_prefices:
        variants = [prefix + word]
        if len(prefix) > 1:
            head, last = prefix[:-1], prefix[-1]
            if last in "aeiouy":
                # drop the prefix's trailing vowel before attaching it
                variants.append(head + word)
            if last == 'y':
                # turn the prefix's trailing y into i / ie
                variants.append(head + 'i' + word)
                variants.append(head + 'ie' + word)
        found.update(v for v in variants if v[:-stem_len] in loaded_prefices)
    return found

# Grow the base word, then keep only what precedes it in each result.
prefices = expand_prefices(base_word)
prefices.discard(base_word)
prefices = sorted(grown[:-len(base_word)] for grown in prefices)
save_file("../trigger-prefices.txt", prefices)
print(len(prefices), "prefices generated")

# Preview: the first 100 in sorted order, then 100 after shuffling.
print(prefices[:100])
__import__('random').shuffle(prefices)
print(prefices[:100])

2729 prefices generated
['a', 'aa', 'aan', 'aas', 'ab', 'aba', 'abab', 'abac', 'abad', 'aban', 'abant', 'abante', 'abas', 'abb', 'abbi', 'abc', 'abco', 'abd', 'abde', 'abdi', 'abe', 'aben', 'abi', 'abil', 'abim', 'abin', 'abir', 'abob', 'aboc', 'abr', 'abre', 'abtr', 'abun', 'ac', 'aca', 'acac', 'acad', 'acan', 'acant', 'acas', 'acc', 'acco', 'accom', 'accounter', 'ace', 'acil', 'acin', 'acmis', 'aco', 'acoc', 'acom', 'acon', 'acop', 'acounter', 'acr', 'acre', 'actr', 'ad', 'ada', 'adac', 'adad', 'adan', 'adc', 'adco', 'adcon', 'add', 'adde', 'addi', 'addis', 'ade', 'adeu', 'adi', 'adin', 'adir', 'admis', 'admon', 'adob', 'adop', 'adpr', 'adpre', 'adpro', 'adr', 'adre', 'adultr', 'adun', 'ae', 'afor', 'afore', 'ahem', 'ahyp', 'ail', 'aim', 'ain', 'air', 'amacr', 'amal', 'amicr', 'amicro', 'amis', 'amon']
['intersem', 'overre', 'bas', 'exin', 'malpr', 'ba', 'autore', 'forean', 'autop', 'monop', 'bene', 'nonuni', 'hemib', 'semiaut', 'resyn', 'macro', 'intracon', 'syndi', 'superpre', 'per