# Morpheme Finder
[TOC]


## Import & Define Env Variables

In [332]:
from collections import defaultdict
from json import loads
from math import ceil
from random import sample
import re
from typing import Callable

import pycrfsuite
from requests import request, ConnectionError
from tqdm import tqdm

# Registry of loaded words, keyed by the word's text.
# NOTE: defaultdict(None) has no default factory, so a missing key still raises KeyError.
word_dict = defaultdict(None)
# Maps a label name (one of the constants below) to the function that computes that label.
label_func = defaultdict(None)
# Affix inventories; filled in by the loader callbacks.
known_prefixes = set()
known_suffixes = set()

# Label names, used as keys into `label_func` and into each Word's `label` mapping.
EVQR_AFFIX = '<evqr.affix>'
PREFIX_AND_SUFFIX = '<prefix.and.suffix>'
VOWEL = '<vowel>'
CELEX_WORD_ROOT = '<celex.word.root>'

# Number of folds the data is split into for cross validation.
CROSS_VALIDATION_FOLD = 5

In [333]:
# Read optional local settings from '.env.json'; fall back to the default
# data directory when the file does not exist.
try:
    # The `with` statement closes the file, so no explicit close() is needed.
    with open('.env.json') as f:
        ENV_VARIABLES = loads(f.read())
except FileNotFoundError:
    ENV_VARIABLES = {'DATA_DIR': 'C:\\'}
# A settings file without a DATA_DIR entry gets the same default as a missing file.
DATA_DIR = ENV_VARIABLES.get('DATA_DIR', 'C:\\')
# Base URL the data files are downloaded from when they are not found under DATA_DIR.
FTP_DIR = 'http://m106.nthu.edu.tw/~s106062341/morpheme_finder_data/'

### Class Word

In [334]:
class Word:
    """A word's text, its affix segmentation, and the labels computed for it."""

    @staticmethod
    def create_synonym_postfix(word, delete=None, append=None):
        """Return `word` followed by optional ``--delete--`` and ``++append++`` markers."""
        deleted = '' if delete is None else f'--{delete}--'
        appended = '' if append is None else f'++{append}++'
        return f'{word}{deleted}{appended}'

    @staticmethod
    def create_synonym_prefix(word, delete=None, append=None):
        """Return `word` preceded by optional ``--delete--`` and ``++append++`` markers."""
        deleted = '' if delete is None else f'--{delete}--'
        appended = '' if append is None else f'++{append}++'
        return f'{deleted}{appended}{word}'

    @staticmethod
    def letter_cmp(a, b):
        """Return the number of leading letters that `a` and `b` have in common.

        The scan stops at the first differing position. When one string is a
        prefix of the other (or both are equal) the shorter length is returned.
        """
        for i, (letter_a, letter_b) in enumerate(zip(a, b)):
            if letter_a != letter_b:
                return i
        return min(len(a), len(b))

    def __init__(self, text, affix_list):
        """Store the word text and its list of affixes/segments."""
        self.text = text
        self.affix_list = affix_list
        # defaultdict(None) has no default factory: both behave like plain dicts.
        self.synonym = defaultdict(None)
        self.label = defaultdict(None)

    @property
    def count(self):
        """Sum of all values stored in `self.synonym`."""
        return sum(self.synonym.values())

    def create_label(self, label_name, *args):
        """Compute the label `label_name` and store it in `self.label`.

        Returns False (and stores nothing) when no function is registered
        under `label_name` in `label_func`; otherwise returns True.
        """
        if label_name not in label_func:
            return False
        self.label[label_name] = label_func[label_name](self, *args)
        return True

## Data Accessing
### First, provide a helper that reads a file from local storage or, failing that, downloads it from the remote data server (`FTP_DIR`)

In [335]:
def get_file(filename: str, callback: Callable[[str], None]) -> bool:
    """Load `filename` and hand its text to `callback`.

    The file is read from `DATA_DIR` first. If it does not exist there, it is
    downloaded from `FTP_DIR` and decoded as Big5.

    Returns True when `callback` was invoked with the content, False when the
    download (or processing of the downloaded text) failed.
    """
    try:
        # The `with` statement closes the file; only the read is guarded so a
        # FileNotFoundError raised by the callback is not mistaken for a
        # missing data file.
        with open(f'{DATA_DIR}{filename}', 'r') as f:
            content = f.read()
    except FileNotFoundError:
        try:
            res = request('GET', f'{FTP_DIR}{filename}')
            # Without this check a 404/500 error page would be parsed as data.
            res.raise_for_status()
            res.encoding = 'Big5'
            callback(res.text)
            return True
        except ConnectionError:
            print('HTTP connection failed')
            return False
        except Exception as e:
            print(f'Load failed: {e}')
            return False
    else:
        callback(content)
        return True

### Load Data
This section loads:
1. *EVQR.word.and.affix.txt*
2. *prefixes.txt*
3. *suffixes.txt*
4. *CELEX.word.and.root.txt*
5. *word_roots.txt*

In [30]:
def evqr_word_and_suffix_callback(content):
    """Register a Word in `word_dict` for each data row of `content`.

    The first and last rows are skipped. In every other row hyphens are
    removed, the row is split on single spaces and the final token is dropped;
    the first remaining token is the word and the rest are its affixes.
    """
    rows = content.split('\n')
    for row in rows[1:-1]:
        tokens = row.replace('-', '').split(' ')
        word, *affix_list = tokens[:-1]
        word_dict[word] = Word(word, affix_list)


if get_file('EVQR.word.and.affix.txt', evqr_word_and_suffix_callback):
    print('Load EVQR.word.and.affix.txt done')

Load EVQR.word.and.affix.txt done


In [31]:
def prefix_callback(content):
    """Add the prefixes listed in `content` to `known_prefixes`.

    The first and last rows are skipped. Each remaining row loses its final
    character, is stripped, has hyphens removed and is split on ', ';
    empty entries are ignored.
    """
    for row in content.split('\n')[1:-1]:
        cleaned = row[:-1].strip().replace('-', '')
        known_prefixes.update(entry for entry in cleaned.split(', ') if entry)


if get_file('prefixes.txt', prefix_callback):
    print('Load prefixes done')

Load prefixes & suffixes done


In [336]:
def suffix_callback(content):
    """Add the suffixes listed in `content` to `known_suffixes`.

    Every row loses its final character, is stripped, has hyphens removed and
    is split on ', '; empty entries are ignored.
    """
    for row in content.split('\n'):
        cleaned = row[:-1].strip().replace('-', '')
        known_suffixes.update(entry for entry in cleaned.split(', ') if entry)


if get_file('suffixes.txt', suffix_callback):
    print('Load suffixes done')

Load suffixes done


In [337]:
# Lines whose segments do not concatenate back to the word itself.
bad_celex = []


def celex_word_and_root_callback(content):
    """Register a Word for every consistent line of `content`.

    Each line is "<word> <segment> <segment> ...". A line is accepted only
    when its segments concatenate to the word; other lines are collected in
    `bad_celex`.
    """
    # splitlines() handles both '\r\n' (downloaded text) and '\n' (a local
    # file opened in text mode already has its line endings translated).
    for line in content.splitlines():
        # Skip blank lines so no empty-string word gets registered.
        if not line.strip():
            continue
        word, *affix_list = line.split(' ')
        if word == ''.join(affix_list):
            word_dict[word] = Word(word, affix_list)
        else:
            bad_celex.append(line)


if get_file('CELEX.word.and.root.txt', celex_word_and_root_callback):
    print(f'Load CELEX.word.and.root.txt done [{len(word_dict)} / {len(bad_celex)}]')

Load CELEX.word.and.root.txt done [11770 / 8296]


In [338]:
def word_roots_callback(content):
    """Add the entries in the first tab-separated column of each line to `known_prefixes`.

    The column is split on ', ' and every non-letter character is removed
    from each entry. A line is ignored entirely when any of its entries
    becomes empty after cleaning.
    """
    for line in content.split('\n'):
        prefixes = line.split('\t')[0].split(', ')
        # [^A-Za-z] rather than [^A-z]: the range A-z also spans the
        # punctuation characters [ \ ] ^ _ ` and would leave them in place.
        prefixes = [re.sub(r'[^A-Za-z]', '', prefix) for prefix in prefixes]
        if '' not in prefixes:
            known_prefixes.update(prefixes)


if get_file('word_roots.txt', word_roots_callback):
    print('Load word_roots.txt done')

Load word_roots.txt done


## Labelize Word
### Mapping Label Function
Each kind of label has its own labeling function, so the functions are registered by label name.

In [339]:
def evqr_affix(word):
    """Label each letter of `word.text` with 1 where an affix starts, else 0.

    Affixes are located left to right, each search starting where the
    previous affix ended. An affix starting at position 0 is never marked.
    Returns a list of (letter, label) pairs.
    """
    text = word.text
    label = [0] * len(text)
    pos = 0
    for affix in word.affix_list:
        if not affix:
            # An empty affix carries no boundary information.
            continue
        lowered = affix.lower()
        # Search with the same (lower-cased) form that is tested, and only
        # from `pos` onwards, so a failed lookup can never write to label[-1].
        start = text.find(lowered, pos)
        if start >= 0:
            label[start] = 1 if pos != 0 else 0
            pos = start + len(lowered)
        else:
            # The affix is not present verbatim; compare it with the remaining
            # text and, if the comparison exceeds 1, mark the current position.
            k = Word.letter_cmp(text[pos:], lowered)
            if k > 1:
                label[pos] = 1 if pos != 0 else 0
                pos += 1

    return list(zip(text, label))

def vowel(word):
    """Pair every letter of `word.text` with 1 if it is a, e, i, o or u, otherwise 0."""
    vowel_letters = frozenset(("a", "e", "i", "o", "u"))
    flagged = []
    for ch in word.text:
        flagged.append((ch, 1 if ch in vowel_letters else 0))
    return flagged

def prefix_and_suffix(word):
    """Mark the positions where a known prefix ends or a known suffix begins.

    Position `k` is labelled 1 when `text[:k]` is in `known_prefixes` or
    `text[k:]` is in `known_suffixes`. Returns a list of (letter, label) pairs.
    """
    text = word.text
    size = len(text)
    marks = [0] * size

    # Leading slices text[:0] .. text[:size - 1].
    for cut in range(size):
        if text[:cut] in known_prefixes:
            marks[cut] = 1

    # Trailing slices text[1:] .. text[size:].
    for cut in range(1, size + 1):
        if text[cut:] in known_suffixes:
            marks[cut] = 1

    return list(zip(text, marks))

def celex_word_root(word):
    """Label the first letter of every segment except one found while the cursor is at 0.

    Segments from `word.affix_list` are located in `word.text` left to right,
    each search starting where the previous segment ended.
    Returns a list of (letter, label) pairs.
    """
    text = word.text
    marks = [0] * len(text)
    cursor = 0
    for segment in word.affix_list:
        start = text.find(segment, cursor)
        if cursor == 0:
            marks[start] = 0
        else:
            marks[start] = 1
        cursor = start + len(segment)

    return list(zip(text, marks))
        
# Register every labeling function under its label name.
label_func.update({
    EVQR_AFFIX: evqr_affix,
    VOWEL: vowel,
    PREFIX_AND_SUFFIX: prefix_and_suffix,
    CELEX_WORD_ROOT: celex_word_root,
})
print('Mapping done')

Mapping done


### Create Label for each Word

In [340]:
# Only the CELEX word-root label is computed here. EVQR_AFFIX, VOWEL and
# PREFIX_AND_SUFFIX can be generated the same way by passing them to
# create_label().
for word in tqdm(word_dict.values()):
    if not word.create_label(CELEX_WORD_ROOT):
        # create_label() returns False when no function is registered for the label.
        print('Failed at label with CELEX word root')
print('Label done')

100%|██████████| 11770/11770 [00:00<00:00, 186015.25it/s]

Label done





In [341]:
# Collect the CELEX word-root labelling ((letter, label) pairs) of every word.
prepared_word = [entry.label[CELEX_WORD_ROOT] for entry in tqdm(word_dict.values())]
# Preview the first 100 labelled words.
for labelled in prepared_word[:100]:
    print(''.join(letter for letter, _ in labelled), labelled)

100%|██████████| 11770/11770 [00:00<00:00, 1330861.00it/s]

aback [('a', 0), ('b', 1), ('a', 0), ('c', 0), ('k', 0)]
abaft [('a', 0), ('b', 0), ('a', 1), ('f', 0), ('t', 0)]
abandonment [('a', 0), ('b', 0), ('a', 0), ('n', 0), ('d', 0), ('o', 0), ('n', 0), ('m', 1), ('e', 0), ('n', 0), ('t', 0)]
abasement [('a', 0), ('b', 0), ('a', 0), ('s', 0), ('e', 0), ('m', 1), ('e', 0), ('n', 0), ('t', 0)]
abatement [('a', 0), ('b', 0), ('a', 0), ('t', 0), ('e', 0), ('m', 1), ('e', 0), ('n', 0), ('t', 0)]
abduction [('a', 0), ('b', 0), ('d', 0), ('u', 0), ('c', 0), ('t', 0), ('i', 1), ('o', 0), ('n', 0)]
abeam [('a', 0), ('b', 1), ('e', 0), ('a', 0), ('m', 0)]
abed [('a', 0), ('b', 1), ('e', 0), ('d', 0)]
aberrant [('a', 0), ('b', 0), ('e', 1), ('r', 0), ('r', 0), ('a', 1), ('n', 0), ('t', 0)]
abjection [('a', 0), ('b', 0), ('j', 0), ('e', 0), ('c', 0), ('t', 0), ('i', 1), ('o', 0), ('n', 0)]
abjectly [('a', 0), ('b', 0), ('j', 0), ('e', 0), ('c', 0), ('t', 0), ('l', 1), ('y', 0)]
ablaze [('a', 0), ('b', 1), ('l', 0), ('a', 0), ('z', 0), ('e', 0)]
abnegate




## Training
### features creator
Features are based on:
1. the previous and following letters

In [342]:
def create_char_features(word, i):
    """Build the feature list for position `i` of `word`.

    `word` is a sequence whose items expose the letter at index 0, i.e. either
    (letter, label) pairs or a plain string. The features cover the letter
    itself, whether the text before `i` is a known prefix, whether the text
    from `i` onwards is a known suffix, and the letters up to two positions to
    either side ("BOS"/"EOS" mark the first/last position).
    """
    letters = [item[0] for item in word]
    size = len(word)
    current = letters[i]
    leading = ''.join(letters[j] for j in range(i))
    trailing = ''.join(letters[j] for j in range(i, size))

    features = [
        'bias',
        'char=' + current,
        f'prefix={int(leading in known_prefixes)}',
        f'suffix={int(trailing in known_suffixes)}',
    ]

    # One letter of left context, or the beginning-of-sequence marker.
    if i < 1:
        features.append("BOS")
    else:
        prev1 = letters[i - 1]
        features += [
            'char-1=' + prev1,
            'char-1:0=' + prev1 + current,
        ]

    # Two letters of left context.
    if i >= 2:
        prev1 = letters[i - 1]
        prev2 = letters[i - 2]
        features += [
            'char-2=' + prev2,
            'char-2:0=' + prev2 + prev1 + current,
            'char-2:-1=' + prev2 + prev1,
        ]

    # One letter of right context, or the end-of-sequence marker.
    if i + 1 >= size:
        features.append("EOS")
    else:
        next1 = letters[i + 1]
        features += [
            'char+1=' + next1,
            'char:+1=' + current + next1,
        ]

    # Two letters of right context.
    if i + 2 < size:
        next1 = letters[i + 1]
        next2 = letters[i + 2]
        features += [
            'char+2=' + next2,
            'char:+2=' + current + next1 + next2,
            'char+1:+2=' + next1 + next2,
        ]

    return features


def create_word_features(prepared_word):
    """Return one feature list per position of `prepared_word`, in order."""
    per_position = []
    for position, _ in enumerate(prepared_word):
        per_position.append(create_char_features(prepared_word, position))
    return per_position


def create_word_labels(prepared_word):
    """Return the label (index 1 of every item) of each position as a string."""
    tags = []
    for item in prepared_word:
        tags.append(str(item[1]))
    return tags

### create k-fold cross validation
All data is split into `CROSS_VALIDATION_FOLD` (5) folds here.

In [343]:
# Indices (into the list of loaded words) not yet assigned to a fold.
sample_range = set(range(len(word_dict)))
# Target size of each fold; the last fold receives whatever remains.
sample_set_size = ceil(len(sample_range) / CROSS_VALIDATION_FOLD)
sample_list = []
selected_samples = set()  # not referenced by the visible code; kept for compatibility
for i in range(CROSS_VALIDATION_FOLD - 1):
    # random.sample() needs a sequence: sampling directly from a set raises
    # TypeError on Python 3.11+. The draw is capped so a very small data set
    # cannot request more items than remain.
    draw_size = min(sample_set_size, len(sample_range))
    samples = set(sample(sorted(sample_range), draw_size))
    sample_list.append(samples)
    sample_range.difference_update(samples)
sample_list.append(set(sample_range))

### interface of using pycrfsuite

In [344]:
def train(folds):
    """Train a CRF on every sample of `folds` and write it to 'word-segmentation.crfsuite'.

    `folds` is an iterable of collections of indices into `prepared_word`.
    """
    crf_trainer = pycrfsuite.Trainer(verbose=False)
    for fold in tqdm(folds):
        for idx in fold:
            sequence = prepared_word[idx]
            crf_trainer.append(create_word_features(sequence),
                               create_word_labels(sequence))

    hyper_params = {
        'c1': 1.0,
        'c2': 1e-3,
        'max_iterations': 50,
        'feature.possible_transitions': True
    }
    crf_trainer.set_params(hyper_params)
    crf_trainer.train('word-segmentation.crfsuite')


def test(fold):
    """Score the model saved in 'word-segmentation.crfsuite' on the words in `fold`.

    A word counts as correct when inserting a space before every letter tagged
    >= 1 reproduces its affix list joined by spaces. Words whose affix list
    does not concatenate to the word are skipped and reported separately.

    Returns (accuracy, bad_data): the fraction of evaluated words segmented
    correctly (0.0 when no word could be evaluated) and the skipped words.
    """
    tagger = pycrfsuite.Tagger()
    tagger.open('word-segmentation.crfsuite')
    score = 0
    bad_data = []
    try:
        for word in fold:
            affix_list = word_dict[word].affix_list
            if word != ''.join(affix_list):
                bad_data.append(word)
                continue
            w = word.replace(" ", "")
            prediction = tagger.tag(create_word_features(w))
            # Start a new segment (leading space) at every letter tagged >= 1.
            complete = ''.join(
                f' {letter}' if int(tag) >= 1 else letter
                for letter, tag in zip(w, prediction)
            )
            if complete == ' '.join(affix_list):
                score += 1
            else:
                print(f'{word} -> {complete}, {affix_list}')
    finally:
        # Release the model even if tagging fails part-way through.
        tagger.close()
    evaluated = len(fold) - len(bad_data)
    # Avoid ZeroDivisionError for an empty fold or one made only of bad data.
    accuracy = score / evaluated if evaluated else 0.0
    return accuracy, bad_data



## Implement
Train and test five times here, holding out a different fold as the test set each time.

In [345]:
# Run the cross validation: every fold serves once as the test set while the
# remaining folds are used for training.
word_list = list(word_dict.values())
scores = []
all_bad_data = []
for test_set_idx, held_out in enumerate(sample_list):
    test_fold = [word_list[idx].text for idx in held_out]
    train_folds = sample_list[:test_set_idx] + sample_list[test_set_idx + 1:]
    train(train_folds)
    score, bad_data = test(test_fold)
    # Scores are reported as percentages.
    scores.append(score * 100)
    all_bad_data.extend(bad_data)
print(f'{scores}\navg = {sum(scores) / len(scores)}, max = {max(scores)}, min = {min(scores)}\nbad data = {len(all_bad_data)}')

100%|██████████| 4/4 [00:01<00:00,  2.94it/s]


really -> re al ly, ['real', 'ly']
ablaze -> ablaze, ['a', 'blaze']
recallable -> recall able, ['re', 'call', 'able']
recharge -> recharge, ['re', 'charge']
recite -> recite, ['re', 'cite']
acclimate -> acclimate, ['ac', 'climate']
accredit -> accredit, ['ac', 'credit']
accustom -> accustom, ['ac', 'custom']
acquit -> acquit, ['ac', 'quit']
acrobatically -> acrobatic al ly, ['acrobat', 'ic', 'ally']
acrobatics -> acrobatic s, ['acrobat', 'ic', 's']
actionable -> action able, ['act', 'ion', 'able']
across -> across, ['a', 'cross']
addition -> addition, ['add', 'ition']
adjournment -> ad journ ment, ['adjourn', 'ment']
reflexive -> re flexive, ['reflex', 'ive']
ado -> ado, ['a', 'do']
adrift -> adrift, ['a', 'drift']
adverbially -> adver bial ly, ['ad', 'verb', 'ial', 'ly']
aerodynamic -> aero dynam ic, ['aero', 'dynamic']
afar -> afar, ['a', 'far']
reissue -> re is sue, ['re', 'issue']
rejoicings -> re joicing s, ['rejoicing', 's']
aforementioned -> a foremention ed, ['a', 'fore', 'ment

  0%|          | 0/4 [00:00<?, ?it/s]


firetrap -> firetrap, ['fire', 'trap']
fisherman -> fisher man, ['fish', 'er', 'man']
fishery -> fish er y, ['fish', 'ery']
flinty -> flin ty, ['flint', 'y']
fluoride -> fluoride, ['fluor', 'ide']
foolscap -> fool scap, ['fool', 's', 'cap']
forefather -> fore fath er, ['fore', 'father']
foremost -> foremost, ['fore', 'most']
forename -> forename, ['fore', 'name']
forestry -> forestry, ['forest', 'ry']
forthright -> forthright, ['forth', 'right']
fourthly -> fourth ly, ['four', 'th', 'ly']
fowlpest -> fowlpest, ['fowl', 'pest']
fractional -> fract ion al, ['fraction', 'al']
freepost -> freepost, ['free', 'post']
freshwater -> fresh wat er, ['fresh', 'water']
frilly -> fril ly, ['frill', 'y']
frontiersman -> front ier sman, ['frontier', 's', 'man']
fruition -> fruition, ['fruit', 'ion']
furtherance -> fur ther ance, ['further', 'ance']
gangster -> gang st er, ['gang', 'ster']
gaseous -> gase ous, ['gas', 'eous']
gaslight -> gaslight, ['gas', 'light']
gentlefolks -> gentle folk s, ['gent

100%|██████████| 4/4 [00:01<00:00,  2.92it/s]


abaft -> abaft, ['ab', 'aft']
reaper -> reaper, ['reap', 'er']
rebukingly -> re buking ly, ['rebuking', 'ly']
rebuttal -> re buttal, ['re', 'butt', 'al']
recantation -> re cant ation, ['recant', 'ation']
recklessness -> reck less ness, ['reckless', 'ness']
accordingly -> according ly, ['accord', 'ing', 'ly']
reconditeness -> re condite ness, ['recondite', 'ness']
recorder -> re cord er, ['record', 'er']
redeploy -> redeploy, ['re', 'deploy']
redeployment -> redeploy ment, ['re', 'deploy', 'ment']
addiction -> add ict ion, ['addict', 'ion']
reefer -> reefer, ['reef', 'er']
addressee -> ad dress ee, ['address', 'ee']
admiralty -> ad miral ty, ['admiral', 'ty']
adrenalin -> adrenalin, ['ad', 'renal', 'in']
refreshingly -> re fre shing ly, ['refreshing', 'ly']
refreshment -> re fresh ment, ['refresh', 'ment']
advancement -> ad vance ment, ['advance', 'ment']
aeronautical -> aeron aut ic al, ['aeronaut', 'ical']
affectionately -> affectionate ly, ['affect', 'ion', 'ate', 'ly']
reindeer -> r

  0%|          | 0/4 [00:00<?, ?it/s]


indubitable -> indubit able, ['in', 'dubitable']
inequity -> inequity, ['in', 'equity']
infertile -> infert ile, ['in', 'fertile']
infidelity -> in fidel ity, ['in', 'fidelity']
informant -> in form ant, ['inform', 'ant']
innately -> in nate ly, ['innate', 'ly']
inorganic -> inorgan ic, ['in', 'organ', 'ic']
inorganically -> inorganic al ly, ['in', 'organ', 'ic', 'ally']
insatiate -> ins atiate, ['in', 'satiate']
insipidly -> in sipid ly, ['insipid', 'ly']
insipidness -> in sipid ness, ['insipid', 'ness']
insistent -> insistent, ['insist', 'ent']
inspectorate -> inspect orate, ['inspect', 'or', 'ate']
instrumentality -> in strument al ity, ['instrument', 'al', 'ity']
integument -> integument, ['in', 'tegument']
intemperately -> intemperate ly, ['in', 'temper', 'ate', 'ly']
intrinsically -> intrin sic al ly, ['intrinsic', 'ally']
inviolate -> inviolate, ['in', 'violate']
involvement -> in volve ment, ['involve', 'ment']
irrational -> ir ration al, ['ir', 'rational']
irrationality -> ir

100%|██████████| 4/4 [00:01<00:00,  2.91it/s]


reaffirm -> reaf firm, ['re', 'af', 'firm']
reafforest -> reaf forest, ['re', 'af', 'forest']
realignment -> realign ment, ['re', 'align', 'ment']
aberrant -> aberrant, ['ab', 'err', 'ant']
reanimate -> reanimate, ['re', 'animate']
abortionist -> abortion ist, ['abort', 'ion', 'ist']
rearm -> rearm, ['re', 'arm']
abreast -> abreast, ['a', 'breast']
rearmost -> rearmost, ['rear', 'most']
absurdly -> ab surd ly, ['absurd', 'ly']
abuse -> abuse, ['ab', 'use']
recapture -> re capt ure, ['re', 'capture']
recreate -> recre ate, ['re', 'create']
recruitment -> re cruit ment, ['recruit', 'ment']
additionally -> addition al ly, ['add', 'ition', 'al', 'ly']
refit -> refit, ['re', 'fit']
reflector -> re flect or, ['reflect', 'or']
adrenal -> adrenal, ['ad', 'renal']
refresher -> re fresh er, ['refresh', 'er']
regimentals -> regimental s, ['regiment', 'al', 's']
aeronautics -> aeron autic s, ['aeronaut', 'ic', 's']
aerospace -> aerospace, ['aero', 'space']
reincarnate -> re in carn ate, ['re', 'in

  0%|          | 0/4 [00:00<?, ?it/s]


indelicate -> in delic ate, ['in', 'delicate']
indent -> ind ent, ['in', 'dent']
indolently -> in dolent ly, ['indolent', 'ly']
inelegantly -> inelegant ly, ['in', 'elegant', 'ly']
infallible -> in fall ible, ['in', 'fallible']
infinite -> in fin ite, ['in', 'finite']
infraction -> in fract ion, ['infract', 'ion']
infrared -> in frared, ['infra', 'red']
ingratiatingly -> in gratiating ly, ['ingratiating', 'ly']
inquiringly -> in quiring ly, ['inquiring', 'ly']
inquisitorial -> in quisitor ial, ['inquisitor', 'ial']
insecure -> insecure, ['in', 'secure']
insentient -> insentient, ['in', 'sentient']
insidiously -> in sidious ly, ['insidious', 'ly']
insistently -> insistent ly, ['insist', 'ent', 'ly']
intercommunicate -> inter communic ate, ['inter', 'communicate']
internally -> internal ly, ['intern', 'al', 'ly']
internee -> inter nee, ['intern', 'ee']
interpretation -> inter pret ation, ['interpret', 'ation']
interpreter -> inter pret er, ['interpret', 'er']
intertwine -> intert wine, 

100%|██████████| 4/4 [00:01<00:00,  2.83it/s]


realign -> realign, ['re', 'align']
reappear -> reappear, ['re', 'appear']
abroad -> abroad, ['a', 'broad']
reassure -> reassure, ['re', 'as', 'sure']
recall -> recall, ['re', 'call']
recklessly -> reck less ly, ['reckless', 'ly']
accompany -> accompany, ['ac', 'company']
acetylene -> acetylene, ['acetyl', 'ene']
recoverable -> re cover able, ['recover', 'able']
recurve -> recurve, ['re', 'curve']
acoustically -> acoustic al ly, ['acoustic', 'ally']
acrobatic -> acrobatic, ['acrobat', 'ic']
redeemable -> re deem able, ['redeem', 'able']
adamantine -> ada mant ine, ['adamant', 'ine']
adamantly -> ada mant ly, ['adamant', 'ly']
redundantly -> redun dant ly, ['redundant', 'ly']
reeducate -> reeducate, ['re', 'educate']
adequately -> ad equate ly, ['adequate', 'ly']
reflection -> re flect ion, ['reflect', 'ion']
admiringly -> ad miring ly, ['admiring', 'ly']
admonishingly -> ad mon ishing ly, ['admonishing', 'ly']
refraction -> re fract ion, ['refract', 'ion']
refractory -> re fract ory, [

  0%|          | 0/4 [00:00<?, ?it/s]


forefinger -> fore fing er, ['fore', 'finger']
foretell -> foretell, ['fore', 'tell']
forever -> forever, ['for', 'ever']
foundry -> foun dry, ['found', 'ry']
foureyes -> foureye s, ['four', 'eyes']
fragrantly -> fra grant ly, ['fragrant', 'ly']
frankincense -> frankincense, ['frank', 'incense']
freelance -> freelance, ['free', 'lance']
freestanding -> freestanding, ['free', 'standing']
freestyle -> freestyle, ['free', 'style']
freethinking -> freethink ing, ['free', 'thinking']
frilled -> frilled, ['frill', 'ed']
frogmarch -> frogmarch, ['frog', 'march']
furthermost -> fur ther most, ['further', 'most']
fusilier -> fusilier, ['fusil', 'ier']
gasworks -> ga sworks, ['gas', 'works']
generically -> generic al ly, ['generic', 'ally']
genitalia -> genitalia, ['genital', 'ia']
gifted -> gifted, ['gift', 'ed']
gigantically -> gigantic al ly, ['gigantic', 'ally']
gloweringly -> glowering ly, ['glower', 'ing', 'ly']
gluey -> gluey, ['glue', 'y']
gooey -> gooe y, ['goo', 'ey']
grandeur -> gran

100%|██████████| 4/4 [00:01<00:00,  2.96it/s]


abed -> abed, ['a', 'bed']
reaphook -> reaphook, ['reap', 'hook']
rearrange -> rearrange, ['re', 'arrange']
absenteeism -> absentee ism, ['absent', 'ee', 'ism']
abysmally -> aby smal ly, ['abysmal', 'ly']
reciprocally -> re ciprocal ly, ['reciprocal', 'ly']
reconcilement -> re concile ment, ['reconcile', 'ment']
acknowledge -> ack nowledge, ['ac', 'knowledge']
acknowledgement -> ack nowledge ment, ['ac', 'knowledge', 'ment']
reduplicate -> red uplic ate, ['re', 'duplicate']
additional -> addition al, ['add', 'ition', 'al']
adventuresome -> adventure some, ['ad', 'venture', 'some']
regain -> regain, ['re', 'gain']
adverbial -> adverb ial, ['ad', 'verb', 'ial']
adversely -> adver se ly, ['adverse', 'ly']
regenerate -> regener ate, ['re', 'generate']
aerodynamically -> aero dynamic al ly, ['aero', 'dynamic', 'ally']
rehabilitate -> re habi litate, ['re', 'habilitate']
reiterate -> reiter ate, ['re', 'iterate']
aforesaid -> a foresaid, ['a', 'fore', 'said']
remissness -> re miss ness, ['re

icepick -> icepick, ['ice', 'pick']
idiotically -> idiotic al ly, ['idiot', 'ic', 'ally']
illegal -> illegal, ['il', 'legal']
illiterate -> il liter ate, ['il', 'literate']
illogically -> illogic al ly, ['il', 'logic', 'al', 'ly']
imbalance -> imbal ance, ['im', 'balance']
immaculately -> im maculate ly, ['immaculate', 'ly']
immigrate -> im mi grate, ['im', 'migrate']
immodest -> im mod est, ['im', 'modest']
immutable -> im mut able, ['im', 'mutable']
impairment -> im pair ment, ['impair', 'ment']
imperiously -> im perious ly, ['imperious', 'ly']
impermanent -> im per manent, ['im', 'permanent']
impersonate -> im per son ate, ['im', 'person', 'ate']
impishly -> im pish ly, ['imp', 'ish', 'ly']
imploringly -> im ploring ly, ['imploring', 'ly']
impolitic -> im polit ic, ['im', 'politic']
importantly -> im portant ly, ['important', 'ly']
imposingly -> im posing ly, ['imposing', 'ly']
impossible -> im poss ible, ['im', 'possible']
impractical -> im pract ic al, ['im', 'practical']
impressi