In [2]:
'''
BT 3 - Model training and classification.ipynb
Author: Jingchuan Shi
Acknowledgments: Qi Ge and Asst. Prof. Ahmed Qureshi
Created 2019/9/8, last modified 2019/9/13 at University of Alberta.
All Rights Reserved.
'''

# Load relevant modules.
from nltk.corpus import wordnet as wn
import numpy as np
import spacy
import json
import time
# Load the spaCy pipeline used for every vector similarity in this notebook.
# "en_vectors" is a pre-defined shortcut name; per the original note it points to
# SpaCy's en_vectors_web_lg model, so that shortcut must exist in the local environment.
nlp = spacy.load("en_vectors")

In [16]:
# List of core verbs and their corresponding weights.
# There is one (<domain>_words, <domain>_weights) pair per domain, six domains in total.
# The two lists of a pair are positional: the weight at index j belongs to the verb at index j.
# Within each domain the verbs are listed in order of non-increasing weight.
# A verb may appear in several domains (with a different weight in each).

# Domain 0: knowledge.
knowledge_words = ['list', 'name', 'define', 'repeat', 'state', 'label', 'recall', 'identify', 'reproduce', 'describe', 'recognize', 'select', 'record', 'match', 'relate', 'memorize', 'outline', 'quote', 'enumerate', 'write', 'tell', 'recite', 'cite', 'duplicate', 'read', 'order', 'tabulate', 'draw', 'review', 'indicate', 'underline', 'arrange', 'know', 'point', 'count', 'collect', 'meet', 'study', 'trace', 'find', 'index', 'locate', 'show', 'visualize', 'examine', 'copy', 'sequence', 'acquire', 'retell', 'view', 'observe', 'tally', 'imitate', 'follow']
knowledge_weights = [20, 18, 16, 15, 15, 14, 14, 13, 12, 12, 11, 10, 10, 9, 9, 9, 6, 6, 6, 6, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
# Domain 1: comprehension.
comprehension_words = ['explain', 'describe', 'discuss', 'paraphrase', 'restate', 'summarize', 'translate', 'convert', 'review', 'express', 'estimate', 'identify', 'generalize', 'interpret', 'locate', 'give', 'distinguish', 'extend', 'predict', 'recognize', 'defend', 'classify', 'infer', 'report', 'illustrate', 'rewrite', 'select', 'contrast', 'differentiate', 'compare', 'indicate', 'exemplify', 'observe', 'elaborate', 'associate', 'visualize', 'articulate', 'clarify', 'subtract', 'approximate', 'interpolate', 'tell', 'detail', 'outline', 'cite', 'picture', 'interact', 'conclude', 'characterize', 'add', 'factor', 'compute', 'match', 'schedule', 'order', 'sketch', 'draw', 'define', 'operate', 'arrange', 'group', 'extrapolate', 'diagram', 'interrelate', 'represent', 'trace', 'shop', 'suggest', 'understand']
comprehension_weights = [19, 18, 18, 14, 13, 13, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 8, 8, 7, 7, 7, 6, 5, 5, 5, 5, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
# Domain 2: application.
application_words = ['demonstrate', 'use', 'apply', 'solve', 'illustrate', 'dramatize', 'practise', 'employ', 'operate', 'sketch', 'prepare', 'show', 'compute', 'relate', 'construct', 'interpret', 'discover', 'change', 'produce', 'manipulate', 'schedule', 'modify', 'predict', 'complete', 'choose', 'classify', 'translate', 'determine', 'examine', 'calculate', 'investigate', 'draw', 'write', 'protect', 'derive', 'chart', 'alphabetize', 'simulate', 'process', 'provide', 'capture', 'project', 'transcribe', 'organize', 'shop', 'establish', 'attain', 'graph', 'assign', 'allocate', 'convert', 'experiment', 'exercise', 'diminish', 'make', 'develop', 'ascertain', 'tabulate', 'depreciate', 'subscribe', 'implement', 'handle', 'transfer', 'factor', 'avoid', 'expose', 'express', 'perform', 'sequence', 'acquire', 'administer', 'personalize', 'adapt', 'plot', 'customize', 'interview', 'paint', 'explore', 'utilize', 'report', 'figure', 'price', 'coordinate', 'simplify', 'consult', 'maintain', 'deliver', 'extend', 'imitate', 'guide', 'conduct', 'multiply', 'build', 'code', 'contribute', 'obtain', 'model', 'compare', 'divide', 'exhibit', 'tally', 'inform', 'diagram', 'expand', 'amend', 'engineer', 'control', 'assess', 'concatenate', 'execute', 'convey', 'articulate', 'restructure', 'criticize', 'appraise', 'participate', 'generalize', 'instruct', 'follow', 'act', 'screen', 'debate', 'question', 'select', 'include', 'dissect', 'retrieve', 'inspect', 'prove', 'inventory', 'respond', 'comply', 'collect']
application_weights = [18, 17, 17, 17, 15, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
# Domain 3: analysis.
analysis_words = ['compare', 'contrast', 'distinguish', 'analyze', 'differentiate', 'separate', 'examine', 'diagram', 'infer', 'categorize', 'experiment', 'discriminate', 'select', 'appraise', 'relate', 'test', 'question', 'classify', 'identify', 'outline', 'illustrate', 'subdivide', 'investigate', 'debate', 'criticize', 'calculate', 'inventory', 'prioritize', 'correlate', 'explain', 'inspect', 'detect', 'dissect', 'manage', 'audit', 'characterize', 'order', 'deduce', 'limit', 'connect', 'diagnose', 'document', 'proofread', 'discover', 'ensure', 'optimize', 'maximize', 'confirm', 'divide', 'transform', 'figure', 'prepare', 'file', 'determine', 'train', 'solve', 'survey', 'group', 'minimize', 'interrupt', 'explore', 'blueprint', 'arrange', 'query', 'edit', 'prove', 'isolate', 'reconcile', 'troubleshoot', 'sketch', 'create', 'summarize', 'dramatize', 'employ', 'inquire', 'link', 'abstract', 'establish', 'organize', 'compute', 'devise', 'moderate', 'delegate', 'research', 'model', 'practise', 'operate', 'demonstrate', 'schedule', 'check', 'use', 'chunk', 'choose', 'scrutinize', 'chart', 'apply', 'allow', 'extrapolate', 'recognize', 'show', 'modify', 'administer', 'review', 'change', 'monitor', 'direct', 'corroborate', 'produce', 'negotiate', 'probe', 'accept', 'design', 'interpret', 'extract', 'manipulate', 'focus', 'write', 'predict', 'resolve']
analysis_weights = [20, 19, 17, 17, 13, 12, 12, 10, 10, 9, 9, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
# Domain 4: synthesis.
synthesis_words = ['design', 'create', 'formulate', 'plan', 'compose', 'construct', 'develop', 'combine', 'assemble', 'propose', 'devise', 'arrange', 'organize', 'collect', 'rearrange', 'prepare', 'reconstruct', 'invent', 'generate', 'modify', 'write', 'categorize', 'rewrite', 'relate', 'compile', 'revise', 'reorganize', 'summarize', 'manage', 'generalize', 'integrate', 'explain', 'produce', 'originate', 'tell', 'incorporate', 'facilitate', 'hypothesize', 'substitute', 'specify', 'improve', 'format', 'correspond', 'model', 'depict', 'synthesize', 'refer', 'comply', 'enhance', 'import', 'overhaul', 'animate', 'predict', 'adapt', 'cultivate', 'code', 'join', 'handle', 'anticipate', 'portray', 'express', 'budget', 'cope', 'debug', 'perform', 'communicate', 'outline', 'prescribe', 'initiate', 'network', 'program', 'lecture', 'dictate', 'advise', 'document', 'gather', 'derive', 'abstract', 'expand', 'establish', 'collaborate', 'conduct', 'contribute', 'coordinate', 'compare', 'speculate', 'simulate', 'progress', 'forecast', 'instruct', 'structure', 'intervene', 'frame', 'measure', 'estimate', 'recommend', 'negotiate', 'consolidate', 'choose', 'contrast', 'imagine', 'individualize', 'recognize', 'solve', 'roleplay', 'review', 'arbitrate', 'teach', 'supervise', 'assess', 'counsel', 'exchange', 'brief', 'reinforce', 'unify', 'pretend', 'update', 'validate']
synthesis_weights = [20, 19, 18, 17, 16, 16, 13, 12, 12, 11, 10, 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
# Domain 5: evaluation.
evaluation_words = ['judge', 'appraise', 'evaluate', 'support', 'assess', 'select', 'justify', 'compare', 'rate', 'conclude', 'value', 'defend', 'estimate', 'choose', 'critique', 'argue', 'measure', 'recommend', 'discriminate', 'decide', 'interpret', 'criticize', 'contrast', 'rank', 'predict', 'explain', 'summarize', 'score', 'grade', 'revise', 'relate', 'verify', 'test', 'validate', 'attach', 'determine', 'describe', 'convince', 'prescribe', 'consider', 'release', 'counsel', 'hire', 'prioritize', 'deduce', 'enforce', 'advise', 'motivate', 'core', 'uphold', 'resolve', 'reconcile', 'discuss', 'authenticate', 'review', 'monitor', 'weigh', 'debate', 'diagnose', 'infer', 'mediate', 'prove', 'use', 'preserve', 'access', 'consolidate']
evaluation_weights = [21, 17, 17, 15, 15, 14, 14, 13, 13, 12, 10, 10, 10, 9, 9, 9, 9, 9, 8, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
# Per-domain containers, all indexed in the same domain order as `namelist`.
wordlists = [knowledge_words, comprehension_words, application_words, analysis_words, synthesis_words, evaluation_words]
weights = [knowledge_weights, comprehension_weights, application_weights, analysis_weights, synthesis_weights, evaluation_weights]
namelist = ['knowledge', 'comprehension', 'application', 'analysis', 'synthesis', 'evaluation']


def _build_core_docs(word_lists, weight_lists, names):
    """Turn each domain's verb list into one spaCy Doc (one token per core verb).

    The Docs are derived from the verb lists instead of from a second, hand-copied
    string, so the verbs, their weights and their tokens cannot drift apart.

    Raises:
        ValueError: if a Doc does not contain exactly one token per verb, or if a
            domain has fewer weights than verbs. Either case would pair token j
            with the wrong weight (or fail with an IndexError mid-session).
    """
    docs = []
    for name, words, word_weights in zip(names, word_lists, weight_lists):
        doc = nlp(' '.join(words))
        if len(doc) != len(words) or len(word_weights) < len(words):
            raise ValueError(
                "Domain '{0}' is misaligned: {1} verbs, {2} tokens, {3} weights".format(
                    name, len(words), len(doc), len(word_weights)))
        docs.append(doc)
    return docs


# wordlists_spacy[i][j] is the token of the j-th core verb of domain i; its weight is weights[i][j].
wordlists_spacy = _build_core_docs(wordlists, weights, namelist)
# Individual names kept for backward compatibility with code that refers to a single domain.
(knowledge_words_spacy, comprehension_words_spacy, application_words_spacy,
 analysis_words_spacy, synthesis_words_spacy, evaluation_words_spacy) = wordlists_spacy
# Locations of the input and output files. Change master_path to your own working
# directory; it is concatenated as-is, so keep the trailing slash.
#   new_source_path    - the verb list that gets classified (read by the loading cell)
#   result_path        - classification result written as indented JSON
#   result_verify_path - the same result in a compact one-word-per-line form
master_path = '/Users/ferax/bin/'
new_source_path, result_path, result_verify_path = (
    master_path + file_name
    for file_name in ('BTverblist_new.txt', 'BTclassification_result.txt', 'BTresult_verify.txt')
)

In [4]:
# Cost of a session: weighted squared distance between the rates a session produced
# and the rates we would like to see. Swap in your own cost function if needed.
def Cost(acc, ent):
    """Return w1 * (acc - a_0)^2 + w2 * (ent - e_0)^2.

    acc: acceptance rate obtained by the session.
    ent: entry rate obtained by the session.

    The targets (a_0, e_0) and the weights (w1, w2) are read from module-level
    globals. They are not defined in the cells visible here, so make sure the
    cell that defines them has been run before calling this function.
    """
    acc_gap = acc - a_0
    ent_gap = ent - e_0
    return w1 * acc_gap * acc_gap + w2 * ent_gap * ent_gap

In [5]:
# Sort-key helper: order records by their first field.
def getkey1(somelist):
    """Return the element at index 0 of `somelist` (meant to be passed as `key=`)."""
    first_field = somelist[0]
    return first_field

In [6]:
# Sort-key helper: order records by their second field.
def getkey2(somelist):
    """Return the element at index 1 of `somelist` (meant to be passed as `key=`)."""
    second_field = somelist[1]
    return second_field

In [31]:
# Compute similarity between two individual words, currently supporting vector and wordnet hypernym tree similarity.
# Notice each wordnet similarity costs about 10 times more time to compute than its vector counterpart.
def Similarity(target, core, method = 'word_embedding'):
    """Return the similarity between the word `target` and the core verb `core`.

    target, core: token-like objects. The 'word_embedding' method uses
        `target.has_vector` and `target.similarity(core)`; the 'wordnet' method
        only uses the `.text` of both.
    method: 'word_embedding' (default) for the similarity of the word vectors, or
        'wordnet' for the best Wu-Palmer similarity over all pairs of verb synsets.

    Returns 0 when `target` has no vector, when either word has no verb synset,
    when no synset pair yields a score, or when `method` is not recognised.
    """
    score = 0
    if method == 'word_embedding':
        if target.has_vector:
            score = target.similarity(core) # Cosine similarity.
    elif method == 'wordnet':
        target_synsets = wn.synsets(target.text, pos = 'v')
        # The synsets of the core verb do not depend on the outer loop: look them up once.
        core_synsets = wn.synsets(core.text, pos = 'v') if target_synsets else []
        for syn1 in target_synsets:
            for syn2 in core_synsets:
                pair_score = syn1.wup_similarity(syn2)
                # wup_similarity returns None when the two senses are not connected;
                # max(number, None) would raise TypeError, so such pairs are skipped.
                if pair_score is not None:
                    score = max(score, pair_score) # The synset with best similarity is chosen as representative.
    return score

In [8]:
# Decide, for one word, which of the 6 domains it is admitted into, given its
# similarities to all core verbs. threshold, K and cutoff are the three tunable
# parameters of the model.
def Compute_final_score(scores, threshold, K, cutoff):
    """Return a list of six 0/1 flags, one per domain.

    scores: records of the form [similarity, domain_index, weight].
    threshold: minimum similarity for a core verb to count as a neighbour.
    K: maximum number of neighbours taken into account (most similar first).
    cutoff: minimum accumulated weight for a domain to be flagged with 1.
    """
    domain_totals = [0] * 6
    ranked = sorted(scores, key = getkey1, reverse = True)
    votes_used = 0
    for record in ranked:
        if votes_used >= K:
            break
        if record[0] >= threshold:
            domain, weight = record[1], record[2]
            domain_totals[domain] += weight
            votes_used += 1
    return [1 if total >= cutoff else 0 for total in domain_totals]

In [32]:
# Run a single classification session.
def Session(data, THRESHOLD, K, CUTOFF, save = False):
    """Classify every verb in `data` with the given parameters and report the outcome.

    data: iterable of tokens to classify; each must expose `.text` and be accepted
        by Similarity().
    THRESHOLD, K, CUTOFF: model parameters, forwarded to Compute_final_score().
    save: False (training mode) prints the rates and the cost and returns the cost.
        True (classification mode) additionally prints per-domain statistics and
        writes the result to `result_path` (indented JSON) and `result_verify_path`
        (one line per word: the word followed by the indices of its domains).

    Returns the cost in training mode, None in classification mode.
    An empty `data` yields rates of 0.0 instead of a ZeroDivisionError.
    """
    if save:
        print('Classification started.')
    else:
        print('\nNew session started.')
    print('Parameters: threshold = {0}, K = {1}, cutoff = {2}'.format(THRESHOLD, K, CUTOFF))

    # Initialization.
    start = time.time()
    num_domains = len(namelist)
    result = {} # Maps each classified word to a dict {domain name: 0 or 1}.
    accept_count = 0 # Acceptance rate is the proportion of words that get admitted into at least one domain.
    entry_count = 0 # Entry rate is the average number of domains a word belongs to.
    entries = [0] * (num_domains + 1) # entries[n] = number of words admitted into exactly n domains.
    gains = [0] * num_domains # gains[i] = number of words admitted into domain i.
    processed = 0

    # The core classification process, applied to each verb in sequential order.
    for verb in data:
        # One record per core verb: [similarity to the target, domain index, weight of the core verb].
        subscores = [
            [Similarity(verb, core), domain, weights[domain][position]]
            for domain in range(num_domains)
            for position, core in enumerate(wordlists_spacy[domain])
        ]
        final_score = Compute_final_score(subscores, THRESHOLD, K, CUTOFF)

        memberships = {}
        entry = 0
        for domain, domain_name in enumerate(namelist):
            if final_score[domain]:
                memberships[domain_name] = 1
                entry += 1
                gains[domain] += 1
            else:
                memberships[domain_name] = 0
        result[verb.text] = memberships

        if entry:
            accept_count += 1
        entry_count += entry
        entries[entry] += 1
        processed += 1

    # Guard against an empty input: there is nothing to average over.
    if processed:
        acceptance_rate = accept_count / processed
        entry_rate = entry_count / processed
    else:
        acceptance_rate = 0.0
        entry_rate = 0.0

    # Save results into files or print them.
    if save: # No training.
        print('\nClassification complete.')
    else:
        print('\nSession complete.')
    print('Acceptance rate: {}'.format(acceptance_rate))
    print('Entry rate: {}'.format(entry_rate))

    cost = None
    if save:
        for count, words_with_count in enumerate(entries):
            print('# of words with {0} entries: {1}'.format(count, words_with_count))
        for domain_name, gained in zip(namelist, gains):
            print('# of entries acquired by domain {0}: {1}'.format(domain_name, gained))
        with open(result_path, 'w') as rf: # rf is the final result of the session written in json format for better legibility.
            rf.write(json.dumps(result, indent = 4))
        with open(result_verify_path, 'w') as rvf: # rvf records the exact same thing, but more concisely. It's used for validation only.
            for final_word, memberships in result.items():
                rvf.write(final_word)
                for domain, domain_name in enumerate(namelist):
                    if memberships[domain_name]:
                        rvf.write(' ' + str(domain))
                rvf.write('\n')
    else: # Training.
        cost = Cost(acceptance_rate, entry_rate)
        print('Cost function: {}'.format(cost))

    finish = time.time()
    print('Time elapsed: {} seconds'.format(finish - start))
    return cost

In [33]:
# The training function based on linear search.
def Train(data, threshold, K, cutoff, cost_record, threshold_step):
    """Evaluate the current parameter set and its neighbours; return the cheapest one.

    data: verbs to classify, forwarded to Session().
    threshold, K, cutoff: the current parameter set (the centre of the search).
    cost_record: dict {(threshold, K, cutoff): cost} of already evaluated sets.
        It is read to avoid re-running sessions and updated in place with new costs.
    threshold_step: distance to the neighbouring thresholds; K and cutoff move by 1.

    Returns the (threshold, K, cutoff) tuple with the lowest cost among the
    candidates. On ties the candidate listed first wins, so the current set is kept
    unless a neighbour is strictly better.
    """
    center = (threshold, K, cutoff)
    neighbours = [
        (threshold + threshold_step, K, cutoff),
        (threshold - threshold_step, K, cutoff),
        (threshold, K + 1, cutoff),
        (threshold, K - 1, cutoff),
        (threshold, K, cutoff + 1),
        (threshold, K, cutoff - 1),
        (threshold, K + 1, cutoff + 1),
        (threshold, K - 1, cutoff + 1),
        (threshold, K + 1, cutoff - 1),
        (threshold, K - 1, cutoff - 1),
    ]
    # K and cutoff can only take nonnegative integer values: do not spend a whole
    # session on (or return) a neighbour that stepped below zero.
    todo_list = [center] + [candidate for candidate in neighbours
                            if candidate[1] >= 0 and candidate[2] >= 0]

    local_record = []
    for param_set in todo_list: # Run a session only for parameter sets whose cost is still unknown.
        if param_set not in cost_record:
            cost_record[param_set] = Session(data, param_set[0], param_set[1], param_set[2])
        local_record.append([param_set, cost_record[param_set]])

    # min() returns the first record with the lowest cost, like a stable sort followed by [0].
    champion = min(local_record, key = getkey2)
    print('\nOptimal parameters for this epoch: threshold = {0}, K = {1}, cutoff = {2} with cost function {3}'.format(champion[0][0], champion[0][1], champion[0][2], champion[1]))
    return champion[0]

In [34]:
# Load data and preprocess with wordnet. Wordnet is used to filter rubbish words out because en_vectors includes vectors even for some of the rubbish.
# The input file is expected to hold one candidate word per line.
with open(new_source_path, 'r') as f:
    # strip() removes the line break and any stray surrounding whitespace,
    # which would otherwise make the WordNet lookup below miss the word.
    verbs = [line.strip() for line in f]

# Keep only the words WordNet knows at least one verb sense for.
wn_verbs = [verb for verb in verbs if wn.synsets(verb, pos = 'v')]
print('# of words entered: {}'.format(len(verbs)))
print('# of verbs recognized by WordNet: {}'.format(len(wn_verbs)))

# Run all remaining words through spaCy in one call by joining them into a single "sentence".
virtual_sentence = ' '.join(wn_verbs)
spacy_obj = nlp(virtual_sentence)

# Only tokens that have a word vector can be classified by vector similarity.
valid_verbs = [word for word in spacy_obj if word.has_vector]
valid_count = len(valid_verbs)
print('# of verbs eligible for classification: {}'.format(valid_count))

# of words entered: 6124
# of verbs recognized by WordNet: 3079
# of verbs eligible for classification: 3058


In [17]:
# Train the model, then run one extra session that saves the classification result
# obtained with the locally best parameters with respect to the cost function.
# NOTE: THRESHOLD_0, K_0, CUTOFF_0, threshold_step_0 and epsilon are not defined in the
# cells visible here; the cell that defines them must have been run beforehand.
print('Training started.')
cost_record = {}
threshold, K, cutoff = THRESHOLD_0, K_0, CUTOFF_0
previous = (threshold, K, cutoff)
threshold_step = threshold_step_0

# Keep searching until the threshold step has shrunk below epsilon.
while threshold_step >= epsilon:
    step_result = Train(valid_verbs, threshold, K, cutoff, cost_record, threshold_step)
    threshold, K, cutoff = step_result
    # No move in this epoch: the current point is a local optimum at this resolution, so refine the step.
    if step_result == previous:
        threshold_step *= 0.618
    previous = step_result

print('\nTraining complete. Current parameters: threshold = {0}, K = {1}, cutoff = {2}'.format(threshold, K, cutoff))
Session(valid_verbs, threshold, K, cutoff, save = True)

Training started.

New session started.
Parameters: threshold = 0.24, K = 16, cutoff = 17

Session complete.
Acceptance rate: 0.7854807063440157
Entry rate: 1.2442773054283847
Cost function: 11
Time elapsed: 167.89760208129883 seconds

New session started.
Parameters: threshold = 0.27999999999999997, K = 16, cutoff = 17

Session complete.
Acceptance rate: 0.7223675604970569
Entry rate: 1.1412688031393068
Cost function: 11
Time elapsed: 148.40939378738403 seconds

New session started.
Parameters: threshold = 0.19999999999999998, K = 16, cutoff = 17

Session complete.
Acceptance rate: 0.8260300850228908
Entry rate: 1.3106605624591237
Cost function: 11
Time elapsed: 152.5761752128601 seconds

New session started.
Parameters: threshold = 0.24, K = 17, cutoff = 17

Session complete.
Acceptance rate: 0.8113145846958797
Entry rate: 1.3570961412688032
Cost function: 10
Time elapsed: 150.34040594100952 seconds

New session started.
Parameters: threshold = 0.24, K = 15, cutoff = 17

Session comp


Session complete.
Acceptance rate: 0.8109875735775016
Entry rate: 1.355461085676913
Cost function: 10
Time elapsed: 156.5937488079071 seconds

New session started.
Parameters: threshold = 0.23947403570017667, K = 17, cutoff = 17

Session complete.
Acceptance rate: 0.8132766514061478
Entry rate: 1.3597122302158273
Cost function: 10
Time elapsed: 159.99266600608826 seconds

Optimal parameters for this round: threshold = 0.24, K = 17, cutoff = 17 with cost function 10

New session started.
Parameters: threshold = 0.24032504593729082, K = 17, cutoff = 17

Session complete.
Acceptance rate: 0.8109875735775016
Entry rate: 1.356115107913669
Cost function: 10
Time elapsed: 156.38572788238525 seconds

New session started.
Parameters: threshold = 0.23967495406270917, K = 17, cutoff = 17

Session complete.
Acceptance rate: 0.8122956180510137
Entry rate: 1.3587311968606932
Cost function: 10
Time elapsed: 157.20842385292053 seconds

Optimal parameters for this round: threshold = 0.24, K = 17, cuto

KeyboardInterrupt: 

In [59]:
# Scratch check: vector similarity between two individual verbs of a small sample.
testnlp = nlp(r'contrast differentiate list outline translate investigate evaluate grade')
list_token = testnlp[2] # 'list'
contrast_token = testnlp[0] # 'contrast'
list_token.similarity(contrast_token)

0.04791261