In [2]:
'''
BT 1 - Source verb analysis based on Word Embedding.ipynb
Author: Jingchuan Shi
Acknowledgments: Asst. Prof. Ahmed Qureshi
Created 2019/9/6, last modified 2019/9/17 at University of Alberta.
All Rights Reserved.
'''

# Load relevant modules.
import numpy as np
import spacy
# Load the spaCy pipeline once; every verb list below is parsed by calling nlp(...)
# and the resulting tokens supply the word vectors used in the analysis.
# NOTE(review): "en_vectors" is a shortcut name, so it only resolves in an
# environment where that shortcut has been set up — confirm before re-running.
nlp = spacy.load("en_vectors") # Model en_vectors_web_lg of SpaCy with a pre-defined shortcut.

In [7]:
# The list of verbs pre-labelled with corresponding Bloom's Taxonomy domains.
# Each level is stored exactly once as a (high-frequency verbs, low-frequency verbs)
# pair, in the order knowledge, comprehension, application, analysis, synthesis,
# evaluation. The combined list of a level is its high-frequency verbs followed by
# its low-frequency verbs, so it is derived below instead of being typed a second
# time (which would allow the two copies to drift apart).
# NOTE(review): 'point out' (analysis, low) is written with a space while every
# other multi-word verb uses an underscore (e.g. 'size_up', 'lay_out'); it will
# presumably be tokenized as two separate words — confirm that this is intended.
_LEVEL_VERBS = (
    # knowledge
    (r'list name define repeat state label recall identify reproduce describe recognize select record',
     r'match relate memorize outline quote enumerate write tell recite cite duplicate read order tabulate draw review indicate underline arrange know point count collect meet study trace find index locate show visualize examine copy sequence acquire retell view observe tally imitate follow'),
    # comprehension
    (r'explain describe discuss paraphrase restate summarize translate convert review express estimate identify generalize interpret locate give distinguish',
     r'extend predict recognize defend classify infer report illustrate rewrite select contrast differentiate compare indicate exemplify observe elaborate associate visualize articulate clarify subtract approximate interpolate tell detail outline cite picture interact conclude characterize add factor compute match schedule order sketch draw define operate arrange group extrapolate make_sense diagram interrelate represent trace shop suggest understand'),
    # application
    (r'demonstrate use apply solve illustrate dramatize practise employ operate sketch prepare show compute relate construct interpret',
     r'discover change produce manipulate schedule modify predict complete choose classify translate determine examine calculate investigate draw write protect derive chart alphabetize simulate process provide capture project transcribe organize shop establish attain graph assign allocate convert experiment exercise diminish make develop ascertain tabulate depreciate subscribe implement handle transfer factor avoid expose express perform sequence acquire administer personalize adapt plot customize interview paint explore utilize report round_off figure price carry_out coordinate simplify consult maintain deliver extend imitate guide back_up conduct multiply build code contribute obtain model compare divide follow_up exhibit tally inform diagram expand amend engineer control assess concatenate execute convey articulate restructure criticize appraise participate generalize instruct follow act screen debate question select include dissect retrieve inspect prove inventory respond comply collect'),
    # analysis
    (r'compare contrast distinguish analyze differentiate separate examine diagram infer',
     r'categorize experiment discriminate select appraise relate test question classify identify outline illustrate point out subdivide investigate debate criticize calculate inventory prioritize correlate explain inspect detect dissect manage audit characterize order deduce limit connect diagnose document proofread discover ensure optimize maximize confirm divide transform figure prepare file determine train size_up solve lay_out survey group minimize interrupt explore blueprint arrange query edit prove isolate reconcile troubleshoot sketch create summarize dramatize employ inquire link abstract establish organize compute devise set_up moderate delegate research model practise operate demonstrate schedule check use chunk choose scrutinize chart apply allow extrapolate recognize show modify administer review change monitor direct corroborate produce negotiate probe accept design interpret extract manipulate focus write predict resolve'),
    # synthesis
    (r'design create formulate plan compose construct develop combine assemble propose devise arrange organize collect',
     r'rearrange prepare reconstruct invent generate modify write categorize rewrite relate compile revise reorganize set_up summarize manage generalize integrate explain produce originate tell incorporate facilitate hypothesize substitute specify improve format correspond model depict synthesize refer comply enhance import overhaul animate predict adapt cultivate code join handle anticipate portray express budget cope debug perform communicate outline prescribe initiate network program lecture dictate advise document gather derive abstract expand establish collaborate conduct contribute coordinate compare speculate simulate progress forecast instruct structure intervene frame measure estimate recommend negotiate consolidate choose contrast imagine individualize recognize solve roleplay review arbitrate teach supervise assess counsel exchange make_up brief reinforce unify pretend update validate'),
    # evaluation
    (r'judge appraise evaluate support assess select justify compare rate conclude value defend estimate',
     r'choose critique argue measure recommend discriminate decide interpret criticize contrast rank predict explain summarize score grade revise relate verify test validate attach determine describe convince prescribe consider release counsel hire prioritize deduce enforce advise motivate core uphold resolve reconcile discuss authenticate review monitor weigh debate diagnose infer mediate prove use preserve access consolidate'),
)

# Parse every list with the shared pipeline; index k of each list is level k.
wordlists = [nlp(high + ' ' + low) for high, low in _LEVEL_VERBS]
wordlists_high = [nlp(high) for high, _ in _LEVEL_VERBS]
wordlists_low = [nlp(low) for _, low in _LEVEL_VERBS]

# Keep the individual per-level names available alongside the lists.
(knowledge_words, comprehension_words, application_words,
 analysis_words, synthesis_words, evaluation_words) = wordlists
(knowledge_high, comprehension_high, application_high,
 analysis_high, synthesis_high, evaluation_high) = wordlists_high
(knowledge_low, comprehension_low, application_low,
 analysis_low, synthesis_low, evaluation_low) = wordlists_low

# Initialization.
# Six cognitive levels; each mean row has room for a 300-component vector.
_LEVELS, _WIDTH = 6, 300

# Running total of counted words over the combined lists.
total_count = 0

# Per-level word counters (integer) for the combined, high and low lists.
vector_count = np.zeros(_LEVELS, dtype=int)
high_count = np.zeros(_LEVELS, dtype=int)
low_count = np.zeros(_LEVELS, dtype=int)

# Per-level mean vectors, one row per level.
vector_mean = np.zeros((_LEVELS, _WIDTH), dtype=np.float64)
high_mean = np.zeros((_LEVELS, _WIDTH), dtype=np.float64)
low_mean = np.zeros((_LEVELS, _WIDTH), dtype=np.float64)

# Level-by-level difference matrices.
vector_dev = np.zeros((_LEVELS, _LEVELS), dtype=np.float64)
high_dev = np.zeros((_LEVELS, _LEVELS), dtype=np.float64)
low_dev = np.zeros((_LEVELS, _LEVELS), dtype=np.float64)

# Compute vector averages.
def _sum_token_vectors(doc, width):
    """Add up the vectors of the tokens in *doc* that have one.

    Tokens whose ``has_vector`` is false are skipped. Returns a tuple
    ``(vector_sum, hits)`` where ``vector_sum`` is a float64 array of length
    *width* and ``hits`` is the number of tokens that contributed to it.
    """
    vector_sum = np.zeros(width, dtype=np.float64)
    hits = 0
    for token in doc:
        if token.has_vector:
            vector_sum = vector_sum + token.vector
            hits += 1
    return vector_sum, hits


# The same averaging is applied to the combined, high and low lists of a level.
_MEAN_TARGETS = (
    (wordlists, vector_mean, vector_count),
    (wordlists_high, high_mean, high_count),
    (wordlists_low, low_mean, low_count),
)

for i in range(6):
    for docs, means, counts in _MEAN_TARGETS:
        vector_sum, hits = _sum_token_vectors(docs[i], means.shape[1])
        counts[i] = hits
        # Rows are overwritten rather than accumulated into, so re-running this
        # section does not compound earlier results. A level with no usable
        # token still divides by zero here, exactly as before.
        means[i] = vector_sum / hits
    # Only the combined lists contribute to the overall word total.
    total_count += int(vector_count[i])

# Compute between-group differences.
# Entry [i][j] of each matrix is the Euclidean distance between the mean vectors
# of levels i and j. Only the upper triangle is computed; the value is mirrored
# into [j][i], and the diagonal is left untouched.
for means, dev in (
    (vector_mean, vector_dev),
    (high_mean, high_dev),
    (low_mean, low_dev),
):
    for i in range(6):
        for j in range(i + 1, 6):
            # np.linalg.norm of a 1-D difference is sqrt(sum(entry ** 2)),
            # replacing the hand-written accumulation loop.
            dev[i][j] = np.linalg.norm(means[i] - means[j])
            dev[j][i] = dev[i][j]
        
# Compare the mean differences between cognitive levels with respect to distance in levels.
def _distance_stats(dev):
    """Summarize a level-by-level difference matrix by distance between levels.

    Row ``k - 1`` of the returned float64 array holds the average, maximum and
    minimum of ``dev[j][j + k]`` over every valid ``j``, i.e. over all pairs of
    levels that are ``k`` steps apart. The result has ``len(dev) - 1`` rows and
    three columns.
    """
    n_levels = len(dev)
    stats = np.zeros((n_levels - 1, 3), dtype=np.float64)
    for k in range(1, n_levels):
        gaps = [dev[j][j + k] for j in range(n_levels - k)]
        stats[k - 1] = (sum(gaps) / len(gaps), max(gaps), min(gaps))
    return stats


# Each table is built only from its own matrix. The previous code compared the
# running max/min against an undefined name ``d``, which either raised NameError
# or silently mixed in values from an unrelated, stale array.
d_total = _distance_stats(vector_dev)
d_high = _distance_stats(high_dev)
d_low = _distance_stats(low_dev)
    
# Presentation of results.
def _show_section(scope, dev_matrix, distance_table):
    """Print one difference matrix followed by its per-distance summary rows.

    *scope* is the wording inserted into both headings; each row of
    *distance_table* is printed as "k = <row number>: " followed by its first
    three entries.
    """
    print('\nMean difference matrix for ' + scope + ':')
    print(dev_matrix)
    print('\nAverage / Max / Min of mean difference for cognitive levels at distance k (' + scope + '):')
    for k, row in enumerate(distance_table, start=1):
        print('k = ' + str(k) + ': ', end = '')
        print(row[0], row[1], row[2])


# Overall counts first, then one section per word selection.
for caption, value in (
    ('Total # of words: ', total_count),
    ('# of words in each level: ', vector_count),
):
    print(caption, end = '')
    print(value)

_show_section('all words', vector_dev, d_total)
_show_section('high frequency words', high_dev, d_high)
_show_section('low frequency words', low_dev, d_low)


Total # of words: 561
# of words in each level: [ 54  69 133 121 118  66]

Mean difference matrix for all words:
[[0.         1.15502887 1.34243391 1.31132155 1.58943319 1.62824838]
 [1.15502887 0.         1.09023041 0.94523899 1.17300731 1.2951828 ]
 [1.34243391 1.09023041 0.         0.64854971 0.66654029 1.2129651 ]
 [1.31132155 0.94523899 0.64854971 0.         0.83013841 1.06498423]
 [1.58943319 1.17300731 0.66654029 0.83013841 0.         1.26755463]
 [1.62824838 1.2951828  1.2129651  1.06498423 1.26755463 0.        ]]

Average / Max / Min of mean difference for cognitive levels at distance k (all words):
k = 1: 0.9983004061053095 1.2675546309113535 0.6485497089239736
k = 2: 1.004799357503706 1.3424339144864632 0.6665402948481126
k = 3: 1.232431319336796 1.3113215461881553 1.1730073119238273
k = 4: 1.4423079905346587 1.5894331860210271 1.2951827950482906
k = 5: 1.6282483789073388 1.6282483789073388 1.6282483789073388

Mean difference matrix for high frequency words:
[[0.         2.3