In [11]:
'''
BT 4 - Validation and reflection.ipynb
Author: Jingchuan Shi
Acknowledgments: Asst. Prof. Ahmed Qureshi
Created 2019/9/10, last modified 2019/9/14 at University of Alberta.
All Rights Reserved.
'''

# Load relevant modules.
import numpy as np
import spacy
# Load the spaCy pipeline registered under the name "en_vectors". Per the original
# note, this is a pre-defined shortcut for the en_vectors_web_lg model.
# NOTE(review): shortcut names like this presumably require spaCy 2.x with the link
# created beforehand (shortcut links were dropped in spaCy 3) - confirm the environment.
nlp = spacy.load("en_vectors")

In [58]:
# The list of verbs pre-labelled with corresponding Bloom's Taxonomy domains.
# The lists are not disjoint: the same verb may appear under several domains
# (e.g. 'describe' is listed under both knowledge and comprehension).
# Domain 0: knowledge.
knowledge_words = ['list', 'name', 'define', 'repeat', 'state', 'label', 'recall', 'identify', 'reproduce', 'describe', 'recognize', 'select', 'record', 'match', 'relate', 'memorize', 'outline', 'quote', 'enumerate', 'write', 'tell', 'recite', 'cite', 'duplicate', 'read', 'order', 'tabulate', 'draw', 'review', 'indicate', 'underline', 'arrange', 'know', 'point', 'count', 'collect', 'meet', 'study', 'trace', 'find', 'index', 'locate', 'show', 'visualize', 'examine', 'copy', 'sequence', 'acquire', 'retell', 'view', 'observe', 'tally', 'imitate', 'follow']
# Domain 1: comprehension.
comprehension_words = ['explain', 'describe', 'discuss', 'paraphrase', 'restate', 'summarize', 'translate', 'convert', 'review', 'express', 'estimate', 'identify', 'generalize', 'interpret', 'locate', 'give', 'distinguish', 'extend', 'predict', 'recognize', 'defend', 'classify', 'infer', 'report', 'illustrate', 'rewrite', 'select', 'contrast', 'differentiate', 'compare', 'indicate', 'exemplify', 'observe', 'elaborate', 'associate', 'visualize', 'articulate', 'clarify', 'subtract', 'approximate', 'interpolate', 'tell', 'detail', 'outline', 'cite', 'picture', 'interact', 'conclude', 'characterize', 'add', 'factor', 'compute', 'match', 'schedule', 'order', 'sketch', 'draw', 'define', 'operate', 'arrange', 'group', 'extrapolate', 'diagram', 'interrelate', 'represent', 'trace', 'shop', 'suggest', 'understand']
# Domain 2: application.
application_words = ['demonstrate', 'use', 'apply', 'solve', 'illustrate', 'dramatize', 'practise', 'employ', 'operate', 'sketch', 'prepare', 'show', 'compute', 'relate', 'construct', 'interpret', 'discover', 'change', 'produce', 'manipulate', 'schedule', 'modify', 'predict', 'complete', 'choose', 'classify', 'translate', 'determine', 'examine', 'calculate', 'investigate', 'draw', 'write', 'protect', 'derive', 'chart', 'alphabetize', 'simulate', 'process', 'provide', 'capture', 'project', 'transcribe', 'organize', 'shop', 'establish', 'attain', 'graph', 'assign', 'allocate', 'convert', 'experiment', 'exercise', 'diminish', 'make', 'develop', 'ascertain', 'tabulate', 'depreciate', 'subscribe', 'implement', 'handle', 'transfer', 'factor', 'avoid', 'expose', 'express', 'perform', 'sequence', 'acquire', 'administer', 'personalize', 'adapt', 'plot', 'customize', 'interview', 'paint', 'explore', 'utilize', 'report', 'figure', 'price', 'coordinate', 'simplify', 'consult', 'maintain', 'deliver', 'extend', 'imitate', 'guide', 'conduct', 'multiply', 'build', 'code', 'contribute', 'obtain', 'model', 'compare', 'divide', 'exhibit', 'tally', 'inform', 'diagram', 'expand', 'amend', 'engineer', 'control', 'assess', 'concatenate', 'execute', 'convey', 'articulate', 'restructure', 'criticize', 'appraise', 'participate', 'generalize', 'instruct', 'follow', 'act', 'screen', 'debate', 'question', 'select', 'include', 'dissect', 'retrieve', 'inspect', 'prove', 'inventory', 'respond', 'comply', 'collect']
# Domain 3: analysis.
analysis_words = ['compare', 'contrast', 'distinguish', 'analyze', 'differentiate', 'separate', 'examine', 'diagram', 'infer', 'categorize', 'experiment', 'discriminate', 'select', 'appraise', 'relate', 'test', 'question', 'classify', 'identify', 'outline', 'illustrate', 'subdivide', 'investigate', 'debate', 'criticize', 'calculate', 'inventory', 'prioritize', 'correlate', 'explain', 'inspect', 'detect', 'dissect', 'manage', 'audit', 'characterize', 'order', 'deduce', 'limit', 'connect', 'diagnose', 'document', 'proofread', 'discover', 'ensure', 'optimize', 'maximize', 'confirm', 'divide', 'transform', 'figure', 'prepare', 'file', 'determine', 'train', 'solve', 'survey', 'group', 'minimize', 'interrupt', 'explore', 'blueprint', 'arrange', 'query', 'edit', 'prove', 'isolate', 'reconcile', 'troubleshoot', 'sketch', 'create', 'summarize', 'dramatize', 'employ', 'inquire', 'link', 'abstract', 'establish', 'organize', 'compute', 'devise', 'moderate', 'delegate', 'research', 'model', 'practise', 'operate', 'demonstrate', 'schedule', 'check', 'use', 'chunk', 'choose', 'scrutinize', 'chart', 'apply', 'allow', 'extrapolate', 'recognize', 'show', 'modify', 'administer', 'review', 'change', 'monitor', 'direct', 'corroborate', 'produce', 'negotiate', 'probe', 'accept', 'design', 'interpret', 'extract', 'manipulate', 'focus', 'write', 'predict', 'resolve']
# Domain 4: synthesis.
synthesis_words = ['design', 'create', 'formulate', 'plan', 'compose', 'construct', 'develop', 'combine', 'assemble', 'propose', 'devise', 'arrange', 'organize', 'collect', 'rearrange', 'prepare', 'reconstruct', 'invent', 'generate', 'modify', 'write', 'categorize', 'rewrite', 'relate', 'compile', 'revise', 'reorganize', 'summarize', 'manage', 'generalize', 'integrate', 'explain', 'produce', 'originate', 'tell', 'incorporate', 'facilitate', 'hypothesize', 'substitute', 'specify', 'improve', 'format', 'correspond', 'model', 'depict', 'synthesize', 'refer', 'comply', 'enhance', 'import', 'overhaul', 'animate', 'predict', 'adapt', 'cultivate', 'code', 'join', 'handle', 'anticipate', 'portray', 'express', 'budget', 'cope', 'debug', 'perform', 'communicate', 'outline', 'prescribe', 'initiate', 'network', 'program', 'lecture', 'dictate', 'advise', 'document', 'gather', 'derive', 'abstract', 'expand', 'establish', 'collaborate', 'conduct', 'contribute', 'coordinate', 'compare', 'speculate', 'simulate', 'progress', 'forecast', 'instruct', 'structure', 'intervene', 'frame', 'measure', 'estimate', 'recommend', 'negotiate', 'consolidate', 'choose', 'contrast', 'imagine', 'individualize', 'recognize', 'solve', 'roleplay', 'review', 'arbitrate', 'teach', 'supervise', 'assess', 'counsel', 'exchange', 'brief', 'reinforce', 'unify', 'pretend', 'update', 'validate']
# Domain 5: evaluation.
# NOTE(review): 'core' and 'access' look like possible typos of 'score' / 'assess'
# (both of which are already in this list) - confirm against the source material.
evaluation_words = ['judge', 'appraise', 'evaluate', 'support', 'assess', 'select', 'justify', 'compare', 'rate', 'conclude', 'value', 'defend', 'estimate', 'choose', 'critique', 'argue', 'measure', 'recommend', 'discriminate', 'decide', 'interpret', 'criticize', 'contrast', 'rank', 'predict', 'explain', 'summarize', 'score', 'grade', 'revise', 'relate', 'verify', 'test', 'validate', 'attach', 'determine', 'describe', 'convince', 'prescribe', 'consider', 'release', 'counsel', 'hire', 'prioritize', 'deduce', 'enforce', 'advise', 'motivate', 'core', 'uphold', 'resolve', 'reconcile', 'discuss', 'authenticate', 'review', 'monitor', 'weigh', 'debate', 'diagnose', 'infer', 'mediate', 'prove', 'use', 'preserve', 'access', 'consolidate']
# Parallel lists: position i holds the verb list and the display name of domain i,
# in the same 0-5 order as the domain label digits used in the rest of this file.
wordlists = [knowledge_words, comprehension_words, application_words, analysis_words, synthesis_words, evaluation_words]
namelist = ['knowledge', 'comprehension', 'application', 'analysis', 'synthesis', 'evaluation']
# Paths to related input and output files. Please modify the master path to your own.
# master_path must end with a path separator because the file name is appended by
# plain string concatenation.
master_path = '/Users/ferax/bin/'
result_path = master_path + 'BTresult_verify.txt'

In [59]:
# Load classification results.
# Every non-blank line of the result file is read as "<word> <label> <label> ...":
# the first whitespace-separated token is the word, and each following token that is
# one of the digits 0-5 marks the word as a member of that domain.
# result maps word -> {'0': 0/1, ..., '5': 0/1}.
result = {}
with open(result_path, 'r') as rf:
    for line in rf:
        # split() with no argument ignores the trailing newline and tolerates tabs or
        # repeated / leading spaces; blank lines produce no tokens and are skipped so
        # that they are not stored as an empty "word".
        tokens = line.split()
        if not tokens:
            continue
        word, labels = tokens[0], set(tokens[1:])
        result[word] = {str(i): int(str(i) in labels) for i in range(6)}

# Derive the counters from the dictionary itself rather than counting lines, so that
# a word repeated in the file (the last occurrence wins) is counted exactly once and
# total / subtotals always agree with what later per-word loops over result see.
total = len(result)
subtotals = [sum(flags[str(i)] for flags in result.values()) for i in range(6)]
# 6 x 6 placeholder for the mutual influence matrix, filled in by a later cell.
prob_matrix = [[0 for i in range(6)] for j in range(6)]

In [60]:
# Computation of mutual influence, defined as how much more or less likely a word is
# to belong to a certain domain, D_j, given that it already belongs to D_i.
# The value is count_ij * total / (subtotals[i] * subtotals[j]), i.e.
# P(D_i and D_j) / (P(D_i) * P(D_j)): above 1 means the two domains co-occur more
# often than independence would predict, below 1 less often. The matrix is symmetric
# and its diagonal is left as None.

# Build the set of member words of each domain once, instead of rescanning the whole
# result dictionary for every one of the 30 ordered domain pairs.
members = [{word for word, flags in result.items() if flags[str(level)]} for level in range(6)]

# A domain without any word makes the ratio undefined; fail with an explicit message
# rather than a bare ZeroDivisionError from the formula below.
for level in range(6):
    if subtotals[level] == 0:
        raise ValueError('No word is labelled with domain %d; mutual influence is undefined.' % level)

for i in range(6):
    for j in range(6):
        if i == j:
            prob_matrix[i][j] = None
            continue
        count_ij = len(members[i] & members[j])
        prob_matrix[i][j] = round((count_ij * total) / (subtotals[i] * subtotals[j]), 3)
# Mean / max / min mutual influence between all the domains at distance k in the
# original taxonomy. Row k - 1 of d summarises the k-th superdiagonal of prob_matrix,
# i.e. the 6 - k domain pairs (row, row + k); every statistic is rounded to 3 decimals.
d = []
for k in range(1, 6):
    band = [prob_matrix[row][row + k] for row in range(6 - k)]
    d.append([round(stat, 3) for stat in (sum(band) / len(band), max(band), min(band))])

# Presentation of results.
print('Average / Max / Min of mutual influence for cognitive levels at distance k:')
for label, (mean_k, max_k, min_k) in zip(('k = 1: ', 'k = 2: ', 'k = 3: ', 'k = 4: ', 'k = 5: '), d):
    print(label, end = '')
    print(mean_k, max_k, min_k)
print('\nMutual influence matrix:')
# Bare expression: shown as the cell output when run in a notebook.
prob_matrix

Average / Max / Min of mutual influence for cognitive levels at distance k:
k = 1: 0.818 1.532 0.452
k = 2: 1.075 1.783 0.485
k = 3: 0.494 0.716 0.347
k = 4: 0.861 1.327 0.396
k = 5: 0.678 0.678 0.678

Mutual influence matrix:


[[None, 1.532, 0.485, 0.716, 0.396, 0.678],
 [1.532, None, 0.618, 1.783, 0.347, 1.327],
 [0.485, 0.618, None, 0.697, 0.928, 0.419],
 [0.716, 1.783, 0.697, None, 0.793, 1.102],
 [0.396, 0.347, 0.928, 0.793, None, 0.452],
 [0.678, 1.327, 0.419, 1.102, 0.452, None]]

In [61]:
# Expanded lists are core words unioned with newly classified words of the same domain.
# Each expanded list starts as a COPY of its core list: binding the core list itself
# would make the appends below modify knowledge_words etc. in place, and re-running
# this cell would then append the classified words a second time.
expanded_lists = [
    list(core_words)
    for core_words in (knowledge_words, comprehension_words, application_words,
                       analysis_words, synthesis_words, evaluation_words)
]
expanded_0, expanded_1, expanded_2, expanded_3, expanded_4, expanded_5 = expanded_lists

# Add each classified word to every domain it was labelled with, skipping words the
# list already contains so that the result is a true union (a word present twice
# would otherwise weigh twice in the statistics computed from these lists).
already_listed = [set(words) for words in expanded_lists]
for word, flags in result.items():
    for i in range(6):
        if flags[str(i)] and word not in already_listed[i]:
            already_listed[i].add(word)
            expanded_lists[i].append(word)

# One space-separated "virtual sentence" per domain (each word followed by a single
# space), passed through nlp so that every word can be looked up as a token.
virtual_sentences = [''.join(word + ' ' for word in words) for words in expanded_lists]
nlp_objs = [nlp(sentence) for sentence in virtual_sentences]

# Initialization.
# vector_count[i]: number of tokens of domain i that have a word vector.
# vector_mean[i]:  centroid (component-wise mean) of those vectors.
#                  assumes the loaded model provides 300-dimensional vectors - TODO confirm.
# vector_dev:      diagonal = within-domain spread, off-diagonal = Euclidean distance
#                  between the centroids of two domains.
vector_count = np.zeros(6, dtype = int)
vector_mean = np.zeros((6, 300), dtype = np.float64)
vector_dev = np.zeros((6, 6), dtype = np.float64)

# The same thing done in BT 1 - Source verb analysis based on Word Embedding.ipynb, this time on the expanded lists.
for i in range(6):
    vectors = [token.vector for token in nlp_objs[i] if token.has_vector]
    vector_count[i] = len(vectors)
    if not vectors:
        # No usable vector in this domain: the statistics are undefined. Store NaN
        # explicitly (the value a 0 / 0 division would give) without the warning.
        vector_mean[i] = np.nan
        vector_dev[i][i] = np.nan
        continue
    stacked = np.asarray(vectors, dtype = np.float64)
    vector_mean[i] = stacked.mean(axis = 0)
    # sqrt(sum over words and components of squared deviations) / n.
    # NOTE(review): this is the root-mean-square distance to the centroid divided by
    # sqrt(n), not the standard deviation sqrt(sum / n) that the printed title of the
    # matrix suggests. Kept as is for consistency with the referenced BT 1 notebook -
    # confirm which statistic is intended.
    vector_dev[i][i] = np.linalg.norm(stacked - vector_mean[i]) / vector_count[i]

# Off-diagonal entries: Euclidean distance between centroids, mirrored so that the
# matrix is symmetric.
for i in range(6):
    for j in range(i + 1, 6):
        vector_dev[i][j] = vector_dev[j][i] = np.linalg.norm(vector_mean[i] - vector_mean[j])

# Mean / max / min of the centroid distances between all the domains at distance k in
# the taxonomy. Row k - 1 of d summarises the k-th superdiagonal of vector_dev, i.e.
# the 6 - k domain pairs (row, row + k).
d = np.zeros((5, 3), dtype = np.float64)
for k in range(1, 6):
    band = [vector_dev[row][row + k] for row in range(6 - k)]
    d[k - 1] = (sum(band) / len(band), max(band), min(band))

# Presentation of results.
print('# of words in each level: ', end = '')
print(vector_count)
print('\nStandard deviation and mean difference matrix:')
print(vector_dev)
print('\nAverage / Max / Min of mean difference for cognitive levels at distance k:')
for label, (mean_k, max_k, min_k) in zip(('k = 1: ', 'k = 2: ', 'k = 3: ', 'k = 4: ', 'k = 5: '), d):
    print(label, end = '')
    print(mean_k, max_k, min_k)


# of words in each level: [289 466 827 553 869 505]

Standard deviation and mean difference matrix:
[[0.32826869 1.3981321  1.43794338 1.63332216 1.76944473 1.75853892]
 [1.3981321  0.24800449 1.28503834 1.25316605 1.5057859  1.05397619]
 [1.43794338 1.28503834 0.20452765 0.84101285 0.70213442 1.30977094]
 [1.63332216 1.25316605 0.84101285 0.24863197 1.01251786 1.45819961]
 [1.76944473 1.5057859  0.70213442 1.01251786 0.19980222 1.5221591 ]
 [1.75853892 1.05397619 1.30977094 1.45819961 1.5221591  0.2436866 ]]

Average / Max / Min of mean difference for cognitive levels at distance k:
k = 1: 1.2117720507841834 1.5221591018036675 0.8410128544890878
k = 2: 1.2128608643082242 1.4581996070198378 0.702134423723043
k = 3: 1.4829596649214078 1.6333221581082449 1.3097709386315666
k = 4: 1.411710464003241 1.7694447341009194 1.0539761939055627
k = 5: 1.7585389201479138 1.7585389201479138 1.7585389201479138
