#### Multiprocessing (MP) based asynchronous testing

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import multiprocessing as mp
import TestPool_Unit
from shutil import copyfile
import numpy as np

In [3]:
from collections import defaultdict

In [4]:
def Evaluate(result_arr):
    """Print recall/precision/F-score statistics for the results collected so far.

    Each entry of ``result_arr`` is unpacked as
    ``(word_match, lemma_match, n_dcsWords, n_output_nodes)``. Per entry,
    recall is ``match / n_dcsWords`` and precision is ``match / n_output_nodes``.
    The printed labels say 'Micro', but every figure is the unweighted mean
    of these per-entry ratios.

    The final printed line lists, as percentages: lemma recall, word recall,
    lemma precision, word precision, lemma F-score, fraction of entries with
    every lemma matched, fraction of entries with every word matched.

    An empty ``result_arr`` only prints the file count (plus a short notice)
    instead of dividing by zero, and a zero denominator in any ratio yields
    0.0 rather than raising.

    Parameters
    ----------
    result_arr : sequence of 4-tuples
        Results fetched from the workers.

    Returns
    -------
    None
        Everything is reported through ``print``.
    """
    def _ratio(numerator, denominator):
        # Zero denominators (no gold words / no predicted nodes / both means zero) count as 0.0
        return numerator / denominator if denominator else 0.0

    n_files = len(result_arr)
    print('Files Processed: ', n_files)
    if n_files == 0:
        # Nothing fetched from the queue yet: the averages below would be undefined
        print('No results to evaluate yet.')
        return

    recalls = []
    recalls_of_word = []
    precisions = []
    precisions_of_words = []
    fully_Correct_l = 0
    fully_Correct_w = 0
    for entry in result_arr:
        (word_match, lemma_match, n_dcsWords, n_output_nodes) = entry
        recalls.append(_ratio(lemma_match, n_dcsWords))
        recalls_of_word.append(_ratio(word_match, n_dcsWords))

        precisions.append(_ratio(lemma_match, n_output_nodes))
        precisions_of_words.append(_ratio(word_match, n_output_nodes))
        if lemma_match == n_dcsWords:
            fully_Correct_l += 1
        if word_match == n_dcsWords:
            fully_Correct_w += 1

    # Compute every mean once and reuse it for both the verbose and the summary output
    rl = np.mean(np.array(recalls))
    rw = np.mean(np.array(recalls_of_word))
    pl = np.mean(np.array(precisions))
    pw = np.mean(np.array(precisions_of_words))
    f_score = _ratio(2*pl*rl, pl + rl)
    full_l = fully_Correct_l/n_files
    full_w = fully_Correct_w/n_files

    print('Avg. Micro Recall of Lemmas: {}'.format(rl))
    print('Avg. Micro Recall of Words: {}'.format(rw))
    print('Avg. Micro Precision of Lemmas: {}'.format(pl))
    print('Avg. Micro Precision of Words: {}'.format(pw))
    print('F-Score of Lemmas: ', f_score)
    print('Fully Correct Lemmawise: {}'.format(full_l))
    print('Fully Correct Wordwise: {}'.format(full_w))
    print('[{:0.2f}, {:0.2f}, {:0.2f}, {:0.2f}, {:0.2f}, {:0.2f}, {:0.2f}]'.format(
        100*rl, 100*rw, 100*pl, 100*pw, 100*f_score,
        100*fully_Correct_l/n_files, 100*fully_Correct_w/n_files))

In [12]:
# Model snapshot under test
modelFile = 'outputs/train_t2789415023871/nnet_e1_i400.p' #BR2
# An output path is set only when _dump is True; both values are passed to the workers
_dump = False
_outFile = 'outputs/dump_predictions/BR2_NLoss' if _dump else None

# Keep a '.bk' copy of the model file next to the original
copyfile(modelFile, modelFile + '.bk')

# Queue handed to every worker, and the list that gathers results
queue = mp.Queue()
result_arr = []

# Six workers - eight slows down the pc
proc_count = 6
procs = [
    mp.Process(
        target=TestPool_Unit.pooled_Test,
        args=(modelFile, vpid, queue,
              '../NewData/skt_dcs_DS.bz2_4K_bigram_rfe_heldout/',
              int(9600/proc_count), _dump, _outFile)
    )
    for vpid in range(proc_count)
]
# Launch the workers only after every Process object has been built
for worker in procs:
    worker.start()

Child process with vpid:0, pid:4932 started.
Child process with vpid:1, pid:4933 started.
Child process with vpid:2, pid:4938 started.
Child process with vpid:3, pid:4943 started.
Child process with vpid:4, pid:4944 started.
Child process with vpid:5, pid:4947 started.
Keep Prob = 0.6, Dropout = 0.4
vpid:1: Range is 1600 -> 3200 / 9577
Keep Prob = 0.6, Dropout = 0.4
vpid:5: Range is 8000 -> 9600 / 9577
Keep Prob = 0.6, Dropout = 0.4
vpid:0: Range is 0 -> 1600 / 9577
Keep Prob = 0.6, Dropout = 0.4
vpid:3: Range is 4800 -> 6400 / 9577
Keep Prob = 0.6, Dropout = 0.4
vpid:2: Range is 3200 -> 4800 / 9577
Keep Prob = 0.6, Dropout = 0.4
vpid:4: Range is 6400 -> 8000 / 9577


In [29]:
# Move everything currently waiting on the queue into result_arr
while True:
    if queue.empty():
        break
    result_arr.append(queue.get())
# Report the metrics over all results gathered up to now
Evaluate(result_arr)

Files Processed:  8043
Avg. Micro Recall of Lemmas: 0.8647977788996555
Avg. Micro Recall of Words: 0.7351168084504492
Avg. Micro Precision of Lemmas: 0.8200023294654187
Avg. Micro Precision of Words: 0.6978388400928252
F-Score of Lemmas:  0.841804543688
Fully Correct Lemmawise: 0.43777197563098347
Fully Correct Wordwise: 0.18115131169961457
[86.48, 73.51, 82.00, 69.78, 84.18, 43.78, 18.12]


In [28]:
# Report pid and liveness of each worker, one line per vpid
status_line = 'Process with\t vpid: {}\t ->\t pid: {}\t ->\t running status: {}'
for vpid in range(proc_count):
    worker = procs[vpid]
    print(status_line.format(vpid, worker.pid, worker.is_alive()))

Process with	 vpid: 0	 ->	 pid: 4932	 ->	 running status: False
Process with	 vpid: 1	 ->	 pid: 4933	 ->	 running status: False
Process with	 vpid: 2	 ->	 pid: 4938	 ->	 running status: False
Process with	 vpid: 3	 ->	 pid: 4943	 ->	 running status: False
Process with	 vpid: 4	 ->	 pid: 4944	 ->	 running status: False
Process with	 vpid: 5	 ->	 pid: 4947	 ->	 running status: False


In [27]:
# Properly Join
# NOTE: per the multiprocessing docs, a process that has put items on a Queue
# does not exit until they are flushed to the pipe, so a plain join() can
# block forever while results are still unread. Keep draining the queue into
# result_arr while waiting for each worker.
for proc in procs:
    while proc.is_alive():
        while not queue.empty():
            result_arr.append(queue.get())
        proc.join(0.1)
    # Worker has exited (or was never alive): finish the join
    proc.join()
# Pick up anything that arrived after the last poll
while not queue.empty():
    result_arr.append(queue.get())

In [26]:
# Force Terminate
# Calls terminate() on every worker in the pool
for worker in procs:
    worker.terminate()

In [24]:
# Mean of the third field of each result (n_dcsWords in Evaluate's unpacking)
np.mean([n_dcsWords for (_wm, _lm, n_dcsWords, _nodes) in result_arr])

6.7729026036644164

In [27]:
# Work on mutable copies so the tuples in result_arr stay untouched
result_arr2 = [list(entry) for entry in result_arr]

# Bucket the normalised second field of each row by the row's third field
# (lemma_match and n_dcsWords in Evaluate's unpacking)
recalls = defaultdict(list)
for row in result_arr2:
    n_words = row[2]
    row[0] = row[0] / n_words
    row[1] = row[1] / n_words
    recalls[n_words].append(row[1])

# One summary line per bucket, in ascending key order
for c in sorted(recalls):
    bucket = recalls[c]
    print('C: {}, Samples: {}, Mean: {}'.format(c, len(bucket), np.mean(bucket)))

C: 1, Samples: 199, Mean: 0.7537688442211056
C: 2, Samples: 428, Mean: 0.8259345794392523
C: 3, Samples: 293, Mean: 0.8612059158134243
C: 4, Samples: 260, Mean: 0.8096153846153846
C: 5, Samples: 646, Mean: 0.8328173374613003
C: 6, Samples: 2004, Mean: 0.8733366600133068
C: 7, Samples: 2653, Mean: 0.8951052716601151
C: 8, Samples: 1885, Mean: 0.8915119363395225
C: 9, Samples: 714, Mean: 0.886710239651416
C: 10, Samples: 208, Mean: 0.8764423076923078
C: 11, Samples: 92, Mean: 0.8922924901185771
C: 12, Samples: 75, Mean: 0.8522222222222222
C: 13, Samples: 24, Mean: 0.842948717948718
C: 14, Samples: 19, Mean: 0.8759398496240601
C: 15, Samples: 16, Mean: 0.8333333333333334
C: 16, Samples: 8, Mean: 0.890625
C: 17, Samples: 14, Mean: 0.8403361344537814
C: 18, Samples: 8, Mean: 0.8333333333333334
C: 19, Samples: 5, Mean: 0.8315789473684211
C: 20, Samples: 2, Mean: 0.925
C: 21, Samples: 4, Mean: 0.8809523809523809
C: 22, Samples: 2, Mean: 0.7954545454545454
C: 23, Samples: 3, Mean: 0.7826086956