In [1]:
#This notebook summarizes character/word error rates (with 95% CIs) across all held-out data for both train/test partitions

In [2]:
import numpy as np
import scipy.io
import os

#top-level dataset directory: change this if the data is not stored under the home directory
rootDir = '{}/handwritingBCIData/'.format(os.path.expanduser('~'))

#code directory: taken to be the current working directory
repoDir = '{}/'.format(os.getcwd())

#datasets (sessions) to process, written as dates and expanded to 't5.<date>' names
dataDirs = ['t5.' + sessionDate for sessionDate in (
    '2019.05.08', '2019.11.25', '2019.12.09', '2019.12.11', '2019.12.18',
    '2019.12.20', '2020.01.06', '2020.01.08', '2020.01.13', '2020.01.15',
)]

#performance is summarized for both train/test partitions...
cvParts = ['HeldOutBlocks', 'HeldOutTrials']

#...and for three versions, each given below as (results directory, display name):
#'Raw' (no language model), 'Bigram LM' (kaldi bigram language model only),
#and 'Bigram LM + GPT-2 Rescore' (kaldi model + GPT-2)
resultsDir, resultsNames = map(list, zip(
    ('RNNTrainingSteps/Step5_RNNInference', 'Raw'),
    ('RNNTrainingSteps/Step6_ApplyBigramLM', 'Bigram LM'),
    ('RNNTrainingSteps/Step7_GPT2Rescore', 'Bigram LM + GPT-2 Rescore'),
))

In [3]:
#Summarize character error rate and word error rate across all sessions.
#For every (partition, result version) pair, the per-trial error counts of the held-out trials are
#pooled over all sessions, the pooled error rates are printed, and a 95% CI is estimated by
#bootstrap resampling over trials.

#fixed seed so the bootstrap CIs printed below are identical on every run of this cell
bootstrapSeed = 0
nResamples = 10000

def _pooledErrorRates(errCounts):
    """Return (character error rate, word error rate) in percent.

    errCounts is a [nTrials x 4] array whose columns are charCounts, charErrors,
    wordCounts and wordErrors; counts are summed over all rows before dividing.
    """
    totals = errCounts.sum(axis=0)
    return 100*(totals[1] / totals[0]), 100*(totals[3] / totals[2])

for cvPart in cvParts:
    #the partition file depends only on cvPart, so it is read at most once per partition
    #(lazily, so it is never touched when no result files exist for this partition)
    cvPartFile = None

    for resultIdx in range(len(resultsDir)):
        allErrCounts = []

        for dataDir in dataDirs:
            filePath = rootDir + resultsDir[resultIdx] + '/' + cvPart + '/' + dataDir + '_errCounts.mat'
            if not os.path.isfile(filePath):
                continue

            dat = scipy.io.loadmat(filePath)
            if cvPartFile is None:
                cvPartFile = scipy.io.loadmat(rootDir+'RNNTrainingSteps/trainTestPartitions_'+cvPart+'.mat')
            valIdx = cvPartFile[dataDir+'_test']

            #skip sessions without held-out trials (.size rather than len() so a 1x0 array is skipped too)
            if valIdx.size==0:
                continue

            #one row per held-out trial, columns = [charCounts, charErrors, wordCounts, wordErrors]
            valIdx = valIdx[0,:]
            allErrCounts.append(np.stack([dat['charCounts'][0,valIdx],
                                 dat['charErrors'][0,valIdx],
                                 dat['wordCounts'][0,valIdx],
                                 dat['wordErrors'][0,valIdx]],axis=0).T)

        if len(allErrCounts)==0:
            continue

        #no squeeze here: it would collapse a single pooled trial to 1D and break the column indexing
        concatErrCounts = np.concatenate(allErrCounts, axis=0)
        cer, wer = _pooledErrorRates(concatErrCounts)

        #compute 95% CI using bootstrap resampling (trials are drawn with replacement)
        nTrials = concatErrCounts.shape[0]
        rng = np.random.RandomState(bootstrapSeed)
        resampledRates = np.zeros([nResamples,2])
        for n in range(nResamples):
            resampleIdx = rng.randint(0,nTrials,[nTrials])
            resampledRates[n,:] = _pooledErrorRates(concatErrCounts[resampleIdx,:])

        charCI = np.percentile(resampledRates[:,0],[2.5, 97.5])
        wordCI = np.percentile(resampledRates[:,1],[2.5, 97.5])

        print(cvPart + ' - ' + resultsNames[resultIdx])
        print('Character error rate: %1.2f%% (95%% CI = [%1.2f, %1.2f])' % (float(cer), float(charCI[0]), float(charCI[1])))
        print('Word error rate:      %1.2f%% (95%% CI = [%1.2f, %1.2f])' % (float(wer), float(wordCI[0]), float(wordCI[1])))
        print(' ')

HeldOutBlocks - Raw
Character error rate: 5.32% (95% CI = [4.81, 5.86])
Word error rate:      23.28% (95% CI = [21.27, 25.41])
 
HeldOutBlocks - Bigram LM
Character error rate: 1.69% (95% CI = [1.32, 2.10])
Word error rate:      6.10% (95% CI = [4.97, 7.25])
 
HeldOutBlocks - Bigram LM + GPT-2 Rescore
Character error rate: 0.90% (95% CI = [0.62, 1.23])
Word error rate:      3.21% (95% CI = [2.37, 4.11])
 
HeldOutTrials - Raw
Character error rate: 2.78% (95% CI = [2.20, 3.41])
Word error rate:      12.88% (95% CI = [10.28, 15.63])
 
HeldOutTrials - Bigram LM
Character error rate: 0.80% (95% CI = [0.44, 1.22])
Word error rate:      3.64% (95% CI = [2.11, 5.34])
 
HeldOutTrials - Bigram LM + GPT-2 Rescore
Character error rate: 0.34% (95% CI = [0.14, 0.61])
Word error rate:      1.97% (95% CI = [0.78, 3.41])
 
