In [1]:
#Computes the accuracies for the outputs from the EACL 2017 experiments on
#joint incremental utterance segmentation and disfluency detection
#this assumes the experiments are in simple_rnn_disf/rnn_disf_detection/experiments/
from __future__ import division
%matplotlib inline
import pandas as pd
import numpy as np
import sys
from collections import defaultdict
import matplotlib.pyplot as plt

from copy import deepcopy
sys.path.append("../../../../")
# from mumodo.mumodoIO import open_intervalframe_from_textgrid

In [2]:
#add the evaluation module functions
from deep_disfluency.evaluation.disf_evaluation import incremental_output_disfluency_eval_from_file
from deep_disfluency.evaluation.disf_evaluation import final_output_disfluency_eval_from_file
from deep_disfluency.evaluation.eval_utils import get_tag_data_from_corpus_file
from deep_disfluency.evaluation.eval_utils import rename_all_repairs_in_line_with_index
from deep_disfluency.evaluation.eval_utils import sort_into_dialogue_speakers
from deep_disfluency.evaluation.results_utils import convert_to_latex

In [3]:
# Paths to the experiment outputs and to the gold-standard evaluation data.
# The incremental output files are assumed to exist already under experiment_dir.
experiment_dir = "../../../experiments"

# When True, the "_partial" variants of the corpus and output files are used.
# NOTE(review): an earlier comment here said partial words were removed for
# these experiments, which contradicts the flag being True -- confirm.
partial_words = True
partial = '_partial' if partial_words else ''

# Gold-standard evaluation files (as text files), in (heldout, test) order.
disf_dir = "../../../data/disfluency_detection/switchboard"
disfluency_files = [
    "{0}/swbd_disf_{1}{2}_data_timings.csv".format(disf_dir, division_name, partial)
    for division_name in ("heldout", "test")
]

# (experiment sub-directory, human-readable system name) for each system evaluated.
allsystemsfinal = [
    ("033/epoch_45", 'RNN (joint task)'),
    ("034/epoch_37", 'RNN (complex tags) (joint task)'),
    ("035/epoch_6", 'LSTM (joint task)'),
    ("036/epoch_15", 'LSTM (complex tags) (joint task)'),
    ("037/epoch_6", 'LSTM (disf only)'),
    ("038/epoch_8", 'LSTM (uttseg only)'),
]

In [4]:
# Directory holding the "ASR good ranges" files for the heldout and test divisions.
div_dir = "../../../data/disfluency_detection/swda_divisions_disfluency_detection"


def _read_stripped_lines(path):
    """Return every line of the text file at `path` with "\\n" stripped from both ends.

    The file is opened in a `with` block so the handle is closed as soon as
    the lines have been read (the previous version never closed it).
    """
    with open(path) as range_file:
        return [line.strip("\n") for line in range_file]


good_asr_heldout = _read_stripped_lines(
    "{}/swbd_disf_heldout_ASR_good_ranges.text".format(div_dir))
good_asr_test = _read_stripped_lines(
    "{}/swbd_disf_test_ASR_good_ranges.text".format(div_dir))

# Incremental Evaluation (and creation of final output files)

In [5]:
# create final output files for the final output evaluation (and do incremental evaluation first:
# NB this takes a while! 5-10 mins per system
DO_INCREMENTAL_EVAL = True
VERBOSE = False
if DO_INCREMENTAL_EVAL:
    all_incremental_results = {}
    all_incremental_error_dicts = {}
    for system, system_name in allsystemsfinal:
        print "SYSTEM:", system, system_name
        #if 'complex' in system: break
        for timings_string in [
                               "", 
                               "_timings"
                              ]:  # without and with timings
            print "timings", timings_string!=""
            if timings_string == "_timings" and "disf only" in system_name:
                continue
            hyp_dir = experiment_dir + "/" + system
            #hyp_dir = experiment_dir
            for division, disf_file in zip(["heldout", "test"], disfluency_files):
                print "*" * 30, division, "*" * 30
                IDs, timings, words, pos_tags, labels = get_tag_data_from_corpus_file(disf_file)
                gold_data = {} #map from the file name to the data
                for dialogue,a,b,c,d in zip(IDs, timings, words, pos_tags, labels):
                    # if "asr" in division and not dialogue[:4] in good_asr: continue
                    gold_data[dialogue] = (a,b,c,d)
                inc_filename = hyp_dir + "/swbd_disf_{0}{1}{2}_data_output_increco".format(
                    division, partial, timings_string) + ".text"
                final_output_name = inc_filename.replace("_increco", "_final")
                results, error_analysis = incremental_output_disfluency_eval_from_file(
                                                     inc_filename,
                                                     gold_data,
                                                     utt_eval="disf only" not in system_name,
                                                     error_analysis=True,
                                                     word=True,
                                                     interval=True,
                                                     outputfilename=final_output_name
                                                                                      )
                if VERBOSE:
                    for k,v in results.items():
                        print k,v
                r_key = division + "_" + system + timings_string
                all_incremental_results[r_key] = deepcopy(results)
                if "heldout" in division:
                    # only do the error analyses on the heldout data
                    e_key = division + "_" + system + timings_string
                    all_incremental_error_dicts[e_key] = deepcopy(error_analysis)


SYSTEM: 033/epoch_45 RNN (joint task)
timings False
****************************** heldout ******************************
loading data ../../../data/disfluency_detection/switchboard/swbd_disf_heldout_partial_data_timings.csv
loaded 102 sequences
102 speakers
incremental output disfluency evaluation
word= True interval= True utt_eval= True




writing final output to file ../../../experiments/033/epoch_45/swbd_disf_heldout_partial_data_output_final.text
****************************** test ******************************
loading data ../../../data/disfluency_detection/switchboard/swbd_disf_test_partial_data_timings.csv
loaded 100 sequences
100 speakers
incremental output disfluency evaluation
word= True interval= True utt_eval= True
writing final output to file ../../../experiments/033/epoch_45/swbd_disf_test_partial_data_output_final.text
timings True
****************************** heldout ******************************
loading data ../../../data/disfluency_detection/switchboard/swbd_disf_heldout_partial_data_timings.csv
loaded 102 sequences
102 speakers
incremental output disfluency evaluation
word= True interval= True utt_eval= True
writing final output to file ../../../experiments/033/epoch_45/swbd_disf_heldout_partial_timings_data_output_final.text
****************************** test ******************************
loading

In [6]:
final = "No incremental results here"
if DO_INCREMENTAL_EVAL:
    print all_incremental_results.keys()
    # dummy results in non-joint tasks
    all_incremental_results['test_037/epoch_6']['t_t_detection_t/>_interval'] = float('nan')
    all_incremental_results['test_038/epoch_8']['t_t_detection_<rps_word'] = float('nan')
    all_incremental_results['test_038/epoch_8_timings']['t_t_detection_<rps_word'] = float('nan')
    
    display = dict()
    display['RNN joint task (+timing)'] = all_incremental_results['test_033/epoch_45_timings']
    display['RNN joint task (complex) (+timing)'] = all_incremental_results['test_034/epoch_37_timings']
    display['LSTM joint task (+timing)'] = all_incremental_results['test_035/epoch_6_timings']
    display['LSTM joint task (complex) (+timing)'] = all_incremental_results['test_036/epoch_15_timings']
    display['LSTM single uttseg task (+timing)'] = all_incremental_results['test_038/epoch_8_timings']
    
    display['RNN joint task'] = all_incremental_results['test_033/epoch_45']
    display['RNN joint task (complex)'] = all_incremental_results['test_034/epoch_37']
    display['LSTM joint task'] = all_incremental_results['test_035/epoch_6']
    display['LSTM joint task (complex)'] = all_incremental_results['test_036/epoch_15']
    display['LSTM single disfluency task'] = all_incremental_results['test_037/epoch_6']
    display['LSTM single uttseg task'] = all_incremental_results['test_038/epoch_8']
    
    final = convert_to_latex(display, eval_level=['word'], inc=True, utt_seg=False,
                             only_include=['t_t_detection_t/>_interval',
                                           't_t_detection_<rps_word',
                                           'edit_overhead_rel_word'])
    #final = final.drop(final.columns[[-2]], axis=1)
print "Part of table 4. in the paper"
final

['test_038/epoch_8', 'heldout_036/epoch_15_timings', 'heldout_034/epoch_37', 'heldout_034/epoch_37_timings', 'heldout_033/epoch_45', 'test_035/epoch_6_timings', 'test_037/epoch_6', 'heldout_038/epoch_8_timings', 'test_036/epoch_15_timings', 'heldout_035/epoch_6_timings', 'test_033/epoch_45_timings', 'test_035/epoch_6', 'heldout_038/epoch_8', 'heldout_033/epoch_45_timings', 'heldout_035/epoch_6', 'test_034/epoch_37', 'heldout_037/epoch_6', 'heldout_036/epoch_15', 'test_033/epoch_45', 'test_038/epoch_8_timings', 'test_034/epoch_37_timings', 'test_036/epoch_15']
Part of table 4. in the paper


Unnamed: 0,System (eval. method),TTD$_{tto}$ (time in s),TTD$_{rps}$ (word),EO (word)
0,LSTM joint task (transcript),0.458,1.006,13.089
1,LSTM joint task (+timing) (transcript),0.648,1.003,11.13
2,LSTM joint task (complex) (transcript),0.676,1.084,11.096
3,LSTM joint task (complex) (+timing) (transcript),0.931,1.081,9.231
4,LSTM single disfluency task (transcript),,1.0,7.098
5,LSTM single uttseg task (transcript),0.559,,8.818
6,LSTM single uttseg task (+timing) (transcript),0.835,,6.515
7,RNN joint task (transcript),0.531,1.017,11.5
8,RNN joint task (+timing) (transcript),0.715,1.011,10.357
9,RNN joint task (complex) (transcript),0.799,1.119,9.859


# Final output evaluation

In [9]:
# Get all the final output results, this can take 3 minutes per system
VERBOSE = False
all_results = {}
all_error_dicts = {}
for system, system_name in allsystemsfinal:
    print "SYSTEM:", system, system_name
    for timings_string in ["",
                           "_timings"
                          ]:  # without and with timings
        print "timings", timings_string!=""
        if timings_string == "_timings" and "disf only" in system_name:
            continue
        hyp_dir = experiment_dir
        for division, disf_file in zip(["heldout", "test"],disfluency_files):
            #if division == "heldout":
            #    continue
            print "*" * 30, division, "*" * 30
            IDs, timings, words, pos_tags, labels = get_tag_data_from_corpus_file(disf_file)
            f = open(disf_file)
            f.close()
            gold_data = {} #map from the file name to the data
            for dialogue,a,b,c,d in zip(IDs, timings, words, pos_tags, labels):
                # if "asr" in division and not dialogue[:4] in good_asr: continue
                d = rename_all_repairs_in_line_with_index(list(d))
                gold_data[dialogue] = (a,b,c,d)

            #the below does just the final output evaluation, assuming a final output file, faster
            hyp_file = '{0}/{1}/swbd_disf_{2}{3}{4}_data_output_final.text'.format(
                                                                            hyp_dir,
                                                                            system,
                                                                            division,
                                                                            partial,
                                                                            timings_string)

            word = True  # world-level analyses
            error = True # get an error analysis
            results,speaker_rate_dict,error_analysis = final_output_disfluency_eval_from_file(
                                                        hyp_file,
                                                        gold_data,
                                                        utt_eval="disf only" not in system_name,
                                                        error_analysis=error,
                                                        word=word,
                                                        interval=False,
                                                        outputfilename=None
                                                        )

            #the below does incremental and final output in one, also outputting the final outputs
            #derivable from the incremental output, takes quite a while
            if VERBOSE:
                for k,v in results.items():
                    print k,v
            r_key = division + "_" + system + timings_string
            all_results[r_key] = deepcopy(results)
            if "heldout" in division:
                # only do the error analyses on the heldout data
                e_key = division + "_" + system + timings_string
                all_error_dicts[e_key] = deepcopy(error_analysis)


SYSTEM: 033/epoch_45 RNN (joint task)
timings False
****************************** heldout ******************************
loading data ../../../data/disfluency_detection/switchboard/swbd_disf_heldout_partial_data_timings.csv
loaded 102 sequences
102 speakers
final output disfluency evaluation
word= True interval= False utt_eval= True
word
****************************** test ******************************
loading data ../../../data/disfluency_detection/switchboard/swbd_disf_test_partial_data_timings.csv
loaded 100 sequences
100 speakers
final output disfluency evaluation
word= True interval= False utt_eval= True
word
timings True
****************************** heldout ******************************
loading data ../../../data/disfluency_detection/switchboard/swbd_disf_heldout_partial_data_timings.csv
loaded 102 sequences
102 speakers
final output disfluency evaluation
word= True interval= False utt_eval= True
word
****************************** test ******************************
loading

In [10]:
display = dict()
# fill in dummy values
all_results['test_037/epoch_6']['f1_t/>_word'] = float('nan')
all_results['test_037/epoch_6']['NIST_SU_word'] = float('nan')
#all_results['test_037/epoch_6_timings']['f1_t/>_word'] = 0
#all_results['test_037/epoch_6_timings']['NIST_SU_word'] = 1000

display['RNN joint task (+timing)'] = all_results['test_033/epoch_45_timings']
display['RNN joint task (complex) (+timing)'] = all_results['test_034/epoch_37_timings']
display['LSTM joint task (+timing)'] = all_results['test_035/epoch_6_timings']
display['LSTM joint task (complex) (+timing)'] = all_results['test_036/epoch_15_timings']
display['LSTM single uttseg task (+timing)'] = all_results['test_038/epoch_8_timings']

display['RNN joint task'] = all_results['test_033/epoch_45']
display['RNN joint task (complex)'] = all_results['test_034/epoch_37']
display['LSTM joint task'] = all_results['test_035/epoch_6']
display['LSTM joint task (complex)'] = all_results['test_036/epoch_15']
display['LSTM single disfluency task'] = all_results['test_037/epoch_6']
display['LSTM single uttseg task'] = all_results['test_038/epoch_8']

final = convert_to_latex(display, eval_level=['word'], inc=False, utt_seg=False,
                         only_include=['f1_<rps_word',
                                       'f1_<e_word',
                                       'f1_t/>_word',
                                       'NIST_SU_word'
                                       
                                      ])
#final = final.drop(final.columns[[-2]], axis=1)
print "Part of table 2 in the paper"
final

Part of table 2 in the paper


Unnamed: 0,System (eval. method),$F_{rps}$ (per word),$F_{e}$ (per word),$F_{uttSeg}$ (per word),NIST SU (word)
0,LSTM joint task (transcript),0.713,0.838,0.685,64.639
1,LSTM joint task (+timing) (transcript),0.715,0.848,0.689,57.413
2,LSTM joint task (complex) (transcript),0.642,0.909,0.697,61.895
3,LSTM joint task (complex) (+timing) (transcript),0.652,0.909,0.7,56.987
4,LSTM single disfluency task (transcript),0.713,0.914,,
5,LSTM single uttseg task (transcript),0.0,0.0,0.732,56.493
6,LSTM single uttseg task (+timing) (transcript),0.0,0.0,0.74,50.511
7,RNN joint task (transcript),0.674,0.814,0.629,73.432
8,RNN joint task (+timing) (transcript),0.677,0.819,0.629,68.388
9,RNN joint task (complex) (transcript),0.627,0.907,0.671,64.213


# Joint vs. single task comparison

In [11]:
display = dict()
# fill in dummy values

display['LSTM single task'] = all_results['test_038/epoch_8_timings']
display['LSTM joint task'] = all_results['test_035/epoch_6_timings']
display['LSTM single disfluency task'] = all_results['test_037/epoch_6']


final = convert_to_latex(display, eval_level=['word'], inc=False, utt_seg=False,
                         only_include=['f1_<rps_word',
                                       'f1_<e_word',
                                       'f1_t/>_word',
                                       'NIST_SU_word'
                                       
                                      ])
#final = final.drop(final.columns[[-2]], axis=1)
print "Part of table 2 in the paper"
final

Part of table 2 in the paper


Unnamed: 0,System (eval. method),$F_{rps}$ (per word),$F_{e}$ (per word),$F_{uttSeg}$ (per word),NIST SU (word)
0,LSTM joint task (transcript),0.715,0.848,0.689,57.413
1,LSTM single disfluency task (transcript),0.713,0.914,,
2,LSTM single task (transcript),0.0,0.0,0.74,50.511


# Repair Error Analysis

In [30]:
# Repair error analysis over the heldout error dictionaries in all_error_dicts.
# For every system and every tag in target_tags, the TP/FP/FN repairs are
# counted by repair type and by length, and one LaTeX table row is printed per
# category: "<category> & (<TP>/<TP+FN>) & <F-score>\\".
# The code comments mention exact match ('rms') and repair start ('rps'), but
# only '<rps' is currently listed in target_tags.
# Python 2 only: uses print statements and list concatenation of dict.keys().
target_tags = ['<rps']

for div,all_error in all_error_dicts.items():
    # print div, type(all_error)

    # skip entries that hold a bool instead of an error dictionary
    if type(all_error) == bool: continue
    # skip any key that refers to the test division
    if "test" in div: continue
    #if not 'TTO only' in div or "asr" in div: continue
    for tag, errors in all_error.items():
        if tag not in target_tags:
            continue
        print "*" * 30, div, tag, "*" * 30
        # print errors
        # continue
        #if not 'TTO only' in div or "asr" in div: continue
        # outcome ("TP"/"FP"/"FN") -> {'len': counts, 'type': counts}
        # NOTE(review): an outcome key other than these three in `errors`
        # would raise a KeyError at the error[k][...] assignments below.
        error = {"TP" : {}, "FP" : {}, "FN": {} }
        for k,v in errors.items():
            # k is the outcome name, v the list of repair objects with that outcome
            #if k == "FP":
            #    continue
            # print k, len(v)
            typedict = defaultdict(int)  # category -> count (see note below)
            lendict = defaultdict(int)   # length in words -> count
            for repair in v:

                #print repair.gold_context
                onset = ""
                if tag == "<rps" or tag == "<rms":

                    # the onset is the element right after the first "+|+"
                    # marker in the gold context
                    # (presumably "+|+" marks the interruption point -- TODO confirm)
                    for i in range(0,len(repair.gold_context)):
                        if repair.gold_context[i] == "+|+":
                            onset = repair.gold_context[i+1]
                            break

                    # the part of the onset before the first "|"
                    word = onset.split("|")[0]
                    #if k == "FP":
                    #    onset = gold_onset
                    # Bucket the onset by edit tag or by coarse word class.
                    # NOTE(review): these counts share typedict with the repair
                    # type counts added further down, and only the "rep"/"del"/"sub"
                    # rows are printed; an onset word spelled like one of those
                    # keys would be merged into that row.
                    if "<e" in onset and not tag == "<e":
                        typedict["<e"]+=1
                    else:
                        if word in ["and","or","but","so","because","that","although"]:
                            typedict["CC"]+=1
                        elif word in ["i","we","they","im","ive","he","she","id"]:
                            typedict["subj"]+=1
                        elif word in ["you","the"] or "$" in word:
                            typedict["proper_other"]+=1
                        elif word in ["yeah","no","okay","yes","right","uh-huh"]:
                            typedict["ack"]+=1
                        elif word in ["it","its"]:
                            typedict["it"]+=1
                        else:
                            typedict[word]+=1

                if tag == "<rps" or tag == "<rms": # and not k == 'FP':
                    if k == "TP" and len(repair.reparandumWords) > 8:
                        # should not be getting any over 8 words
                        print "** overlength repair!"
                        print repair
                    # length = number of reparandum words plus interregnum words
                    lendict[len(repair.reparandumWords) + len(repair.interregnumWords)]+=1
                    repair_type = None
                    # repairs with a falsy type are not counted by type
                    if repair.type:
                        repair_type = repair.type 
                        typedict[repair_type]+=1

            error[k]['len'] = deepcopy(lendict)
            error[k]['type'] = deepcopy(typedict)


        for mode in ['type', 'len']:
            #q1. THE RECALL RATES FOR VARIOUS GOLD REPAIR TYPES
            print mode, "*" * 30
            # defaultdict copies: looking up a missing key yields 0 (and inserts it)
            tps = error['TP'][mode]
            fns = error['FN'][mode]
            fps = error['FP'][mode]

            total_tps = 0
            total_fns = 0
            total_fps = 0
            top_n = 50  # print at most this many rows
            # categories seen among TPs or FNs; categories seen only among FPs
            # are not listed
            all_items = list(set(tps.keys() + fns.keys()))
            # print all_items
            for k in sorted(all_items,  reverse=False):
                #print k, "*" * 30
                # in 'type' mode only the three repair types are reported
                if mode == 'type' and k not in ["rep", "del", "sub"]:
                    continue
                # the divisions below are true divisions thanks to the
                # `from __future__ import division` at the top of the notebook
                recall_total = tps[k] + fns[k]
                recall = 0 if tps[k] == 0 else tps[k]/recall_total
                precision_total = tps[k] + fps[k]
                precision = 0 if tps[k] == 0 else tps[k]/precision_total
                fscore = 0 if precision == 0 or recall == 0 else (2 * (precision * recall))/(precision + recall)
                # print k, ':', tps[k], "out of", recall_total
                #print k, ':', tps[k], "out of", precision_total
                total_tps += tps[k]
                total_fns += fns[k]
                total_fps += fps[k]
                # LaTeX row: category & (TP/TP+FN) & F-score
                print " & ".join([str(k), "({0}/{1})".format(tps[k],recall_total), 
                                  '{0:.3f}'.format(fscore)]) + "\\\\"
                top_n-=1
                if top_n <= 0:
                    break
            # overall recall over the rows printed above
            # NOTE(review): raises ZeroDivisionError when no row was counted
            print total_tps/(total_fns + total_tps)

            # disabled: summary of the most frequent TP categories
            if False:
                #q2. ERROR TYPE SUMMARY
                print "*" * 30
                total = sum(fns.values()+tps.values())

                errormass = 0
                errortotal = 0
                top_n = 20
                for k,v in sorted(tps.items(),key= lambda x: x[1],reverse=True):
                    print k,"&",v,"&",'{0:.2f}'.format(v/total)
                    errormass +=(v/total * 100)
                    errortotal+=v
                    top_n-=1
                    if top_n <= 0: break
                print "total &",errortotal,"&",'{0:.2f}'.format(errormass)

****************************** heldout_035/epoch_6 <rps ******************************
type ******************************
del & (37/132) & 0.438\\
rep & (846/1022) & 0.906\\
sub & (647/1061) & 0.758\\
0.690744920993
len ******************************
0 & (1/1) & 0.006\\
1 & (992/1258) & 0.882\\
2 & (342/531) & 0.784\\
3 & (115/222) & 0.682\\
4 & (48/106) & 0.623\\
5 & (16/50) & 0.485\\
6 & (10/25) & 0.571\\
7 & (4/11) & 0.533\\
8 & (2/6) & 0.500\\
9 & (0/1) & 0.000\\
10 & (0/1) & 0.000\\
11 & (0/2) & 0.000\\
15 & (0/1) & 0.000\\
0.690744920993
****************************** heldout_036/epoch_15 <rps ******************************
type ******************************
del & (21/132) & 0.271\\
rep & (741/1022) & 0.829\\
sub & (470/1061) & 0.563\\
0.556207674944
len ******************************
0 & (0/1) & 0.000\\
1 & (934/1258) & 0.811\\
2 & (215/531) & 0.542\\
3 & (48/222) & 0.348\\
4 & (22/106) & 0.338\\
5 & (8/50) & 0.276\\
6 & (3/25) & 0.214\\
7 & (1/11) & 0.154\\
8 & (1/6) & 0.286\

# Utterance Segmentation Analysis

In [29]:
#Error analyses
error = {"TP" : {}, "FP" : {}, "FN": {} }
 
for div,all_error in all_error_dicts.items():
    if not "35" in div:
        continue
    print div, type(all_error)
    if type(all_error) == bool: continue
    if "test" in div: continue
    #if not 'TTO only' in div or "asr" in div: continue
    for tag,errors in all_error.items():
        print div, tag
        #if not 'TTO only' in div or "asr" in div: continue
        if not tag == "t/>": continue
        for k,v in errors.items():
    
            print ""
            #if k == "FP": continue
            print k, len(v)
            typedict = defaultdict(int)
            lendict = defaultdict(int)
            #print v[0]
            for repair in v:
                #if len(repair)==0: continue
                #print "*"
                #print repair
                #print repair.gold_context
                onset = ""
                if tag == "<rps":
                    
                    for i in rcange(0,len(repair.gold_context)):
                        if repair.gold_context[i] == "+|+":
                            onset = repair.gold_context[i+1]
                            break
                else:
                    gold_onset = ""
                    onset = ""
                    word = ""
                    if len(repair.gold_tags_right_context)>1:
                        gold_onset = repair.gold_tags_right_context[1]
                        onset = repair.tags_right_context[1]
                        word = repair.words_right_context[1]
                    #penult = repair.tags_left_context[-1]
                    #print repair
                    if k == "FP":
                        onset = gold_onset
                    if "<rps" in onset:
                        typedict["<rps"]+=1
                    elif "<e" in onset:
                        typedict["<e"]+=1
                    else:
                        if word in ["and","or","but","so","because","that","although"]:
                            typedict["CC"]+=1
                        elif word in ["i","we","they","im","ive","he","she","id"]:
                            typedict["subj"]+=1
                        elif word in ["you","the"] or "$" in word:
                            typedict["proper_other"]+=1
                        elif word in ["yeah","no","okay","yes","right","uh-huh"]:
                            typedict["ack"]+=1
                        elif word in ["it","its"]:
                            typedict["it"]+=1
                        else:
                            typedict[word]+=1
                         
                
                #if "<t" in repair.gold_context
                #for t in ["tt","cc","ct","tc"]:
                #    if "<" + t + ">" in onset:
                #        typedict[t]+=1
                #        if not t[0]=='t':
                #            print repair
                if tag == "<rps" and not k == 'FP':
                    lendict[repair.type]+=1
                error[k]['len'] = deepcopy(lendict)
                error[k]['type'] = deepcopy(typedict)

#tp = deepcopy(lendict)
#q1. THE RECALL RATES FOR VARIOUS GOLD REPAIRS
tp = error['TP']['len']
print tp
print error['FN']['len']
for k,v in sorted(error['FN']['len'].items()):
    print " & ".join([k, "({0})".format(v + tp[k]), 
                      '{0:.1f}'.format(100 * float(tp[k])/float(v+ tp[k]))]) + "\\\\"

tps = error['TP']['type']
fns = error['FN']['type']
fps = error['FP']['type']

total = sum(fns.values()+tps.values())

errormass = 0
errortotal = 0
for k,v in sorted(fns.items(),key= lambda x: x[1],reverse=True):
    print k,"&",v,"&",'{0:.2f}'.format(v/total * 100)
    errormass +=(v/total * 100)
    errortotal+=v
print "*" * 30
print "total &",errortotal,"&",'{0:.2f}'.format(errormass)

heldout_035/epoch_6 <type 'dict'>
heldout_035/epoch_6 <rps
heldout_035/epoch_6 <rms
heldout_035/epoch_6 <e
heldout_035/epoch_6 t/>

FP 2065

TP 3865

FN 1783
heldout_035/epoch_6_timings <type 'dict'>
heldout_035/epoch_6_timings <rps
heldout_035/epoch_6_timings <rms
heldout_035/epoch_6_timings <e
heldout_035/epoch_6_timings t/>

FP 1276

TP 3442

FN 2206
defaultdict(<type 'int'>, {})
defaultdict(<type 'int'>, {})
ack & 571 & 10.11
CC & 485 & 8.59
<e & 223 & 3.95
<rps & 199 & 3.52
subj & 191 & 3.38
 & 54 & 0.96
proper_other & 52 & 0.92
it & 44 & 0.78
if & 16 & 0.28
what & 14 & 0.25
my & 13 & 0.23
a & 12 & 0.21
thats & 11 & 0.19
do & 10 & 0.18
in & 10 & 0.18
when & 10 & 0.18
there & 9 & 0.16
sure & 8 & 0.14
maybe & 8 & 0.14
at & 8 & 0.14
theyre & 7 & 0.12
really & 7 & 0.12
is & 7 & 0.12
then & 6 & 0.11
yep & 5 & 0.09
actually & 5 & 0.09
to & 5 & 0.09
people & 5 & 0.09
for & 5 & 0.09
how & 5 & 0.09
of & 5 & 0.09
theres & 5 & 0.09
th- & 5 & 0.09
now & 4 & 0.07
where & 4 & 0.07
even & 4 & 0.

In [None]:
# TODO for paper/future
# - check WER for the ASR results and exclude those with a high WER, given they might have high overlap :(
# - need to adjust the time-to-detection scores based on the time it comes in from Increco??
#      Also, for time-to-detection we can only use word ends unless we redo the mapping - this just needs explanation
# - delayed accuracy based on time, or not bother? Do a moving window instead and plot this over time - average moving-window accuracy
# - error analysis plots
# - 036 - full task with LSTM - should improve massively over 034, which also needs re-running
# - reproduce 027 (with full training data, efficiently) and re-run with LSTM - not much time.
# - Q2 TODO: the extent to which the network is memorizing - need to plug these in with the repair gold standards