In [1]:
from tqdm import tqdm, trange
import numpy as np
import copy

# Root directory of the Yelp dataset; the file paths used by the cells below are built from it.
# NOTE(review): machine-specific absolute path — adjust before running on another machine.
data_dir = "/home/jack/Desktop/NN/clean/datasets/yelp"

In [2]:
def process_file(input_file, output_file):
    """
    Convert a Delete & Generate data file into the format used for
    Delete, Retrieve and Generate training by separating the content
    and the attribute words. All attribute words are kept.

    Every input line is split at ``<START>``. The part before it (with the
    ``<POS>``, ``<NEG>`` and ``<CON_START>`` markers removed) is the content;
    the part after it (with ``<END>`` removed) is the full sentence. Each
    sentence token that does not occur among the content tokens is treated
    as an attribute word. The written line has the form
    ``<ATTR_WORDS> attrs <CON_START> content <START> sentence <END>``.

    Blank (empty or whitespace-only) input lines are skipped.

    input_file: string : Path of the input file
    output_file: string : Path of the output file (overwritten)

    Raises IndexError if a non-blank line does not contain ``<START>``.
    """
    with open(input_file) as fp:
        data = fp.read().splitlines()
    with open(output_file, "w") as out_fp:
        for x in tqdm(data):
            # A blank line has no <START> marker and would otherwise crash on temp[1].
            if not x.strip():
                continue
            temp = x.split("<START>")
            con = temp[0].replace("<POS>", "").replace("<NEG>", "").replace("<CON_START>", "")
            sen = temp[1].replace("<END>", "")
            # Set lookup keeps the attribute filter linear in the sentence length.
            content_vocab = set(con.split())
            att_tokens = [z for z in sen.split() if z not in content_vocab]
            att_words = " ".join(att_tokens)
            out_str = "<ATTR_WORDS> " + att_words + " <CON_START> " + con.strip() + " <START> " + sen.strip() + " <END>" + "\n"
            out_fp.write(out_str)
    

In [221]:
def process_file_v1(input_file, output_file, test = False):
    """
    Convert a Delete & Generate data file into the format used for
    Delete, Retrieve and Generate training by separating the content
    and the attribute words, and mark attribute positions in the content
    with ``<REPLACE>`` tokens (via ``set_replace_tokens``).

    Every input line is split at ``<START>``. The part before it (with the
    ``<POS>``, ``<NEG>`` and ``<CON_START>`` markers removed) is the content;
    the part after it (with ``<END>`` removed) is the full sentence.

    A sentence token becomes an attribute word unless it
      * is a substring of the special-character string,
      * already occurs among the content tokens,
      * contains a special character and is shorter than 3 characters,
      * contains "-" and its first two "-"-separated parts are content tokens,
      * contains a special character, is shorter than 4 characters and its
        first two "'"-separated parts are content tokens (e.g. "n't" when the
        content holds "n" and "t").

    When more than two attribute words are found, only the first 70% of them
    (in sentence order, rounded down) are written, so that generation is not
    a pure fill-in-the-blanks task. The selection is deterministic; nothing
    is shuffled. With two or fewer attribute words all are written.

    Blank (empty or whitespace-only) input lines are skipped.

    input_file: string : Path of the input file
    output_file: string : Path of the output file (overwritten)
    test: bool : If True, each written line ends after ``<START>`` (the
        reference sentence and ``<END>`` are omitted).

    Raises IndexError if a non-blank line does not contain ``<START>``.
    """
    # Punctuation-like characters that should not become attribute words on their own.
    special_characters =  "!@#$%^&*()-+?_=<>/\'\'"

    with open(input_file) as fp:
        data = fp.read().splitlines()
    with open(output_file, "w") as out_fp:
        for x in tqdm(data):
            # A blank line has no <START> marker and would otherwise crash on temp[1].
            if not x.strip():
                continue
            temp = x.split("<START>")
            con = temp[0].replace("<POS>", "").replace("<NEG>", "").replace("<CON_START>", "")
            sen = temp[1].replace("<END>", "")
            lt1 = con.split()
            lt2 = sen.split()
            # Snapshot of the content tokens for fast membership tests; lt1 itself
            # is only modified later by set_replace_tokens.
            content_vocab = set(lt1)

            att_words = []
            index_att = []
            for index, word in enumerate(lt2):
                # Substring test on purpose: also drops multi-character runs such as "''".
                if word in special_characters:
                    continue
                if word in content_vocab:
                    continue
                has_special = any(c in special_characters for c in word)
                if has_special and len(word) < 3:
                    continue
                if "-" in word:
                    parts = word.split("-")
                    if parts[0] in content_vocab and parts[1] in content_vocab:
                        continue
                if has_special and len(word) < 4:
                    parts = word.split("\'")
                    if len(parts) > 1 and parts[0] in content_vocab and parts[1] in content_vocab:
                        continue
                # Attribute found: remember the word and its position in the sentence.
                att_words.append(word)
                index_att.append(index)

            # Inserts "<REPLACE>" markers into lt1 in place.
            set_replace_tokens(lt1, lt2, att_words, index_att)

            if len(att_words) > 2:
                kept_words = att_words[:int(0.7 * len(att_words))]
            else:
                # Two or fewer attributes: keep all of them.
                kept_words = att_words
            att_str = " ".join(kept_words)

            # TODO (original author): also put the <REPLACE> token in the reference file.
            content_str = " ".join(lt1).strip()
            if test:
                out_str = "<ATTR_WORDS> " + att_str + " <CON_START> " + content_str + " <START> " + "\n"
            else:
                out_str = "<ATTR_WORDS> " + att_str + " <CON_START> " + content_str + " <START> " + sen.strip() + " <END>" + "\n"
            out_fp.write(out_str)

def set_replace_tokens(content_list, full_list, att_list, index_att_list, second_try = False):
    """
    Insert "<REPLACE>" placeholders into ``content_list`` (modified in place)
    for the attribute positions given in ``index_att_list``.

    content_list: list : content tokens; receives the placeholders
    full_list: list : tokens of the full sentence
    att_list: list : attribute words; only printed in the diagnostic output
    index_att_list: list : positions of the attribute words inside full_list
    second_try: bool : currently unused

    For every attribute position the first applicable rule wins:
      1. the position directly follows the previously handled one
         -> insert at that same index of content_list;
      2. the position equals the running "leading" counter (starts at 0)
         -> insert at that index and advance the counter;
      3. the left / right neighbour in full_list occurs exactly once in
         content_list -> insert right after / right before it;
      4. exactly one adjacent pair of content tokens concatenates to the
         left / right neighbour -> insert after / before that pair position;
      5. the token two to the left / right in full_list pins down exactly
         one content position -> insert there.
    If nothing applies and neither direct neighbour occurs in content_list,
    a diagnostic block starting with "NONE" is printed.

    Returns None.
    """
    placeholder = "<REPLACE>"

    def positions_where(predicate):
        # Positions of the current content_list for which predicate holds.
        return [pos for pos in range(len(content_list)) if predicate(pos)]

    def insert_at_unique(candidates):
        # Take the first candidate list with exactly one hit and insert there.
        for hits, shift in candidates:
            if len(hits) == 1:
                content_list.insert(hits[0] + shift, placeholder)
                return True
        return False

    leading_index = 0
    previous_index = None
    for ordinal, attr_index in enumerate(index_att_list):
        # Rule 1: continuation of a run of consecutive attribute positions.
        extends_run = previous_index == attr_index - 1
        previous_index = attr_index
        if extends_run:
            content_list.insert(attr_index, placeholder)
            continue

        # Rule 2: attribute at the leading position of the sentence.
        if attr_index == leading_index:
            leading_index += 1
            content_list.insert(attr_index, placeholder)
            continue

        # Rule 3: direct neighbours that are unique in the content.
        has_left = attr_index - 1 >= 0
        has_right = attr_index + 1 < len(full_list)
        left_word = full_list[attr_index - 1] if has_left else None
        right_word = full_list[attr_index + 1] if has_right else None
        left_hits = positions_where(lambda pos: content_list[pos] == left_word) if has_left else []
        right_hits = positions_where(lambda pos: content_list[pos] == right_word) if has_right else []
        if insert_at_unique(((left_hits, 1), (right_hits, 0))):
            continue

        # Rule 4: neighbour appears in the content split over two adjacent tokens.
        left_joined = []
        right_joined = []
        if left_word is not None:
            left_joined = positions_where(
                lambda pos: pos >= 1 and content_list[pos - 1] + content_list[pos] == left_word
            )
        if right_word is not None:
            right_joined = positions_where(
                lambda pos: pos + 1 < len(content_list)
                and content_list[pos] + content_list[pos + 1] == right_word
            )
        if insert_at_unique(((left_joined, 1), (right_joined, 0))):
            continue

        # Rule 5: look one token further away in the full sentence.
        left_skipped = []
        right_skipped = []
        if attr_index - 2 >= 0:
            far_left = full_list[attr_index - 2]
            left_skipped = positions_where(
                lambda pos: pos >= 1 and content_list[pos - 1] == far_left
            )
        if attr_index + 2 < len(full_list):
            far_right = full_list[attr_index + 2]
            right_skipped = positions_where(
                lambda pos: pos + 1 < len(content_list) and content_list[pos + 1] == far_right
            )
        if insert_at_unique(((left_skipped, 1), (right_skipped, 0))):
            continue

        # No rule applied: report the case when neither neighbour was found at all.
        if not left_hits and not right_hits:
            print("NONE", ordinal)
            print(att_list)
            print(index_att_list)
            print(content_list)
            print(full_list)

def next_is_street(index_att_list, current_index):
    """
    Tell whether the entry after ``current_index`` continues a run of
    consecutive values.

    index_att_list: list : index values to inspect
    current_index: int : position inside index_att_list

    Returns True when a following entry exists and it equals the current
    entry plus one, otherwise False.
    """
    following = current_index + 1
    # No following entry -> nothing can continue the run.
    if following >= len(index_att_list):
        return False
    return index_att_list[current_index] + 1 == index_att_list[following]

def get_street_length(index_att_list, current_index):
    """
    Count how many entries directly after ``current_index`` keep increasing
    by exactly one.

    index_att_list: list : index values to inspect
    current_index: int : position inside index_att_list to start from

    Returns the number of consecutive followers (0 if the next entry is
    missing or not the current entry plus one).
    """
    followers = 0
    position = current_index
    while True:
        # Stop at the end of the list ...
        if position + 1 >= len(index_att_list):
            break
        # ... or as soon as the next value is not the direct successor.
        if index_att_list[position] + 1 != index_att_list[position + 1]:
            break
        followers += 1
        position += 1
    return followers



In [184]:
def get_street_length(index_att_list, current_index):
    """
    Count how many entries directly after ``current_index`` keep increasing
    by exactly one.

    index_att_list: list : index values to inspect
    current_index: int : position inside index_att_list to start from

    Returns the number of consecutive followers (0 if the next entry is
    missing or not the current entry plus one).
    """
    counter = 0
    # The length check comes first so the look-ahead never indexes past the end.
    while len(index_att_list) > current_index+1 and  index_att_list[current_index] + 1 == index_att_list[current_index+1]:
        counter += 1
        current_index += 1
    return counter

# Scratch check: prints 0..k-1, where k is the value get_street_length returns
# for the sample list when starting at position 2.
for x in  range( get_street_length([0,1,2,5],2)):
    # Only referenced by the commented-out debug print below.
    aa = [0,1,2,5]
    #print("aa",aa[4])
    print(x)

In [209]:
# Test changes here first
# Run process_file_v1 on the (small) test split: merged file, then the _1 and _0 files.
# Each pair is (input path suffix, output path suffix) relative to data_dir.
for source_suffix, target_suffix in (
    ("/processed_files_with_bert_with_best_head/sentiment_test.txt",
     "/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_test.txt"),
    ("/processed_files_with_bert_with_best_head/sentiment_test_1.txt",
     "/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_test_1.txt"),
    ("/processed_files_with_bert_with_best_head/sentiment_test_0.txt",
     "/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_test_0.txt"),
):
    process_file_v1(data_dir+source_suffix, data_dir+target_suffix)

100%|██████████| 1000/1000 [00:00<00:00, 49159.68it/s]
100%|██████████| 500/500 [00:00<00:00, 54008.55it/s]
100%|██████████| 500/500 [00:00<00:00, 44968.52it/s]


In [44]:
'''Merge files'''
# Create sentiment_train file before run!
# Create sentiment_test file before run!
# Create sentiment_dev file before run!

# Each entry pairs the input files (the _1 file first, then the _0 file) with
# the merged file they are concatenated into, in that order.
merge_plan = [
    (
        [data_dir+"/processed_files_with_bert_with_best_head/sentiment_train_1.txt", data_dir+"/processed_files_with_bert_with_best_head/sentiment_train_0.txt"],
        data_dir+"/processed_files_with_bert_with_best_head/sentiment_train.txt",
    ),
    (
        [data_dir+"/processed_files_with_bert_with_best_head/sentiment_test_1.txt", data_dir+"/processed_files_with_bert_with_best_head/sentiment_test_0.txt"],
        data_dir+"/processed_files_with_bert_with_best_head/sentiment_test.txt",
    ),
    (
        [data_dir+"/processed_files_with_bert_with_best_head/sentiment_dev_1.txt", data_dir+"/processed_files_with_bert_with_best_head/sentiment_dev_0.txt"],
        data_dir+"/processed_files_with_bert_with_best_head/sentiment_dev.txt",
    ),
]

for filenames, merged_path in merge_plan:
    # Opening with 'w' truncates any existing merged file before writing.
    with open(merged_path, 'w') as outfile:
        for fname in filenames:
            with open(fname) as infile:
                # Copy the input line by line, unchanged.
                outfile.writelines(infile)

In [14]:

# Build the all-attribute files for the train split: merged file, then the _1 and _0 files.
# Each pair is (input path suffix, output path suffix) relative to data_dir.
for source_suffix, target_suffix in (
    ("/processed_files_with_bert_with_best_head/sentiment_train.txt",
     "/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_train_all_attrs.txt"),
    ("/processed_files_with_bert_with_best_head/sentiment_train_1.txt",
     "/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_train_1_all_attrs.txt"),
    ("/processed_files_with_bert_with_best_head/sentiment_train_0.txt",
     "/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_train_0_all_attrs.txt"),
):
    process_file(data_dir+source_suffix, data_dir+target_suffix)

100%|██████████| 443259/443259 [00:01<00:00, 314534.16it/s]
100%|██████████| 266041/266041 [00:00<00:00, 344643.63it/s]
100%|██████████| 177218/177218 [00:00<00:00, 316493.76it/s]


In [12]:
# Build the all-attribute files for the test split: merged file, then the _1 and _0 files.
# Each pair is (input path suffix, output path suffix) relative to data_dir.
for source_suffix, target_suffix in (
    ("/processed_files_with_bert_with_best_head/sentiment_test.txt",
     "/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_test_all_attrs.txt"),
    ("/processed_files_with_bert_with_best_head/sentiment_test_1.txt",
     "/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_test_1_all_attrs.txt"),
    ("/processed_files_with_bert_with_best_head/sentiment_test_0.txt",
     "/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_test_0_all_attrs.txt"),
):
    process_file(data_dir+source_suffix, data_dir+target_suffix)

100%|██████████| 1000/1000 [00:00<00:00, 142784.82it/s]
100%|██████████| 500/500 [00:00<00:00, 265529.50it/s]
100%|██████████| 500/500 [00:00<00:00, 269660.79it/s]


In [None]:
# Build the all-attribute files for the dev split.
# Each input is written to the output carrying the same suffix (_0 -> _0, _1 -> _1),
# matching the train and test cells; previously the _0 and _1 outputs were swapped.
process_file(data_dir+"/processed_files_with_bert_with_best_head/sentiment_dev.txt",data_dir+"/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_dev_all_attrs.txt")
process_file(data_dir+"/processed_files_with_bert_with_best_head/sentiment_dev_0.txt",data_dir+"/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_dev_0_all_attrs.txt")
process_file(data_dir+"/processed_files_with_bert_with_best_head/sentiment_dev_1.txt",data_dir+"/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_dev_1_all_attrs.txt")

In [223]:
# Run process_file_v1 on the train split: merged file, then the _1 and _0 files.
# Each pair is (input path suffix, output path suffix) relative to data_dir.
for source_suffix, target_suffix in (
    ("/processed_files_with_bert_with_best_head/sentiment_train.txt",
     "/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_train.txt"),
    ("/processed_files_with_bert_with_best_head/sentiment_train_1.txt",
     "/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_train_1.txt"),
    ("/processed_files_with_bert_with_best_head/sentiment_train_0.txt",
     "/processed_files_with_bert_with_best_head/delete_retrieve_edit_model/sentiment_train_0.txt"),
):
    process_file_v1(data_dir+source_suffix, data_dir+target_suffix)

  3%|▎         | 12511/443259 [00:00<00:06, 62731.38it/s]

NONE 0
['well', 'worth']
[9, 10]
['but', 'even', 'if', 'you', 'have', 'to', 'wait', 'it', "'", 'it', '.']
['but', 'even', 'if', 'you', 'have', 'to', 'wait', 'it', "'s", 'well', 'worth', 'it', '.']


 10%|▉         | 43785/443259 [00:00<00:06, 61969.08it/s]

NONE 1
['yes', 'well', 'worth']
[0, 8, 9]
['<REPLACE>', ',', 'it', "'", 's', 'pricey', 'but', 'it', "'", 's', 'it', '!']
['yes', ',', 'it', "'s", 'pricey', 'but', 'it', "'s", 'well', 'worth', 'it', '!']


 14%|█▍        | 62692/443259 [00:01<00:06, 62600.83it/s]

NONE 1
['good', 'very', 'good']
[3, 5, 6]
['these', 'guys', 'are', '<REPLACE>', '-', '-', '-', '-', 'at', 'what', 'they', 'do', '.']
['these', 'guys', 'are', 'good', '--', 'very', 'good', '--', 'at', 'what', 'they', 'do', '.']
NONE 0
['definitely', 'worth']
[10, 11]
['expect', 'a', 'wait', 'for', 'a', 'table', ',', 'but', 'it', "'", 'it', '.']
['expect', 'a', 'wait', 'for', 'a', 'table', ',', 'but', 'it', "'s", 'definitely', 'worth', 'it', '.']


 23%|██▎       | 100753/443259 [00:01<00:05, 62935.56it/s]

NONE 0
['fit', 'fine', 'bought']
[1, 2, 6]
['t', '-', 'shirts', '-', 'my', 'husband', 'a', 'tool', 't', '-', 'shirt', '.']
['t-shirts', 'fit', 'fine', '-', 'my', 'husband', 'bought', 'a', 'tool', 't-shirt', '.']
NONE 1
['good', 'nello', 'good']
[4, 8, 10]
['it', "'", 's', 'not', 'just', '<REPLACE>', ',', 'it', "'", 's', "'", 's', '!']
['it', "'s", 'not', 'just', 'good', ',', 'it', "'s", 'nello', "'s", 'good', '!']


 27%|██▋       | 119615/443259 [00:01<00:05, 62626.71it/s]

NONE 0
['_num_', 'breakfast', 'best']
[1, 2, 10]
['_', 'num', '_', ',', 'country', 'fried', 'steak', ',', 'is', 'the', '!']
['#', '_num_', 'breakfast', ',', 'country', 'fried', 'steak', ',', 'is', 'the', 'best', '!']


 37%|███▋      | 163756/443259 [00:02<00:04, 62930.18it/s]

NONE 0
['_num_', '...', 'wow']
[1, 2, 3]
['_', 'num', '_', '.', '.', '.', '!']
['$', '_num_', '...', 'wow', '!']
NONE 1
['_num_', 'well', 'worth']
[2, 5, 6]
['for', '$', '<REPLACE>', '_', 'num', '_', 'it', "'", 'it', '.']
['for', '$', '_num_', 'it', "'s", 'well', 'worth', 'it', '.']


 41%|████      | 182775/443259 [00:02<00:04, 61800.19it/s]

NONE 1
['yes', 'well', 'worth']
[0, 11, 12]
['<REPLACE>', ',', 'there', "'", 's', 'almost', 'always', 'a', 'wait', 'but', 'it', "'", 's', 'it', '.']
['yes', ',', 'there', "'s", 'almost', 'always', 'a', 'wait', 'but', 'it', "'s", 'well', 'worth', 'it', '.']


 46%|████▌     | 201767/443259 [00:03<00:03, 62827.12it/s]

NONE 0
['visitor', 'here', '_num_']
[1, 2, 3]
['out', '-', 'of', '-', '_', 'num', '_', 'weeks', 'ago', '.']
['out-of-town', 'visitor', 'here', '_num_', 'weeks', 'ago', '.']


 55%|█████▌    | 245219/443259 [00:03<00:03, 60994.19it/s]

NONE 0
['always', 'worth']
[3, 4]
['but', 'it', "'", 'it', 'to', 'me', 'in', 'the', 'end', '.']
['but', 'it', "'s", 'always', 'worth', 'it', 'to', 'me', 'in', 'the', 'end', '.']
NONE 0
['definitely', 'worth']
[9, 10]
['there', 'is', 'a', 'wait', 'sometimes', ',', 'but', 'it', "'", 'it', '!']
['there', 'is', 'a', 'wait', 'sometimes', ',', 'but', 'it', "'s", 'definitely', 'worth', 'it', '!']
NONE 0
['definitely', 'staying', 'here']
[2, 3, 4]
['i', "'", 'm', 'the', 'next', 'time', 'i', "'", 'm', 'in', 'madison', '!']
['i', "'m", 'definitely', 'staying', 'here', 'the', 'next', 'time', 'i', "'m", 'in', 'madison', '!']


 61%|██████    | 270126/443259 [00:04<00:02, 59745.94it/s]

NONE 0
['well', 'worth']
[10, 11]
['you', 'get', 'what', 'you', 'pay', 'for', 'here', 'and', 'it', "'", 'it', '.']
['you', 'get', 'what', 'you', 'pay', 'for', 'here', 'and', 'it', "'s", 'well', 'worth', 'it', '.']
NONE 1
['should', 'take', '_num_', 'minutes']
[1, 3, 4, 5]
['it', '<REPLACE>', 'n', "'", 't', '_', 'num', '_', 'to', 'get', 'a', 'dozen', 'bagels', '.']
['it', 'should', "n't", 'take', '_num_', 'minutes', 'to', 'get', 'a', 'dozen', 'bagels', '.']
NONE 0
['bad', '...', 'anything', 'special']
[4, 5, 8, 9]
['my', 'latte', 'was', 'n', "'", 't', '.', '.', '.', 'was', 'n', "'", 't', 'either', '.']
['my', 'latte', 'was', "n't", 'bad', '...', 'was', "n't", 'anything', 'special', 'either', '.']


 64%|██████▎   | 281925/443259 [00:04<00:02, 57079.08it/s]

NONE 1
['_num_', '_num_', 'weeks', 'pregnant']
[3, 11, 12, 13]
['i', 'had', 'my', '<REPLACE>', '_', 'num', '_', 'year', 'old', 'with', 'me', 'and', 'i', "'", '_', 'num', '_', '.']
['i', 'had', 'my', '_num_', 'year', 'old', 'with', 'me', 'and', 'i', "'m", '_num_', 'weeks', 'pregnant', '.']
NONE 0
['probably', 'say', 'no']
[11, 12, 13]
['if', 'you', 'asked', 'me', 'if', 'i', "'", 'd', 'return', ',', 'i', "'", 'd', '.']
['if', 'you', 'asked', 'me', 'if', 'i', "'d", 'return', ',', 'i', "'d", 'probably', 'say', 'no', '.']


 67%|██████▋   | 298748/443259 [00:04<00:02, 53896.30it/s]

NONE 0
['maintenance', 'did']
[1, 2]
['n', "'", 't', 'know', 'i', 'was', 'in', 'there', ')', '.']
['(', 'maintenance', 'did', "n't", 'know', 'i', 'was', 'in', 'there', ')', '.']
NONE 1
['do', 'deserve', '_num_', 'star']
[1, 3, 4, 5]
['they', '<REPLACE>', 'n', "'", 't', '_', 'num', '_', '!']
['they', 'do', "n't", 'deserve', '_num_', 'star', '!']
NONE 0
['amazingly', 'impressed']
[3, 4]
['i', 'was', 'n', "'", ',', 'but', 'i', 'was', 'n', "'", 't', 'disappointed', ',', 'either', '.']
['i', 'was', "n't", 'amazingly', 'impressed', ',', 'but', 'i', 'was', "n't", 'disappointed', ',', 'either', '.']
NONE 1
['do', 'rent', 'unit', '_num_']
[0, 2, 3, 4]
['<REPLACE>', 'n', "'", 't', '_', 'num', '_', '.']
['do', "n't", 'rent', 'unit', '_num_', '.']
NONE 0
['mistake', 'number', '_num_']
[1, 2, 3]
['_', 'num', '_', 'if', 'anyone', 'is', 'counting', ')', '.']
['(', 'mistake', 'number', '_num_', 'if', 'anyone', 'is', 'counting', ')', '.']


 71%|███████   | 315330/443259 [00:05<00:02, 54644.89it/s]

NONE 1
['do', 'mean', 'med']
[1, 3, 4]
['i', '<REPLACE>', 'n', "'", 't', 'rare', 'or', 'rare', '.']
['i', 'do', "n't", 'mean', 'med', 'rare', 'or', 'rare', '.']
NONE 1
['could', 'each', 'much']
[1, 3, 4]
['i', '<REPLACE>', 'n', "'", 't', 'of', 'any', 'of', 'this', '.']
['i', 'could', "n't", 'each', 'much', 'of', 'any', 'of', 'this', '.']
NONE 1
['purchased', '_num_', 'pot', '_num_']
[0, 2, 3, 4]
['<REPLACE>', '_', 'num', '_', '_', 'num', '_', 'weeks', 'ago', '.']
['purchased', '$', '_num_', 'pot', '_num_', 'weeks', 'ago', '.']
NONE 0
['actual', 'repair', '_num_']
[1, 2, 6]
['cost', 'around', '$', '_', 'num', '_', ')', '.']
['(', 'actual', 'repair', 'cost', 'around', '$', '_num_', ')', '.']
NONE 1
['do', 'suggest', 'coffee']
[0, 2, 3]
['<REPLACE>', 'n', "'", 't', 'or', 'ignore', 'them', 'or', 'not', 'offer', 'last', 'call', '.']
['do', "n't", 'suggest', 'coffee', 'or', 'ignore', 'them', 'or', 'not', 'offer', 'last', 'call', '.']


 74%|███████▎  | 326413/443259 [00:05<00:02, 55081.64it/s]

NONE 0
['_num_', 'resort', 'fee']
[1, 2, 3]
['_', 'num', '_', 'per', 'night', '.']
['$', '_num_', 'resort', 'fee', 'per', 'night', '.']
NONE 0
['_num_', 'dollar', '_num_', 'alcohol']
[1, 2, 6, 8]
['_', 'num', '_', 'for', 'dinner', 'for', '_', 'num', '_', 'without', '.']
['$', '_num_', 'dollar', 'for', 'dinner', 'for', '_num_', 'without', 'alcohol', '.']
NONE 0
['_num_', 'spicy', 'octopus']
[1, 2, 3]
['_', 'num', '_', 'was', 'like', 'chewy', 'rubber', 'and', 'totally', 'inedible', '.']
['$', '_num_', 'spicy', 'octopus', 'was', 'like', 'chewy', 'rubber', 'and', 'totally', 'inedible', '.']


 76%|███████▌  | 337429/443259 [00:05<00:01, 54493.80it/s]

NONE 0
['busy', 'nor', 'emergency']
[3, 4, 8]
['it', 'was', 'n', "'", 't', 'was', 'there', 'an', 'or', 'anything', '.']
['it', 'was', "n't", 'busy', 'nor', 'was', 'there', 'an', 'emergency', 'or', 'anything', '.']
NONE 1
['do', 'switch']
[0, 2]
['<REPLACE>', 'n', "'", 't']
['do', "n't", 'switch']


 80%|███████▉  | 353746/443259 [00:05<00:01, 54110.71it/s]

NONE 0
['craving', 'ra']
[5, 6]
['guess', 'next', 'time', 'i', "'", 'i', 'will', 'be', 'driving', 'to', 'old', 'town', '.']
['guess', 'next', 'time', 'i', "'m", 'craving', 'ra', 'i', 'will', 'be', 'driving', 'to', 'old', 'town', '.']
NONE 0
['_num_', '1/2', 'stars']
[1, 2, 3]
['_', 'num', '1', '/', '2', ')', '.']
['(', '_num_', '1/2', 'stars', ')', '.']


 83%|████████▎ | 370073/443259 [00:06<00:01, 54223.67it/s]

NONE 0
['out', '_num_']
[2, 4]
['i', "'", '_', 'num', '_', 'and', 'i', 'have', 'no', 'gift', '.']
['i', "'m", 'out', '$', '_num_', 'and', 'i', 'have', 'no', 'gift', '.']
NONE 1
['do', 'use', 'david']
[0, 2, 3]
['<REPLACE>', 'n', "'", 't', "'", 's', 'bridal', '.']
['do', "n't", 'use', 'david', "'s", 'bridal', '.']
NONE 1
['should', 'take', '_num_', 'min']
[1, 3, 4, 5]
['it', '<REPLACE>', 'n', "'", 't', '_', 'num', '_', 'for', 'pancakes', 'and', 'eggs', '.']
['it', 'should', "n't", 'take', '_num_', 'min', 'for', 'pancakes', 'and', 'eggs', '.']


 86%|████████▌ | 380982/443259 [00:06<00:01, 54112.73it/s]

NONE 1
['should', 'take', '_num_', 'minutes']
[1, 3, 4, 5]
['it', '<REPLACE>', 'n', "'", 't', '_', 'num', '_', 'to', 'order', 'and', 'pay', 'for', 'one', 'bagel', '.']
['it', 'should', "n't", 'take', '_num_', 'minutes', 'to', 'order', 'and', 'pay', 'for', 'one', 'bagel', '.']
NONE 1
['do', 'eat', 'here']
[1, 3, 4]
['i', '<REPLACE>', 'n', "'", 't', 'by', 'choice', ',', 'only', 'by', 'the', 'choice', 'of', 'others', '.']
['i', 'do', "n't", 'eat', 'here', 'by', 'choice', ',', 'only', 'by', 'the', 'choice', 'of', 'others', '.']


 90%|████████▉ | 397360/443259 [00:06<00:00, 54169.80it/s]

NONE 0
['_num_', 's.', 'valley']
[1, 2, 3]
['_', 'num', '_', '.', 'view', ',', 'lv', ')', '.']
['(', '_num_', 's.', 'valley', 'view', ',', 'lv', ')', '.']
NONE 1
['ca', 'give', '_num_', 'stars']
[0, 2, 3, 4]
['<REPLACE>', 'n', "'", 't', '_', 'num', '_', '.']
['ca', "n't", 'give', '_num_', 'stars', '.']


 92%|█████████▏| 408278/443259 [00:06<00:00, 54406.84it/s]

NONE 1
['do', 'recommend', '...']
[0, 2, 3]
['<REPLACE>', 'n', "'", 't', '.', '.', '.', '.']
['do', "n't", 'recommend', '...', '.']
NONE 1
['service', 'and/or', 'cranky', 'never']
[1, 4, 5, 9]
['the', '<REPLACE>', 'was', 'non', '-', 'existent', 'and', '/', 'or', '-', '-', 'i', 'was', 'asked', 'for', 'a', 'refill', '.']
['the', 'service', 'was', 'non-existent', 'and/or', 'cranky', '--', 'i', 'was', 'never', 'asked', 'for', 'a', 'refill', '.']
NONE 0
['_num_', 'years', 'old', "'ve"]
[2, 3, 4, 8]
['i', "'", '_', 'num', '_', ',', 'so', 'i', "'", 've', 'had', 'my', 'share', 'of', 'them', '.']
['i', "'m", '_num_', 'years', 'old', ',', 'so', 'i', "'ve", 'had', 'my', 'share', 'of', 'them', '.']
NONE 0
['_num_', 'bucks', 'lunch', '_num_']
[1, 2, 4, 6]
['_', 'num', '_', 'for', 'for', '_', 'num', '_', '.']
['$', '_num_', 'bucks', 'for', 'lunch', 'for', '_num_', '.']


 95%|█████████▍| 419279/443259 [00:07<00:00, 54611.53it/s]

NONE 1
['tea', 'hunger', 'cravings']
[1, 4, 5]
['for', '<REPLACE>', ',', 'late', '-', 'night', ',', 'dinner', ',', 'lunch', ',', 'dessert', '.']
['for', 'tea', ',', 'late-night', 'hunger', 'cravings', ',', 'dinner', ',', 'lunch', ',', 'dessert', '.']
NONE 1
['did', 'taste', 'horrible', '...']
[1, 3, 4, 5]
['they', '<REPLACE>', 'n', "'", 't', '.', '.', '.', 'but', 'you', 'could', 'n', "'", 't', 'dip', 'them', 'into', 'anything', '.']
['they', 'did', "n't", 'taste', 'horrible', '...', 'but', 'you', 'could', "n't", 'dip', 'them', 'into', 'anything', '.']


100%|█████████▉| 441398/443259 [00:07<00:00, 55185.24it/s]

NONE 1
['do', 'waste', '_num_', 'months']
[0, 2, 3, 4]
['<REPLACE>', 'n', "'", 't', '_', 'num', '_', 'waiting', 'on', 'broken', 'promises', 'and', 'hard', 'luck', 'stories', '.']
['do', "n't", 'waste', '_num_', 'months', 'waiting', 'on', 'broken', 'promises', 'and', 'hard', 'luck', 'stories', '.']
NONE 0
['_num_', 'gold', 'club']
[1, 2, 3]
['_', 'num', '_', 'member', 'with', 'a', 'reservation', '?']
['#', '_num_', 'gold', 'club', 'member', 'with', 'a', 'reservation', '?']


100%|██████████| 443259/443259 [00:07<00:00, 58628.92it/s]


NONE 0
['_num_', 'n.', '_num_', 'rd']
[1, 2, 3, 4]
['_', 'num', '_', '.', '_', 'num', '_', '.']
['@', '_num_', 'n.', '_num_', 'rd', '.']
NONE 3
['discovered', 'pads', '_num_', 'x', '_num_']
[4, 6, 8, 10, 11]
['that', "'", 's', 'where', 'i', '<REPLACE>', 'the', '<REPLACE>', 'were', '<REPLACE>', '_', 'num', '_', "'", "'", '_', 'num', '_', "'", "'", '.']
['that', "'s", 'where', 'i', 'discovered', 'the', 'pads', 'were', '_num_', "''", 'x', '_num_', "''", '.']


  2%|▏         | 6086/266041 [00:00<00:04, 60846.42it/s]

NONE 0
['well', 'worth']
[9, 10]
['but', 'even', 'if', 'you', 'have', 'to', 'wait', 'it', "'", 'it', '.']
['but', 'even', 'if', 'you', 'have', 'to', 'wait', 'it', "'s", 'well', 'worth', 'it', '.']


 16%|█▋        | 43609/266041 [00:00<00:03, 62333.28it/s]

NONE 1
['yes', 'well', 'worth']
[0, 8, 9]
['<REPLACE>', ',', 'it', "'", 's', 'pricey', 'but', 'it', "'", 's', 'it', '!']
['yes', ',', 'it', "'s", 'pricey', 'but', 'it', "'s", 'well', 'worth', 'it', '!']


 23%|██▎       | 62071/266041 [00:01<00:03, 60511.99it/s]

NONE 1
['good', 'very', 'good']
[3, 5, 6]
['these', 'guys', 'are', '<REPLACE>', '-', '-', '-', '-', 'at', 'what', 'they', 'do', '.']
['these', 'guys', 'are', 'good', '--', 'very', 'good', '--', 'at', 'what', 'they', 'do', '.']
NONE 0
['definitely', 'worth']
[10, 11]
['expect', 'a', 'wait', 'for', 'a', 'table', ',', 'but', 'it', "'", 'it', '.']
['expect', 'a', 'wait', 'for', 'a', 'table', ',', 'but', 'it', "'s", 'definitely', 'worth', 'it', '.']


 37%|███▋      | 99388/266041 [00:01<00:02, 61014.53it/s]

NONE 0
['fit', 'fine', 'bought']
[1, 2, 6]
['t', '-', 'shirts', '-', 'my', 'husband', 'a', 'tool', 't', '-', 'shirt', '.']
['t-shirts', 'fit', 'fine', '-', 'my', 'husband', 'bought', 'a', 'tool', 't-shirt', '.']
NONE 1
['good', 'nello', 'good']
[4, 8, 10]
['it', "'", 's', 'not', 'just', '<REPLACE>', ',', 'it', "'", 's', "'", 's', '!']
['it', "'s", 'not', 'just', 'good', ',', 'it', "'s", 'nello', "'s", 'good', '!']


 44%|████▍     | 118038/266041 [00:01<00:02, 61560.36it/s]

NONE 0
['_num_', 'breakfast', 'best']
[1, 2, 10]
['_', 'num', '_', ',', 'country', 'fried', 'steak', ',', 'is', 'the', '!']
['#', '_num_', 'breakfast', ',', 'country', 'fried', 'steak', ',', 'is', 'the', 'best', '!']


 61%|██████    | 162685/266041 [00:02<00:01, 63738.43it/s]

NONE 0
['_num_', '...', 'wow']
[1, 2, 3]
['_', 'num', '_', '.', '.', '.', '!']
['$', '_num_', '...', 'wow', '!']
NONE 1
['_num_', 'well', 'worth']
[2, 5, 6]
['for', '$', '<REPLACE>', '_', 'num', '_', 'it', "'", 'it', '.']
['for', '$', '_num_', 'it', "'s", 'well', 'worth', 'it', '.']


 71%|███████   | 188385/266041 [00:03<00:01, 64046.12it/s]

NONE 1
['yes', 'well', 'worth']
[0, 11, 12]
['<REPLACE>', ',', 'there', "'", 's', 'almost', 'always', 'a', 'wait', 'but', 'it', "'", 's', 'it', '.']
['yes', ',', 'there', "'s", 'almost', 'always', 'a', 'wait', 'but', 'it', "'s", 'well', 'worth', 'it', '.']


 76%|███████▌  | 201318/266041 [00:03<00:01, 64369.36it/s]

NONE 0
['visitor', 'here', '_num_']
[1, 2, 3]
['out', '-', 'of', '-', '_', 'num', '_', 'weeks', 'ago', '.']
['out-of-town', 'visitor', 'here', '_num_', 'weeks', 'ago', '.']


 92%|█████████▏| 245922/266041 [00:03<00:00, 62871.73it/s]

NONE 0
['always', 'worth']
[3, 4]
['but', 'it', "'", 'it', 'to', 'me', 'in', 'the', 'end', '.']
['but', 'it', "'s", 'always', 'worth', 'it', 'to', 'me', 'in', 'the', 'end', '.']
NONE 0
['definitely', 'worth']
[9, 10]
['there', 'is', 'a', 'wait', 'sometimes', ',', 'but', 'it', "'", 'it', '!']
['there', 'is', 'a', 'wait', 'sometimes', ',', 'but', 'it', "'s", 'definitely', 'worth', 'it', '!']
NONE 0
['definitely', 'staying', 'here']
[2, 3, 4]
['i', "'", 'm', 'the', 'next', 'time', 'i', "'", 'm', 'in', 'madison', '!']
['i', "'m", 'definitely', 'staying', 'here', 'the', 'next', 'time', 'i', "'m", 'in', 'madison', '!']


100%|██████████| 266041/266041 [00:04<00:00, 62274.92it/s]


NONE 0
['well', 'worth']
[10, 11]
['you', 'get', 'what', 'you', 'pay', 'for', 'here', 'and', 'it', "'", 'it', '.']
['you', 'get', 'what', 'you', 'pay', 'for', 'here', 'and', 'it', "'s", 'well', 'worth', 'it', '.']


  0%|          | 0/177218 [00:00<?, ?it/s]

NONE 1
['should', 'take', '_num_', 'minutes']
[1, 3, 4, 5]
['it', '<REPLACE>', 'n', "'", 't', '_', 'num', '_', 'to', 'get', 'a', 'dozen', 'bagels', '.']
['it', 'should', "n't", 'take', '_num_', 'minutes', 'to', 'get', 'a', 'dozen', 'bagels', '.']


  6%|▌         | 10664/177218 [00:00<00:03, 52626.15it/s]

NONE 0
['bad', '...', 'anything', 'special']
[4, 5, 8, 9]
['my', 'latte', 'was', 'n', "'", 't', '.', '.', '.', 'was', 'n', "'", 't', 'either', '.']
['my', 'latte', 'was', "n't", 'bad', '...', 'was', "n't", 'anything', 'special', 'either', '.']
NONE 1
['_num_', '_num_', 'weeks', 'pregnant']
[3, 11, 12, 13]
['i', 'had', 'my', '<REPLACE>', '_', 'num', '_', 'year', 'old', 'with', 'me', 'and', 'i', "'", '_', 'num', '_', '.']
['i', 'had', 'my', '_num_', 'year', 'old', 'with', 'me', 'and', 'i', "'m", '_num_', 'weeks', 'pregnant', '.']


 12%|█▏        | 20977/177218 [00:00<00:03, 47927.76it/s]

NONE 0
['probably', 'say', 'no']
[11, 12, 13]
['if', 'you', 'asked', 'me', 'if', 'i', "'", 'd', 'return', ',', 'i', "'", 'd', '.']
['if', 'you', 'asked', 'me', 'if', 'i', "'d", 'return', ',', 'i', "'d", 'probably', 'say', 'no', '.']


 21%|██        | 36489/177218 [00:00<00:02, 50426.12it/s]

NONE 0
['maintenance', 'did']
[1, 2]
['n', "'", 't', 'know', 'i', 'was', 'in', 'there', ')', '.']
['(', 'maintenance', 'did', "n't", 'know', 'i', 'was', 'in', 'there', ')', '.']
NONE 1
['do', 'deserve', '_num_', 'star']
[1, 3, 4, 5]
['they', '<REPLACE>', 'n', "'", 't', '_', 'num', '_', '!']
['they', 'do', "n't", 'deserve', '_num_', 'star', '!']
NONE 0
['amazingly', 'impressed']
[3, 4]
['i', 'was', 'n', "'", ',', 'but', 'i', 'was', 'n', "'", 't', 'disappointed', ',', 'either', '.']
['i', 'was', "n't", 'amazingly', 'impressed', ',', 'but', 'i', 'was', "n't", 'disappointed', ',', 'either', '.']
NONE 1
['do', 'rent', 'unit', '_num_']
[0, 2, 3, 4]
['<REPLACE>', 'n', "'", 't', '_', 'num', '_', '.']
['do', "n't", 'rent', 'unit', '_num_', '.']
NONE 0
['mistake', 'number', '_num_']
[1, 2, 3]
['_', 'num', '_', 'if', 'anyone', 'is', 'counting', ')', '.']
['(', 'mistake', 'number', '_num_', 'if', 'anyone', 'is', 'counting', ')', '.']


 26%|██▋       | 46648/177218 [00:00<00:02, 50212.54it/s]

NONE 1
['do', 'mean', 'med']
[1, 3, 4]
['i', '<REPLACE>', 'n', "'", 't', 'rare', 'or', 'rare', '.']
['i', 'do', "n't", 'mean', 'med', 'rare', 'or', 'rare', '.']
NONE 1
['could', 'each', 'much']
[1, 3, 4]
['i', '<REPLACE>', 'n', "'", 't', 'of', 'any', 'of', 'this', '.']
['i', 'could', "n't", 'each', 'much', 'of', 'any', 'of', 'this', '.']


 32%|███▏      | 56885/177218 [00:01<00:02, 49358.82it/s]

NONE 1
['purchased', '_num_', 'pot', '_num_']
[0, 2, 3, 4]
['<REPLACE>', '_', 'num', '_', '_', 'num', '_', 'weeks', 'ago', '.']
['purchased', '$', '_num_', 'pot', '_num_', 'weeks', 'ago', '.']
NONE 0
['actual', 'repair', '_num_']
[1, 2, 6]
['cost', 'around', '$', '_', 'num', '_', ')', '.']
['(', 'actual', 'repair', 'cost', 'around', '$', '_num_', ')', '.']
NONE 1
['do', 'suggest', 'coffee']
[0, 2, 3]
['<REPLACE>', 'n', "'", 't', 'or', 'ignore', 'them', 'or', 'not', 'offer', 'last', 'call', '.']
['do', "n't", 'suggest', 'coffee', 'or', 'ignore', 'them', 'or', 'not', 'offer', 'last', 'call', '.']
NONE 0
['_num_', 'resort', 'fee']
[1, 2, 3]
['_', 'num', '_', 'per', 'night', '.']
['$', '_num_', 'resort', 'fee', 'per', 'night', '.']
NONE 0
['_num_', 'dollar', '_num_', 'alcohol']
[1, 2, 6, 8]
['_', 'num', '_', 'for', 'dinner', 'for', '_', 'num', '_', 'without', '.']
['$', '_num_', 'dollar', 'for', 'dinner', 'for', '_num_', 'without', 'alcohol', '.']
NONE 0
['_num_', 'spicy', 'octopus']
[1, 2

 41%|████      | 72083/177218 [00:01<00:02, 49943.12it/s]

NONE 0
['busy', 'nor', 'emergency']
[3, 4, 8]
['it', 'was', 'n', "'", 't', 'was', 'there', 'an', 'or', 'anything', '.']
['it', 'was', "n't", 'busy', 'nor', 'was', 'there', 'an', 'emergency', 'or', 'anything', '.']
NONE 1
['do', 'switch']
[0, 2]
['<REPLACE>', 'n', "'", 't']
['do', "n't", 'switch']


 47%|████▋     | 82495/177218 [00:01<00:01, 51051.10it/s]

NONE 0
['craving', 'ra']
[5, 6]
['guess', 'next', 'time', 'i', "'", 'i', 'will', 'be', 'driving', 'to', 'old', 'town', '.']
['guess', 'next', 'time', 'i', "'m", 'craving', 'ra', 'i', 'will', 'be', 'driving', 'to', 'old', 'town', '.']
NONE 0
['_num_', '1/2', 'stars']
[1, 2, 3]
['_', 'num', '1', '/', '2', ')', '.']
['(', '_num_', '1/2', 'stars', ')', '.']


 58%|█████▊    | 103424/177218 [00:02<00:01, 52140.12it/s]

NONE 0
['out', '_num_']
[2, 4]
['i', "'", '_', 'num', '_', 'and', 'i', 'have', 'no', 'gift', '.']
['i', "'m", 'out', '$', '_num_', 'and', 'i', 'have', 'no', 'gift', '.']
NONE 1
['do', 'use', 'david']
[0, 2, 3]
['<REPLACE>', 'n', "'", 't', "'", 's', 'bridal', '.']
['do', "n't", 'use', 'david', "'s", 'bridal', '.']
NONE 1
['should', 'take', '_num_', 'min']
[1, 3, 4, 5]
['it', '<REPLACE>', 'n', "'", 't', '_', 'num', '_', 'for', 'pancakes', 'and', 'eggs', '.']
['it', 'should', "n't", 'take', '_num_', 'min', 'for', 'pancakes', 'and', 'eggs', '.']


 64%|██████▍   | 113957/177218 [00:02<00:01, 52444.40it/s]

NONE 1
['should', 'take', '_num_', 'minutes']
[1, 3, 4, 5]
['it', '<REPLACE>', 'n', "'", 't', '_', 'num', '_', 'to', 'order', 'and', 'pay', 'for', 'one', 'bagel', '.']
['it', 'should', "n't", 'take', '_num_', 'minutes', 'to', 'order', 'and', 'pay', 'for', 'one', 'bagel', '.']
NONE 1
['do', 'eat', 'here']
[1, 3, 4]
['i', '<REPLACE>', 'n', "'", 't', 'by', 'choice', ',', 'only', 'by', 'the', 'choice', 'of', 'others', '.']
['i', 'do', "n't", 'eat', 'here', 'by', 'choice', ',', 'only', 'by', 'the', 'choice', 'of', 'others', '.']


 73%|███████▎  | 129810/177218 [00:02<00:00, 52193.21it/s]

NONE 0
['_num_', 's.', 'valley']
[1, 2, 3]
['_', 'num', '_', '.', 'view', ',', 'lv', ')', '.']
['(', '_num_', 's.', 'valley', 'view', ',', 'lv', ')', '.']
NONE 1
['ca', 'give', '_num_', 'stars']
[0, 2, 3, 4]
['<REPLACE>', 'n', "'", 't', '_', 'num', '_', '.']
['ca', "n't", 'give', '_num_', 'stars', '.']


 79%|███████▉  | 140456/177218 [00:02<00:00, 51876.04it/s]

NONE 1
['do', 'recommend', '...']
[0, 2, 3]
['<REPLACE>', 'n', "'", 't', '.', '.', '.', '.']
['do', "n't", 'recommend', '...', '.']
NONE 1
['service', 'and/or', 'cranky', 'never']
[1, 4, 5, 9]
['the', '<REPLACE>', 'was', 'non', '-', 'existent', 'and', '/', 'or', '-', '-', 'i', 'was', 'asked', 'for', 'a', 'refill', '.']
['the', 'service', 'was', 'non-existent', 'and/or', 'cranky', '--', 'i', 'was', 'never', 'asked', 'for', 'a', 'refill', '.']
NONE 0
['_num_', 'years', 'old', "'ve"]
[2, 3, 4, 8]
['i', "'", '_', 'num', '_', ',', 'so', 'i', "'", 've', 'had', 'my', 'share', 'of', 'them', '.']
['i', "'m", '_num_', 'years', 'old', ',', 'so', 'i', "'ve", 'had', 'my', 'share', 'of', 'them', '.']


 85%|████████▌ | 150961/177218 [00:02<00:00, 52183.58it/s]

NONE 0
['_num_', 'bucks', 'lunch', '_num_']
[1, 2, 4, 6]
['_', 'num', '_', 'for', 'for', '_', 'num', '_', '.']
['$', '_num_', 'bucks', 'for', 'lunch', 'for', '_num_', '.']
NONE 1
['tea', 'hunger', 'cravings']
[1, 4, 5]
['for', '<REPLACE>', ',', 'late', '-', 'night', ',', 'dinner', ',', 'lunch', ',', 'dessert', '.']
['for', 'tea', ',', 'late-night', 'hunger', 'cravings', ',', 'dinner', ',', 'lunch', ',', 'dessert', '.']


 94%|█████████▍| 166854/177218 [00:03<00:00, 52768.83it/s]

NONE 1
['did', 'taste', 'horrible', '...']
[1, 3, 4, 5]
['they', '<REPLACE>', 'n', "'", 't', '.', '.', '.', 'but', 'you', 'could', 'n', "'", 't', 'dip', 'them', 'into', 'anything', '.']
['they', 'did', "n't", 'taste', 'horrible', '...', 'but', 'you', 'could', "n't", 'dip', 'them', 'into', 'anything', '.']
NONE 1
['do', 'waste', '_num_', 'months']
[0, 2, 3, 4]
['<REPLACE>', 'n', "'", 't', '_', 'num', '_', 'waiting', 'on', 'broken', 'promises', 'and', 'hard', 'luck', 'stories', '.']
['do', "n't", 'waste', '_num_', 'months', 'waiting', 'on', 'broken', 'promises', 'and', 'hard', 'luck', 'stories', '.']
NONE 0
['_num_', 'gold', 'club']
[1, 2, 3]
['_', 'num', '_', 'member', 'with', 'a', 'reservation', '?']
['#', '_num_', 'gold', 'club', 'member', 'with', 'a', 'reservation', '?']


100%|██████████| 177218/177218 [00:03<00:00, 51339.26it/s]


NONE 0
['_num_', 'n.', '_num_', 'rd']
[1, 2, 3, 4]
['_', 'num', '_', '.', '_', 'num', '_', '.']
['@', '_num_', 'n.', '_num_', 'rd', '.']
NONE 3
['discovered', 'pads', '_num_', 'x', '_num_']
[4, 6, 8, 10, 11]
['that', "'", 's', 'where', 'i', '<REPLACE>', 'the', '<REPLACE>', 'were', '<REPLACE>', '_', 'num', '_', "'", "'", '_', 'num', '_', "'", "'", '.']
['that', "'s", 'where', 'i', 'discovered', 'the', 'pads', 'were', '_num_', "''", 'x', '_num_', "''", '.']


In [158]:
# Run the (small) test splits first when trying out changes.
# Each file is read from the BERT best-head directory and written, under the
# same file name, into its delete_retrieve_edit_model subdirectory.
test_root = data_dir + "/processed_files_with_bert_with_best_head"
for test_name in ("sentiment_test.txt", "sentiment_test_1.txt", "sentiment_test_0.txt"):
    process_file_v1(
        test_root + "/" + test_name,
        test_root + "/delete_retrieve_edit_model/" + test_name,
    )

100%|██████████| 1000/1000 [00:00<00:00, 56367.48it/s]
100%|██████████| 500/500 [00:00<00:00, 71094.72it/s]
100%|██████████| 500/500 [00:00<00:00, 66485.50it/s]


In [217]:
# Convert the dev splits, passing test=True to process_file_v1.
# NOTE(review): the per-label files are written crosswise -- input
# sentiment_dev_0.txt goes to output sentiment_dev_1.txt and vice versa --
# whereas the combined file keeps its name. Confirm this swap is intentional.
dev_root = data_dir + "/processed_files_with_bert_with_best_head"
dev_in_to_out = [
    ("sentiment_dev.txt", "sentiment_dev.txt"),
    ("sentiment_dev_0.txt", "sentiment_dev_1.txt"),
    ("sentiment_dev_1.txt", "sentiment_dev_0.txt"),
]
for dev_in_name, dev_out_name in dev_in_to_out:
    process_file_v1(
        dev_root + "/" + dev_in_name,
        dev_root + "/delete_retrieve_edit_model/" + dev_out_name,
        test=True,
    )

100%|██████████| 4000/4000 [00:00<00:00, 44698.12it/s]


NONE 0
['_num_', 'wi-fi', 'charge']
[1, 2, 3]
['_', 'num', '_', '-', 'fi', '.']
['$', '_num_', 'wi-fi', 'charge', '.']


100%|██████████| 2000/2000 [00:00<00:00, 43963.84it/s]


NONE 0
['_num_', 'wi-fi', 'charge']
[1, 2, 3]
['_', 'num', '_', '-', 'fi', '.']
['$', '_num_', 'wi-fi', 'charge', '.']


100%|██████████| 2000/2000 [00:00<00:00, 45756.63it/s]
