In [1]:
import os
import sacrebleu
import numpy as np
import ast
from bs4 import BeautifulSoup
import re
import random
import copy
import csv
from sacremoses import MosesTokenizer, MosesDetokenizer

In [2]:
# Number of generated sequences per input; the grouping helpers below use it
# as the stride when regrouping the flat list of hypotheses.
NUM_SEQUENCES = 5

def read_finetuned(file_path):
    """Read generated hypotheses from an output file, one per line.

    Lines starting with "==" are treated as separators and skipped. Every
    other line (including blank ones, so the line count is preserved) yields
    one entry with surrounding whitespace removed and a single trailing
    ``<EOS>`` token dropped.

    Args:
        file_path: path of the text file to read.

    Returns:
        List of hypothesis strings in file order.
    """
    eos = '<EOS>'
    hypotheses = []
    with open(file_path) as f:
        for line in f:
            if line.startswith("=="):
                continue
            text = line.strip()
            # Remove the token as a suffix. str.rstrip(' <EOS>') would strip
            # any trailing run of the characters ' ', '<', 'E', 'O', 'S', '>'
            # and could eat the end of the actual text.
            if text.endswith(eos):
                text = text[:-len(eos)].rstrip()
            hypotheses.append(text)
    return hypotheses

def read_pretrained(file_path):
    """Read hypotheses from an output file where one hypothesis may span lines.

    Lines starting with "==" are separators. A counter of pending separators
    (capped at 2) decides how each text line is handled:

    * counter == 2: the text line starts a new hypothesis and the counter
      drops to 1;
    * counter <= 1: the text line is appended (space-joined) to the most
      recent hypothesis.

    So the first hypothesis needs two separator lines before it, and later
    ones need at least one. Whitespace-only lines are ignored.

    Args:
        file_path: path of the text file to read.

    Returns:
        List of hypothesis strings.

    Raises:
        IndexError: if a text line appears before any hypothesis was started.
    """
    hypotheses = []
    pending_separators = 0
    with open(file_path) as f:
        for line in f:
            if not line.split():
                continue
            if line.startswith("=="):
                # Cap at 2 so repeated separators behave like a single one.
                pending_separators = min(pending_separators + 1, 2)
            elif pending_separators > 1:
                hypotheses.append(line.strip())
                pending_separators -= 1
            else:
                hypotheses[-1] = hypotheses[-1] + " " + line.strip()
    return hypotheses


def group_hypotheses(hypotheses, num_sequences=None):
    """Regroup a flat list of "prompt: text" hypotheses by sample index.

    The flat list is read as consecutive runs of ``num_sequences`` samples per
    input. Group ``i`` of the result holds the i-th sample of every input,
    with the prompt (everything up to the first ':') removed.

    Args:
        hypotheses: flat list of strings, each containing at least one ':'.
        num_sequences: samples per input; defaults to the module-level
            ``NUM_SEQUENCES``.

    Returns:
        List of ``num_sequences`` lists, each with
        ``len(hypotheses) // num_sequences`` strings.

    Raises:
        IndexError: if a hypothesis contains no ':'.
    """
    if num_sequences is None:
        num_sequences = NUM_SEQUENCES
    num_inputs = len(hypotheses) // num_sequences
    grouped = []
    for i in range(num_sequences):
        # maxsplit=1 keeps colons inside the generated text (e.g. "10:30");
        # a plain split(':')[1] would cut the text at the second colon.
        grouped.append([
            hypotheses[i + j * num_sequences].strip().split(':', 1)[1].lstrip()
            for j in range(num_inputs)
        ])
    return grouped

def group_ilm_hypotheses(hypotheses, num_sequences=None):
    """Regroup a flat list of "context <SEP> answer" hypotheses by sample index.

    Same regrouping as ``group_hypotheses``, but each entry keeps the text
    between the first and second ``<SEP>`` (or up to the end when there is
    only one), left-stripped.

    Args:
        hypotheses: flat list of strings, each containing ``<SEP>``.
        num_sequences: samples per input; defaults to the module-level
            ``NUM_SEQUENCES``.

    Returns:
        List of ``num_sequences`` lists, each with
        ``len(hypotheses) // num_sequences`` strings.

    Raises:
        IndexError: if a hypothesis contains no ``<SEP>``.
    """
    if num_sequences is None:
        num_sequences = NUM_SEQUENCES
    num_inputs = len(hypotheses) // num_sequences
    return [
        [
            hypotheses[i + j * num_sequences].strip().split('<SEP>')[1].lstrip()
            for j in range(num_inputs)
        ]
        for i in range(num_sequences)
    ]

def convert_tokens_to_basic(hypotheses_basic):
    """Convert ``<BEVENT> ... <EEVENT>`` scenarios into numbered-step strings.

    Each scenario is rewritten as "1. first event 2. second event ...".

    Args:
        hypotheses_basic: list of groups, each a list of scenario strings
            using ``<BEVENT>``/``<EEVENT>`` event delimiters.

    Returns:
        List of groups of numbered-step strings. A scenario with no event
        tags at all contributes nothing to its group, so a group can come out
        shorter than its input.
    """
    hypotheses_modified = []
    for group in hypotheses_basic:
        converted = []
        for scenario in group:
            markup = scenario.replace('<EEVENT>', '</bevent>').replace('<BEVENT>', '<bevent>')
            # Name the parser explicitly: without it BeautifulSoup picks
            # whichever parser is installed and emits a warning, so results
            # can differ between machines.
            soup = BeautifulSoup(markup, 'html.parser')
            tags = soup.find_all('bevent')
            if not tags:
                continue
            # .string is None when a tag holds nested markup; such events are
            # skipped and do not consume a step number.
            texts = [tag.string for tag in tags if tag.string is not None]
            steps = ["{}. {}".format(n, text.strip()) for n, text in enumerate(texts, 1)]
            converted.append(" ".join(steps).strip())
        hypotheses_modified.append(converted)
    return hypotheses_modified

def read_references(file_path='./data/valid_references.txt'):
    """Load tokenised references and join each one back into a string.

    Every line of the file is a Python literal: a list of references, each
    reference itself a list of tokens.

    Args:
        file_path: path of the references file.

    Returns:
        One list per line, holding the space-joined reference strings.
    """
    with open(file_path) as handle:
        raw_lines = handle.readlines()
    return [
        [" ".join(tokens) for tokens in ast.literal_eval(raw)]
        for raw in raw_lines
    ]

def read_ilm_references(file_path='./data/valid_inference_references.txt'):
    """Read one reference per line, dropping a trailing ``<EOS>`` token.

    Args:
        file_path: path of the references file.

    Returns:
        List of reference strings with surrounding whitespace removed.
    """
    eos = '<EOS>'
    references = []
    with open(file_path) as f:
        for line in f:
            text = line.strip()
            # Remove the token as a suffix. str.strip('<EOS>') strips the
            # characters '<', 'E', 'O', 'S', '>' from both ends, which
            # corrupts references such as "Open the door".
            if text.endswith(eos):
                text = text[:-len(eos)]
            references.append(text.strip())
    return references

def convert_refs_for_bleu(references, max_references=50):
    """Rearrange per-input reference lists into per-position reference streams.

    Items are taken in order. While fewer than ``max_references`` streams
    exist, each item opens a new single-element stream. After that, the item
    at position ``idx`` of an input is appended to stream ``idx``.

    The result is a clean transpose (stream k holds the k-th reference of
    every input) only when the first input has exactly ``max_references``
    references and no later input has more.

    Args:
        references: list over inputs, each a list of reference strings.
        max_references: number of streams to build; previously hard-coded
            to 50.

    Returns:
        List of reference streams (lists of strings).

    Raises:
        IndexError: if an item's position has no stream once the stream
            limit has been reached.
    """
    new_references = []
    for ref in references:
        for idx, text in enumerate(ref):
            if len(new_references) < max_references:
                new_references.append([text])
            else:
                new_references[idx].append(text)
    return new_references


def replace_blanks(hypotheses):
    """Fill ``<BLK>`` placeholders with the answer that follows ``<SEP> ``.

    Each hypothesis has the form "scenario <SEP> answer <ANS>". Every
    ``<BLK>`` in the scenario part is replaced by the answer text, with one
    trailing ``<ANS>`` token removed.

    Args:
        hypotheses: list of infilling outputs containing ``'<SEP> '``.

    Returns:
        List of scenarios with blanks filled in, whitespace-stripped.

    Raises:
        IndexError: if a hypothesis does not contain ``'<SEP> '``.
    """
    ans = '<ANS>'
    new_hypotheses = []
    for hyp in hypotheses:
        parts = hyp.strip().split('<SEP> ')
        scenario = parts[0]
        answer = parts[1].strip()
        # Remove the token as a suffix. str.rstrip('<ANS>') strips any
        # trailing run of '<', 'A', 'N', 'S', '>' and would also eat text
        # such as "NASA" directly before the token.
        if answer.endswith(ans):
            answer = answer[:-len(ans)]
        new_hypotheses.append(scenario.replace('<BLK>', answer).strip())
    return new_hypotheses


In [100]:
# Load the test references and rearrange them with convert_refs_for_bleu.
references = read_references("./data/test_references.txt")
new_references = convert_refs_for_bleu(references)

In [191]:
# NOTE(review): `new_outs` is not assigned in any cell above this one; this
# only runs with leftover kernel state.
len(new_outs)

25

In [615]:
# Parse the output file with the multi-line reader, regroup by sample index,
# and preview the first group.
hypotheses = read_pretrained('./outputs/generated_valid_ordered_large_g16_epoch1.txt')
print(len(hypotheses))
hypotheses_basic = group_hypotheses(hypotheses)
hypotheses_basic[0]

In [677]:
# Parse the same output file with the one-hypothesis-per-line reader,
# regroup by sample index, and preview the first group.
hypotheses = read_finetuned('./outputs/generated_valid_ordered_large_g16_epoch1.txt')
print(len(hypotheses))
hypotheses_basic = group_hypotheses(hypotheses)
hypotheses_basic[0]

25


['1. walk into the bathroom 2. turn on the water 3. get undressed 4. get in the shower 5. wash all over your body 6. wash your hair 7. get your hair cut 8. leave the bathroom',
 '1. go to the store 2. purchase the items needed 3. go back to the store 4. pick up the items 5. return them to the shop 6. buy the items in store 7. leave the store',
 '1. get a large box 2. take the tree out of its box 3. start the tree 4. put a small amount of soil in the soil 5. cut the branches 6. put the tree in the soil',
 '1. turn on flat tire 2. take out old tire 3. get new tire 4. put new tire into flat 5. replace old tire with new tire',
 '1. take off clothes 2. put on towel 3. sit in tub 4. take a bath 5. wash off shampoo 6. rinse shampoo 7. dry shampoo 8. use soap 9. rinse soap 10. dry soap 11. put soap in clothes 12. put on body 13. wear clothes']

In [650]:
# Rewrite <BEVENT>/<EEVENT>-delimited scenarios as numbered steps.
hypotheses_modified = convert_tokens_to_basic(hypotheses_basic)
# Overwrites hypotheses_basic, so re-running this cell feeds already-converted text back in.
hypotheses_basic = hypotheses_modified
hypotheses_modified[0]

['1. prepare cake mix 2. put butter in pan 3. put water in pan 4. add sugar and vanilla to pan 5. wait for water to boil 6. pour batter into pan 7. stir as needed 8. check to ensure cake is cooked',
 '1. call the library and say that you want a book 2. tell the clerk that you want to borrow it 3. give them your card 4. take your book back',
 '1. park the car at the airport 2. get in the car 3. drive to the airport 4. take the proper flight ticket 5. board the airplane',
 '1. put on proper clothing 2. get in car 3. wait for train 4. get off at desired stop 5. find seat 6. sit down 7. watch or listen to the story 8. leave',
 '1. wait for bus 2. get on bus 3. find seat 4. find a seat 5. sit down 6. wait for stop 7. wait for stop 8. find a seat 9. sit down 10. start listening to music 11. get off the bus']

In [433]:
# Load infilling (ILM) outputs and keep only the text after <SEP>, regrouped by sample index.
hypotheses = read_finetuned('./outputs/generated_valid_large_ilm_num_ga16_epoch2_subset.txt')
print(len(hypotheses))
hypotheses_basic = group_ilm_hypotheses(hypotheses)
hypotheses_basic[0]

50


['find bicycle tire 2. drive to home 3. remove old tire from ca',
 'remove tire. <ANS>',
 'find flat surface. <ANS>',
 'check to see how much rubber you need. <ANS>',
 'find flat tire. <ANS>',
 'remove wheel. <ANS>',
 'find the flat on which the wheel is being repaired. <ANS>',
 'replace wheel. <ANS>',
 'check flat tire for flat. <ANS>',
 'insert replacement tube into wheel. <ANS>']

In [254]:
# Drop the first space-separated token of every hypothesis in every group.
hypotheses_modified = []
for hyp in hypotheses_basic:
    hypo = []
    for h in hyp:
        hypo.append(" ".join(h.strip().split(' ')[1:]))
    hypotheses_modified.append(hypo)

In [256]:
# Replace the working hypotheses with the modified version built earlier.
hypotheses_basic = hypotheses_modified

In [426]:
# Load the infilling references: one string per line of the file.
references = read_ilm_references('./data/valid_inference_reference_num_subset.txt')
# new_references = convert_refs_for_bleu(references)

In [427]:
# Wrap the flat reference list as a single reference stream.
new_references = [references]

In [428]:
# Display the reference streams for inspection.
new_references

[['prop bike upside down. <ANS>',
  'prop bike upside down. <ANS>',
  'prop bike upside down. <ANS>',
  'prop bike upside down. <ANS>',
  'prop bike upside down. <ANS>',
  'prop bike upside down. <ANS>',
  'prop bike upside down. <ANS>',
  'prop bike upside down. <ANS>',
  'remove wheel. <ANS>',
  'remove wheel. <ANS>']]

In [678]:
# One sacrebleu corpus-level BLEU score per hypothesis group, all scored
# against the same new_references.
scores= []
for hypothesis in hypotheses_basic:
    scores.append(sacrebleu.corpus_bleu(hypothesis, new_references, force=True).score)

In [679]:
# Mean and standard deviation of the per-group scores.
np.mean(scores), np.std(scores)

(47.834950900413894, 2.658539775560099)

In [228]:
# Build tokenised references: each input's list of token lists is appended
# 5 times in a row.
# NOTE(review): the literal 5 presumably mirrors NUM_SEQUENCES — confirm.
references = read_references("./data/test_references.txt")
n_references = []
for refs in references:
    internal = []
    for r in refs:
        internal.append(r.split())
    for i in range(5):
        n_references.append(internal)

In [238]:
# Load the ordered-prompt test outputs and strip the prompt from each line.
# Unlike the other loading cells, hypotheses_basic is a flat list of strings
# here, not a list of groups.
split='test'
prompt='ordered'
hypotheses = read_finetuned('./outputs/generated_'+split+'_'+prompt+'_large_g16_epoch1_removed_deduplicated_ordered.txt')
print(len(hypotheses))
hypotheses_basic = [ hyp.strip().split(':')[1].lstrip() for hyp in hypotheses] #group_hypotheses(hypotheses)
hypotheses_basic[0]

25


'1. fill pan with water 2. put water in food processor 3. place cake in pan 4. turn it on 5. add butter and sugar 6. mix 7. remove from pan 8. put in to oven'

In [127]:
# Load the "expect"-prompt test outputs (removed + deduplicated + ordered
# file) and regroup by sample index.
split='test'
prompt='expect'
hypotheses = read_finetuned('./outputs/generated_'+split+'_'+prompt+'_large_g16_epoch1_removed_deduplicated_ordered.txt')
print(len(hypotheses))
hypotheses_basic = group_hypotheses(hypotheses)
hypotheses_basic[0]

25


['1. put the water in the stove 2. put the sugar in 3. turn on the stove 4. let the sugar melt 5. pour the cake into the water 6. mix the cake and pour it into the cake molds 7. place the molds on the cake',
 '1. go to the library 2. park the car 3. find a book in a shelf 4. bring the book to the library 5. go into the library 6. sign in the appropriate area 7. find a seat 8. read the book 9. get a copy of the book 10. return the book 11. leave the library',
 '1. get in the car 2. drive to the airport 3. drive to the terminal 4. get out of car 5. wait in line 6. go through turnstiles 7. check in bags 8. check in luggage 9. go through security check in 10. board airplane',
 '1. pick a train 2. take a train 3. wait for train to arrive 4. get on a train 5. wait for train to stop',
 '1. check in 2. find a driver 3. pay for driver 4. wait for driver 5. get on the bus 6. board bus']

In [227]:
# Load the "expect"-prompt test outputs ("removed" file) and regroup by sample index.
split='test'
prompt='expect'
hypotheses = read_finetuned('./outputs/generated_'+split+'_'+prompt+'_large_g16_epoch1_removed.txt')
print(len(hypotheses))
hypotheses_basic = group_hypotheses(hypotheses)
hypotheses_basic[0]

25


['1. put the water in the stove 2. put the sugar in 3. turn on the stove 4. let the sugar melt 5. pour the cake into the water 6. mix the cake and pour it into the cake molds 7. place the molds on the cake',
 '1. go to the library 2. park the car 3. find a book in a shelf 4. bring the book to the library 5. go into the library 6. sign in the appropriate area 7. find a seat 8. read the book 9. get a copy of the book 10. return the book 11. leave the library',
 '1. get in the car 2. drive to the airport 3. check in luggage 4. check in bags 5. get out of car 6. drive to the terminal 7. wait in line 8. go through security check in 9. go through turnstiles 10. board airplane',
 '1. pick a train 2. take a train 3. get on a train 4. wait for train to arrive 5. wait for train to stop',
 '1. find a driver 2. pay for driver 3. check in 4. get on the bus 5. wait for driver 6. board bus']

In [224]:
# Load the "expect"-prompt test outputs ("removed_deduplicated" file) and regroup by sample index.
split='test'
prompt='expect'
hypotheses = read_finetuned('./outputs/generated_'+split+'_'+prompt+'_large_g16_epoch1_removed_deduplicated.txt')
print(len(hypotheses))
hypotheses_basic = group_hypotheses(hypotheses)
hypotheses_basic[0]

25


['1. put the water in the stove 2. put the sugar in 3. turn on the stove 4. let the sugar melt 5. pour the cake into the water 6. mix the cake and pour it into the cake molds 7. place the molds on the cake',
 '1. go to the library 2. park the car 3. find a book in a shelf 4. bring the book to the library 5. go into the library 6. sign in the appropriate area 7. find a seat 8. read the book 9. get a copy of the book 10. return the book 11. leave the library',
 '1. get in the car 2. drive to the airport 3. check in luggage 4. check in bags 5. get out of car 6. drive to the terminal 7. wait in line 8. go through security check in 9. go through turnstiles 10. board airplane',
 '1. pick a train 2. take a train 3. get on a train 4. wait for train to arrive 5. wait for train to stop',
 '1. find a driver 2. pay for driver 3. check in 4. get on the bus 5. wait for driver 6. board bus']

In [225]:
# NLTK corpus BLEU (uniform 4-gram weights, smoothing method1) for each
# hypothesis group against n_references.
# NOTE(review): corpus_bleu and chencherry are only imported/defined in a
# later cell (In [153]); on a fresh kernel this cell fails.
scores= []
for hypothesis in hypotheses_basic:
    scores.append(corpus_bleu( n_references, [hyp.split() for hyp in hypothesis], weights=(0.25,0.25,0.25,0.25), smoothing_function=chencherry.method1))

In [239]:
# Score a flat hypotheses_basic (list of strings) and append to the existing
# `scores` list; each re-run of this cell adds another entry.
scores.append(corpus_bleu( n_references, [hyp.split() for hyp in hypotheses_basic], weights=(0.25,0.25,0.25,0.25), smoothing_function=chencherry.method1))

In [240]:
# Mean and standard deviation of the accumulated scores.
np.mean(scores), np.std(scores)

(0.35350554351644137, 0.10929657498666526)

In [None]:
#15.430514484397264, 1.870
#25.323855666415096, 2.04
#(0.2759140360672336, 0.027068697645586715) B4-avg valid removed
# (0.2734350847838841, 0.02327469189485561 B4- normal
# (0.27831299424721084, 0.025122564177853777) B4- removed dedup B3- (0.2111375244700135, 0.029565576897924092)
# b2- (0.5196490011386177, 0.04002134748839136) B1- (0.8341912815176438, 0.051354008778274646)
# (0.27300572288236225, 0.0313929603326291) b4 - all valid B1-(0.8341912815176438, 0.051354008778274646)
# B2- (0.5149576588818612, 0.038928404112347284) B3- (0.2111438463900995, 0.031770312853950704)

# Calculate Iterative BLEU

In [37]:
# For each iteration's output file: fill the blanks, regroup by sample index,
# and report mean/std of corpus BLEU across the groups.
NUM_ITERATIONS=10
references = read_references("./data/valid_references.txt")
new_references = convert_refs_for_bleu(references)
for i in range(NUM_ITERATIONS):
    # Output files are numbered from 1.
    hypotheses = read_finetuned("./outputs/iterative/output"+str(i+1)+".txt")
    new_outs = replace_blanks(hypotheses)
    hypotheses_basic = group_hypotheses(new_outs)
    scores= []
    for hypothesis in hypotheses_basic:
        scores.append(sacrebleu.corpus_bleu(hypothesis, new_references, force=True).score)
    print(i+1, np.mean(scores), np.std(scores))

1 45.912540297281296 3.4403952558548094
2 45.56947622190687 3.4204548515571287
3 45.12694655547445 2.447968386056204
4 43.976455822976774 3.0820645781642146
5 41.817718432594965 4.451037005749699
6 40.68770212913243 4.920513065481337
7 40.40053468683705 4.364671329507598
8 37.89160120977187 5.57103195401114
9 38.97107484970193 3.9669088378543225
10 37.382091787598405 3.5880537553788336


# Sentence BLEU Iterative

In [312]:
# Print a BLEU score for every individual hypothesis against the references
# of its own input.
references = read_references("./data/test_references.txt")

hypotheses = read_finetuned("./outputs/generated_test_basic_large_g16_epoch1_removed.txt")
#new_outs = replace_blanks(hypotheses)
new_outs = hypotheses
hypotheses_basic = group_hypotheses(new_outs)
# NOTE(review): `scores` is initialised but never filled in this cell.
scores= []
for hypothesis in hypotheses_basic:
    for idx, hyp in enumerate(hypothesis):
        # Each reference of input idx becomes its own single-item stream.
        new_references = [[r] for r in references[idx]]
        # NOTE(review): a single string is passed as the hypothesis stream —
        # confirm the installed sacrebleu version accepts that.
        print(hyp, sacrebleu.corpus_bleu(hyp, new_references).score)

1. fill pan with water 2. put oil in pan 3. turn on burner on stove 4. put cake on pan 5. wait for it to harden 6. put butter in pan 7. add sugar 8. beat with a spoon 9. pour batter in pan 10. let cool for at least 30 minutes 11. eat cake 36.41462028372938
1. go to the library 2. purchase a book 3. take a copy to the library 4. sit down in the library 5. read the book 55.93462684156708
1. get in the car 2. drive to the airport 3. check in 4. get your ticket 5. go to the boarding gate 6. wait 7. board the airplane 74.16327246130167
1. check where you're going to be going. 2. take a seat. 3. find a good conductor. 4. wait for train. 5. sit and enjoy the ride. 56.46714979884917
1. find a seat 2. put on seatbelt 3. get in bus 4. find destination 38.0712823888797
1. get a loaf of bread 2. get a pan 3. get butter, salt and sugar 4. place the pan on the stove 5. turn the stove on 6. put the bread in the pan 7. put some sugar and butter in the pan 8. turn on the stove 9. cook the cake 10. take

In [55]:
# Score the first hypothesis of the first group against the references of input 0.
new_references = [[r] for r in references[0]]
sacrebleu.corpus_bleu(hypotheses_basic[0][0],new_references).score

45.07765970283899

In [50]:
# Show the first hypothesis of the first group.
hypotheses_basic[0][0]

'1. drive to the salon 2. pay for your hair 3. get your haircut 4. get out of car 5. walk to the hair bar 6. sit down 7. wait for your cut 8. get hair cut 9. start brushing  10. wash hair with shampoo 11. dry hair with towel 12. put hair back on'

In [314]:
# Show the first reference of the first input.
references[0][0]

'1. look up a recipe 2. purchase ingredients 3. set out ingredients 4. preheat oven 5. being adding ingredients 6. mix 7. pour batter into pan 8. place pan into oven 9. set a timer 10. remove cake 11. frost cake 12. set cake to cool 13. enjoy a slice of cake 14. enjoy another slice of cake 15. place remainder in microwave (to store, not to cook)'

# Event level BLEU

In [264]:
# Maps the imperative script name used in prompts to its gerund form; the
# event-level metrics fall back to this when a script name is not itself a
# key of `scenarios`.
dict_script = {
    "bake a cake": "baking a cake",
    "borrow a book from the library": "borrowing a book from the library",
    "change batteries in an alarm clock": "changing batteries in an alarm clock",
    "fly in an airplane": "flying in an airplane",
    "get a hair cut": "getting a hair cut",
    "go grocery shopping": "going grocery shopping",
    "go on a train": "going on a train",
    "plant a tree": "planting a tree",
    "repair a flat bicycle tire": "repairing a flat bicycle tire",
    "ride on a bus": "riding on a bus",
    "take a bath": "taking a bath",
    "order fastfood online": "ordering fastfood online",
    "cook in a microwave": "cooking in a microwave",
    "answer telephone": "answering telephone",
    "buy from a vending machine": "buying from a vending machine",
    "tie shoe laces": "tying shoe laces",
    "brush teeth": "brushing teeth",
    "make ginger paste": "making ginger paste",
    "go for a wedding": "going for a wedding",
    "attend a wedding": "attending a wedding",
    "wash a car": "washing a car",
    "take out trash": "taking out trash",
    "take a taxi": "taking a taxi",
    "surf the internet": "surfing the internet",
    "watch television": "watching television",
    "go to a club to dance": "going to a club to dance",
}

In [153]:
# NOTE(review): earlier cells already call corpus_bleu and chencherry; on a
# fresh kernel this import needs to run before them.
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction
# Shared smoothing object; the BLEU calls in this notebook use its method1.
chencherry = SmoothingFunction()

def event_level_bleu_metric(in_path, scenarios, prompt_prefix="<BOS> here is an ordered sequence of events that occur when you "):
    """Score generated scripts event by event against labelled paraphrases.

    Each line of ``in_path`` has the form "<prompt>: <numbered events>". The
    script name is the prompt with ``prompt_prefix`` removed. Every numbered
    event is compared, via smoothed 4-gram sentence BLEU, with the paraphrases
    of each event label of that script, and is assigned the best-scoring
    label.

    Args:
        in_path: path of the generated-output file.
        scenarios: mapping script name -> {event label: [paraphrase strings]}.
            If the script name is not a key, ``dict_script[script]`` is used.
        prompt_prefix: text removed from the prompt to obtain the script
            name. Pass a different prefix for files generated with another
            prompt template.

    Returns:
        ``(precision, coverage)``: two lists with one entry per line.
        precision is the mean best BLEU over the line's events (0.0 when the
        line has no events). coverage is the number of distinct assigned
        labels divided by the number of labels of the script.

    Raises:
        IndexError: if a line contains no ": ".
        KeyError: if the script is found in neither ``scenarios`` nor
            ``dict_script``.
    """
    eos = '<EOS>'
    close_tag = '</bevent>'
    precision = []
    coverage = []
    with open(in_path) as f:
        lines = f.readlines()
    for line in lines:
        # maxsplit=1 keeps any later ": " inside the event text.
        splitted = line.split(": ", 1)
        scenario = splitted[1].strip()
        # Remove tokens as a suffix/prefix; str.rstrip/lstrip take a character
        # set and can eat into the event text.
        if scenario.endswith(eos):
            scenario = scenario[:-len(eos)].rstrip()
        script = splitted[0].strip().replace(prompt_prefix, "")

        # Turn "1. a 2. b" into "</bevent> <bevent> a </bevent> <bevent> b</bevent>"
        # and drop the unmatched leading close tag.
        scenario = re.sub(r'\d+[.]', '</bevent> <bevent>', scenario)
        scenario = re.sub(r'<EEVENT>', '</bevent>', scenario)
        scenario = (scenario + close_tag).strip()
        if scenario.startswith(close_tag):
            scenario = scenario[len(close_tag):]
        soup = BeautifulSoup(scenario, 'html.parser')
        # .string is None when a tag holds nested markup; skip those events.
        events = [tag.string.strip() for tag in soup.find_all('bevent') if tag.string is not None]

        # Look the labels up once per line. Inside the per-event loop they
        # would be stale (or undefined) for a line with no events.
        if script in scenarios:
            event_label = scenarios[script]
        else:
            event_label = scenarios[dict_script[script]]

        bleus = []
        labels = []
        for event in events:
            max_bleu = -1
            max_label = ""
            for label in event_label:
                bleu = sentence_bleu([ref.split() for ref in event_label[label]], event.split(), weights=(0.25,0.25,0.25,0.25), smoothing_function=chencherry.method1)
                if bleu > max_bleu:
                    max_bleu = bleu
                    max_label = label
            bleus.append(max_bleu)
            labels.append(max_label)
        coverage.append(len(set(labels)) / len(set(event_label)))
        # np.mean([]) would give nan (with a warning) and poison later averages.
        precision.append(np.mean(bleus) if bleus else 0.0)
    return precision, coverage
    
def event_level_bleu_metric_gt(script,lines, scenarios):
    """Event-level BLEU precision/coverage for already-tokenised scripts.

    Same scoring as ``event_level_bleu_metric``, but the script name is given
    explicitly and each item of ``lines`` is a list of tokens (joined with
    spaces before the numbered events are extracted).

    Args:
        script: script name; looked up in ``scenarios`` directly, else via
            ``dict_script[script]``.
        lines: iterable of token lists, one per scenario.
        scenarios: mapping script name -> {event label: [paraphrase strings]}.

    Returns:
        ``(precision, coverage)``: two lists with one entry per scenario.
        precision is the mean best BLEU over the scenario's events (0.0 when
        it has none). coverage is distinct assigned labels divided by the
        number of labels of the script.

    Raises:
        KeyError: if the script is found in neither ``scenarios`` nor
            ``dict_script``.
    """
    close_tag = '</bevent>'
    coverage = []
    precision = []
    # The script is the same for every scenario, so resolve its labels once.
    # Resolving inside the per-event loop left them undefined for a scenario
    # without events.
    if script in scenarios:
        event_label = scenarios[script]
    else:
        event_label = scenarios[dict_script[script]]
    for scenario in lines:
        scenario = " ".join(scenario)
        scenario = re.sub(r'\d+[.]', '</bevent> <bevent>', scenario)
        scenario = re.sub(r'<EEVENT>', '</bevent>', scenario)
        scenario = (scenario + close_tag).strip()
        # Drop only the unmatched leading close tag; lstrip('</bevent>')
        # strips a character set and could eat into the text.
        if scenario.startswith(close_tag):
            scenario = scenario[len(close_tag):]
        soup = BeautifulSoup(scenario, 'html.parser')
        # .string is None when a tag holds nested markup; skip those events.
        events = [tag.string.strip() for tag in soup.find_all('bevent') if tag.string is not None]
        bleus = []
        labels = []
        for event in events:
            max_bleu = -1
            max_label = ""
            for label in event_label:
                bleu = sentence_bleu([ref.split() for ref in event_label[label]], event.split(), weights=(0.25,0.25,0.25,0.25), smoothing_function=chencherry.method1)
                if bleu > max_bleu:
                    max_bleu = bleu
                    max_label = label
            bleus.append(max_bleu)
            labels.append(max_label)
        coverage.append(len(set(labels)) / len(set(event_label)))
        # np.mean([]) would give nan (with a warning) and poison later averages.
        precision.append(np.mean(bleus) if bleus else 0.0)
    return precision, coverage
    

In [43]:
# Load the object passed as `scenarios` to the event-level metrics.
# presumably script name -> {event label: [paraphrases]}; verify against paraphrase.json
import json
with open('paraphrase.json') as f:
    scenarios = json.load(f)

In [49]:
#scenarios["riding on a bus"]

In [97]:
# Event-level precision/coverage for three output files of the same
# split/prompt: plain, "removed", and "removed_deduplicated".
split='test'
prompt='ordered'
precision, coverage = event_level_bleu_metric('./outputs/generated_'+split+'_'+prompt+'_large_g16_epoch1.txt', scenarios)
precision1, coverage1 = event_level_bleu_metric('./outputs/generated_'+split+'_'+prompt+'_large_g16_epoch1_removed.txt', scenarios)
precision2, coverage2 = event_level_bleu_metric('./outputs/generated_'+split+'_'+prompt+'_large_g16_epoch1_removed_deduplicated.txt', scenarios)
#precision3 = event_level_bleu_metric('./outputs/generated_test_basic_large_g16_epoch1_removed_deduplicated_ordered.txt', scenarios)

In [168]:
# Score the reference scripts stored at n_references[4] under the
# 'riding on a bus' script. This overwrites `precision` and `coverage`.
precision, coverage = event_level_bleu_metric_gt('riding on a bus', n_references[4] ,scenarios)

In [169]:
# Mean and standard deviation of the current `coverage` list.
print(np.mean(coverage), np.std(coverage))
# print(np.mean(coverage1), np.std(coverage1))
# print(np.mean(coverage2), np.std(coverage2))

0.3 0.06030226891555274


In [99]:
# B2
# 0.5983247099674841
# 0.6070936703721587
# 0.6023631342416226
# B1
# 0.8354362067915369
# 0.8396424111724123
# 0.8380486126849779
# B3
# 0.40014614710363317
# 0.40995982231730843
# 0.40171167046915657
# B4
# 0.13513015022015026
# 0.13706055315055318
# 0.13702165871165872
# B-Avg
# 0.34320921226025797
# 0.3484697176851206
# 0.345384799039688
# Print the mean of each precision list currently in the kernel.
print(np.mean(precision))
print(np.mean(precision1))
print(np.mean(precision2))

0.35864920454615157
0.36171876611805254
0.35902300884906396


In [869]:
# B-Avg valid
# 0.2964533926041073
# 0.30100009824419344
# 0.29913890204862414
# B1
# 0.8085896734621855
# 0.8241745783957904
# 0.8229712113924234
# B2
# 0.5552555777555778
# 0.5663572131572132
# 0.5666386946386947
# B3
# 0.27870865333234757
# 0.2840351926790629
# 0.2833808716914086
# B4
# 0.12213230241968633
# 0.12294634122039179
# 0.1197574523315029

# Rouge Calculation 

In [228]:
import files2rouge

In [243]:
def write_ref_file(refs, file_path='./ref.txt'):
    """Write each reference, whitespace-stripped, on its own line of ``file_path``.

    Args:
        refs: iterable of reference strings.
        file_path: destination file; overwritten if it exists.
    """
    with open(file_path, 'w') as out:
        out.writelines("{}\n".format(text.strip()) for text in refs)

def write_hyp_file(hyps, file_path='./hyp.txt'):
    """Write each hypothesis, whitespace-stripped, on its own line of ``file_path``.

    Args:
        hyps: iterable of hypothesis strings.
        file_path: destination file; overwritten if it exists.
    """
    with open(file_path, 'w') as out:
        out.writelines("{}\n".format(text.strip()) for text in hyps)
            
def average_rouge(score):
    """Average a list of ROUGE score dicts into a single dict.

    Args:
        score: iterable of dicts shaped
            ``{'rouge-1'|'rouge-2'|'rouge-l': {'f': .., 'p': .., 'r': ..}}``.

    Returns:
        A dict of the same shape whose values are the ``np.mean`` of the
        corresponding entries across ``score``.
    """
    metrics = ('rouge-1', 'rouge-2', 'rouge-l')
    stats = ('f', 'p', 'r')
    # Materialise first so a one-shot iterable can be walked once per stat.
    entries = list(score)
    averaged = {}
    for metric in metrics:
        averaged[metric] = {
            stat: np.mean([entry[metric][stat] for entry in entries])
            for stat in stats
        }
    return averaged

In [296]:
# Load the basic-prompt test outputs for the ROUGE computation and regroup by sample index.
hypotheses = read_finetuned('./outputs/generated_test_basic_large_g16_epoch1_removed.txt')
print(len(hypotheses))
hypotheses_basic = group_hypotheses(hypotheses)
hypotheses_basic[0]

25


['1. fill pan with water 2. put oil in pan 3. turn on burner on stove 4. put cake on pan 5. wait for it to harden 6. put butter in pan 7. add sugar 8. beat with a spoon 9. pour batter in pan 10. let cool for at least 30 minutes 11. eat cake',
 '1. go to the library 2. purchase a book 3. take a copy to the library 4. sit down in the library 5. read the book',
 '1. get in the car 2. drive to the airport 3. check in 4. get your ticket 5. go to the boarding gate 6. wait 7. board the airplane',
 "1. check where you're going to be going. 2. take a seat. 3. find a good conductor. 4. wait for train. 5. sit and enjoy the ride.",
 '1. find a seat 2. put on seatbelt 3. get in bus 4. find destination']

In [297]:
# For every hypothesis group, compute averaged ROUGE against each reference
# stream, then average those per-stream results.
# NOTE(review): relies on whatever `new_references` currently is in the kernel.
from rouge import Rouge 
rouge = Rouge()

scores = []
for hyps in hypotheses_basic:
    score = []
    for refs in new_references:
        score.append(rouge.get_scores(hyps, refs, avg=True))
    scores.append(average_rouge(score))

In [298]:
# Average the per-group ROUGE dicts into one overall result.
average_rouge(scores)

{'rouge-1': {'f': 0.4045293492369294,
  'p': 0.49325946865085796,
  'r': 0.3825668463623669},
 'rouge-2': {'f': 0.08401757846553003,
  'p': 0.10383870702386575,
  'r': 0.07852581671893599},
 'rouge-l': {'f': 0.44978031791239825,
  'p': 0.5407590209028927,
  'r': 0.4149776380076853}}

# CSR Length Analysis

In [None]:
# test length analysis
with open('./scripts/data/test_references.txt') as f:
    lines = f.readlines()
    length = []
    for line in lines:
        x = ast.literal_eval(line)
        for hyp in x:
            length.append(len(" ".join(hyp).split()))

In [3]:
# train length analysis
with open('./data/train.txt') as f:
    lines = f.readlines()
    length = []
    for line in lines:
        length.append(len(line.strip().split()))

In [6]:
# Mean and standard deviation of the collected lengths.
np.mean(length), np.std(length)

(61.28566552901024, 19.690557480899493)

In [163]:
# Per-event token lengths and per-scenario event counts for the
# token-delimited training file.
with open("./data/train_all_tokens.txt") as f:
    lines = f.readlines()
    length = []
    counts = []
    for scenario in lines:
        # Keep the text between the first and second ':' only.
        scenario = scenario.split(":")[1]
        scenario = scenario.replace('<EEVENT>','</bevent>').replace('<BEVENT>','<bevent>')
        soup = BeautifulSoup(scenario)
        #print(soup, scenario)
        events = []
        count=0
        for a in soup.find_all('bevent'):
            events.append(a.string)
        for e in events:
            # Events whose .string is None are not counted.
            if e is not None:
                count+=1
                length.append(len(e.strip().split()))
        counts.append(count)

print(np.mean(length), np.std(length))
print(np.mean(counts), np.std(counts))

4.646803227808815 2.464063799834829
8.247440273037542 2.7205202901583116


# Add additional tokens to GPT2

In [202]:
# Register the special tokens on the pretrained GPT-2 tokenizer and save the
# extended tokenizer to disk.
from pytorch_transformers import GPT2Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
special_tokens_dict = {'bos_token': '<BOS>', 'eos_token': '<EOS>', 'pad_token': '<PAD>', 'sep_token': '<SEP>', 'additional_special_tokens': ['<ANS>','<BLK>','<SCR>','<ESCR>', '<BEVENT>', '<EEVENT>']}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)

tokenizer.save_pretrained('./scripts/gpt-tokenizer/')

# Add additional tokens to RoBERTa

In [1]:
# Register the angle-bracket special tokens on the roberta-large tokenizer and save it.
from pytorch_transformers import RobertaTokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-large')
special_tokens_dict = {'additional_special_tokens': ['<ANS>','<BLK>','<SCR>','<ESCR>', '<BEVENT>', '<EEVENT>']}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)

tokenizer.save_pretrained('./roberta-tokenizer/')

('./roberta-tokenizer/vocab.json',
 './roberta-tokenizer/merges.txt',
 './roberta-tokenizer/special_tokens_map.json',
 './roberta-tokenizer/added_tokens.json')

In [17]:
# Same as the previous RoBERTa cell, but with square-bracket token spellings
# and a separate output directory.
from pytorch_transformers import RobertaTokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-large')
special_tokens_dict = {'additional_special_tokens': ['[ANS]','[BLK]','[SCR]','[ESCR]', '[BEVENT]', '[EEVENT]']}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)

tokenizer.save_pretrained('./roberta-tokenizer-new/')

('./roberta-tokenizer-new/vocab.json',
 './roberta-tokenizer-new/merges.txt',
 './roberta-tokenizer-new/special_tokens_map.json',
 './roberta-tokenizer-new/added_tokens.json')

In [13]:
# Hard-coded list of integers kept for inspection.
# NOTE(review): presumably token ids from one of the tokenizers above; the
# source of the values is not shown — confirm.
x = [    0, 10859,    16,    10, 13931,    9, 1061,    14,  1369,   150, 14814,    10,
   8492,    35,   112,     4,   356,    62,    10, 10324,   132,     4,  2229,  7075,
    155,     4,   278,   66,  7075,   204,     4,  1198, 25978, 12941,   195,     4,
    145,  1271,  7075,   231,     4, 50266,   406,     4,  9650, 15867,    88,  5730,
    290,     4,   317,  5730,    88, 12941,   361,     4,   278,    10, 35809,   158,
      4,  3438,  8492,   365,     4, 18082,  8492,   316,     4,   278,  8492,     7,
   3035,   508,     4, 50266,  1570,     4,  2254,   277, 15711,     9,  8492,   379,
      4,   317, 11059,    11, 28562,    36,   560,  1400,     6,    45,     7,  7142,
     43,     2,   225, 20768,    10, 15711,     9,  8492,     2, 39915,     2,     1]

In [535]:
# Register special tokens on the bert-large-uncased tokenizer, setting the
# CLS/SEP tokens to '<s>' and '</s>'.
from pytorch_transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
special_tokens_dict = { 'cls_token': '<s>','sep_token': '</s>','additional_special_tokens': ['<ANS>','<BLK>','<SCR>','<ESCR>', '<BEVENT>', '<EEVENT>']}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)


In [539]:
# Save whichever tokenizer is currently bound to `tokenizer`.
tokenizer.save_pretrained('./bert-tokenizer/')

('./bert-tokenizer/vocab.txt',
 './bert-tokenizer/special_tokens_map.json',
 './bert-tokenizer/added_tokens.json')

In [609]:
# Scratch check of torch.max along dim 1: `a` is the max values, `b` their indices.
import torch
a,b =torch.max(torch.tensor([[0.5,0.4]]),1)
print(a,b)

tensor([0.5000]) tensor([0])


# Mask Sentences

In [344]:
# Seed NumPy's global RNG, which the mask_sentences* functions draw from.
# Masks are reproducible only if this runs immediately before them.
np.random.seed(42)
def mask_sentences(in_path, out_path, prob=0.15):
    """Create infilling data by masking whole numbered events.

    Each input line has the form
    "<BOS> <SCR> script <ESCR>: <BEVENT> e1 <EEVENT> <BEVENT> e2 <EEVENT> ...".
    For every event one draw is taken from NumPy's global RNG
    (``np.random.uniform(0, 1)``). With probability ``prob`` the event is
    replaced by ``<BLK>`` in the prompt and written as "<n>. event <ANS>"
    after ``<SEP>``. Otherwise it is written in the prompt as "<n>. event".

    Args:
        in_path: token-delimited input file.
        out_path: destination file, one masked example per input line.
        prob: probability of masking each event.

    Raises:
        IndexError: if an input line contains no ':'.
    """
    with open(in_path) as f, open(out_path, 'w') as o:
        for line in f:
            # maxsplit=1 keeps colons that occur inside event text.
            splitted = line.split(":", 1)
            # Remove the markup as whole tokens; lstrip/rstrip with these
            # strings strip character sets and could eat into the script name.
            script = splitted[0]
            for token in ('<BOS>', '<SCR>', '<ESCR>'):
                script = script.replace(token, '')
            scenario = splitted[1].strip()
            if scenario.endswith('<EOS>'):
                scenario = scenario[:-len('<EOS>')]
            new_scenario = "<BOS> here is a sequence of events that happen while " + script.strip() + ": "
            answer = "<SEP> "
            scenario = scenario.replace('<EEVENT>','</bevent>').replace('<BEVENT>','<bevent>')
            soup = BeautifulSoup(scenario, 'html.parser')
            idx = 0
            for tag in soup.find_all('bevent'):
                e = tag.string
                # .string is None when a tag holds nested markup; such events
                # are skipped and consume neither a number nor a random draw.
                if e is None:
                    continue
                if np.random.uniform(0,1) <= prob:
                    new_scenario += "<BLK> "
                    answer += str(idx+1) + ". "+ e.strip() + " <ANS> "
                else:
                    new_scenario += str(idx+1) + ". " + e.strip() + " "
                idx += 1
            o.write("{}\n".format(new_scenario + answer + "<EOS>"))
            
def mask_sentences_num(in_path, out_path, prob=0.15):
    with open(in_path) as f, open(out_path, 'w') as o:
        lines = f.readlines()
        for scenario in lines:
            splitted = scenario.split(":")
            scenario = splitted[1].rstrip('<EOS>')
            script = splitted[0].lstrip('<BOS> <SCR> ').rstrip('<ESCR>')
            new_scenario = "<BOS> here is a sequence of events that happen while " + script.strip() + ": "
            answer = "<SEP> "
            scenario = scenario.replace('<EEVENT>','</bevent>').replace('<BEVENT>','<bevent>')
            soup = BeautifulSoup(scenario)
            #print(soup, scenario)
            events = []
            count=0
            for a in soup.find_all('bevent'):
                events.append(a.string)
            if len(events)>0:
                idx=0
                for e in events:
                    if e is not None:
                        mask = False
                        if np.random.uniform(0,1) <= prob:
                            mask = True
                        if mask:
                            new_scenario += str(idx+1) + ". <BLK> "
                            answer +=  e.strip() + " <ANS> "
                        else:
                            new_scenario += str(idx+1) + ". " + e.strip() + " "
                        idx+=1
            o.write("{}\n".format(new_scenario + answer + "<EOS>"))

def mask_sentences_tokens(in_path, out_path, prob=0.15):
    with open(in_path) as f, open(out_path, 'w') as o:
        lines = f.readlines()
        for scenario in lines:
            splitted = scenario.split(":")
            scenario = splitted[1].rstrip('<EOS>')
            script = splitted[0].lstrip('<BOS> <SCR> ').rstrip('<ESCR>')
            new_scenario = "<BOS> here is a sequence of events that happen while " + script.strip() + ": "
            answer = "<SEP> "
            scenario = scenario.replace('<EEVENT>','</bevent>').replace('<BEVENT>','<bevent>')
            soup = BeautifulSoup(scenario)
            #print(soup, scenario)
            events = []
            count=0
            for a in soup.find_all('bevent'):
                events.append(a.string)
            if len(events)>0:
                idx=0
                for e in events:
                    if e is not None:
                        mask = False
                        if np.random.uniform(0,1) <= prob:
                            mask = True
                        if mask:
                            new_scenario += "<BEVENT> <BLK> <EEVENT> "
                            answer +=  e.strip() + " <ANS> "
                        else:
                            new_scenario += "<BEVENT> " + e.strip() + " <EEVENT> "
                        idx+=1
            o.write("{}\n".format(new_scenario + answer + "<EOS>"))

In [345]:
# Build the masked training and validation files with the <BEVENT>/<EEVENT> variant.
# NOTE(review): the outputs are named "*_ilm_num_masked" although the token-style
# masker (not mask_sentences_num) is called — confirm this is intended.
mask_sentences_tokens("./data/train_all_tokens.txt","./data/train_ilm_num_masked.txt")
mask_sentences_tokens("./data/valid_all_tokens.txt","./data/valid_ilm_num_masked.txt")

In [198]:
def inference_mask_dataset(in_path, out_path, ref_path):
    """Create single-blank infilling inputs and their references for inference.

    Each non-blank input line is expected to look like
    '<BOS> <SCR> title <ESCR>: <BEVENT> e <EEVENT> ... <EOS>'. For every event
    position `blank_id`, that event is replaced by 'N. <BLK>' and one example
    is written for each context length from `blank_id + 1` events up to all
    events, i.e. the blank is followed by 0, 1, 2, ... later events.

    Args:
        in_path: token-annotated scenario file.
        out_path: receives the model inputs, each ending in '<SEP> '.
        ref_path: receives, line-aligned with `out_path`, the blanked event
            followed by ' <ANS> <EOS>'.

    Raises:
        IndexError: if a non-blank line contains no colon.
    """
    def cut(text, marker, at_end=False):
        # Whole-marker removal; str.lstrip/rstrip would strip a character set.
        if at_end:
            return text[:-len(marker)] if text.endswith(marker) else text
        return text[len(marker):] if text.startswith(marker) else text

    prompt = "<BOS> here is a sequence of events that happen while "
    with open(in_path) as f, open(out_path, 'w') as o, open(ref_path, 'w') as r:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines instead of failing on them
            # Split on the first colon only so a colon inside an event does
            # not truncate the scenario.
            parts = line.split(":", 1)
            title = parts[0].strip()
            for marker in ("<BOS>", "<SCR>"):
                title = cut(title, marker).lstrip()
            title = cut(title, "<ESCR>", at_end=True).strip()

            body = cut(parts[1].strip(), "<EOS>", at_end=True)
            body = body.replace('<EEVENT>', '</bevent>').replace('<BEVENT>', '<bevent>')
            texts = [tag.string for tag in BeautifulSoup(body).find_all('bevent')]
            # Events without a single text node are skipped, as before.
            events = [text.strip() for text in texts if text is not None]

            for blank_id in range(len(events)):
                context = prompt + title + ": "
                reference = ""
                for idx, event in enumerate(events):
                    if idx == blank_id:
                        reference += event + " <ANS> "
                        context += str(idx + 1) + ". <BLK> "
                    else:
                        context += str(idx + 1) + ". " + event + " "
                    # NOTE(review): an example is emitted after every event
                    # from the blank onwards (growing right-hand context);
                    # this mirrors the original loop — confirm it is intended.
                    if idx >= blank_id:
                        o.write("{}\n".format(context + "<SEP> "))
                        r.write("{}\n".format(reference + "<EOS>"))
                    

In [200]:
# Build the numbered single-blank inference inputs and references from the validation split.
inference_mask_dataset("./data/valid_all_tokens.txt","./data/valid_inference_ilm_num.txt", "./data/valid_inference_reference_num.txt")

# Prepare data for Classification

In [86]:
from itertools import combinations 
np.random.seed(42)

# Prompt written in front of the script title in the full-context files.
_CLS_PROMPT = "here is a sequence of events that happen while "


def _cls_cut(text, marker, at_end=False):
    """Remove `marker` from the start (or end) of `text` if present as a whole.

    str.lstrip/str.rstrip strip a *set of characters* and therefore cannot be
    used to remove markers such as '<BOS>' or '<EOS>' safely.
    """
    if at_end:
        return text[:-len(marker)] if text.endswith(marker) else text
    return text[len(marker):] if text.startswith(marker) else text


def _cls_event_texts(markup):
    """Return the raw text of every <bevent> element, skipping elements whose
    ``.string`` is None (no single text node)."""
    texts = [tag.string for tag in BeautifulSoup(markup).find_all('bevent')]
    return [text for text in texts if text is not None]


def _cls_parse_line(line):
    """Parse '<BOS> <SCR> script <ESCR>: <BEVENT> e <EEVENT> ... <EOS>'.

    Returns (script, events) with markers removed and event texts stripped.
    Raises IndexError if the line has no ': ' separator.
    """
    # First ': ' only, so a ': ' inside an event does not truncate the scenario.
    parts = line.split(": ", 1)
    script = parts[0].strip()
    for marker in ("<BOS>", "<SCR>"):
        script = _cls_cut(script, marker).lstrip()
    script = _cls_cut(script, "<ESCR>", at_end=True).strip()

    body = _cls_cut(parts[1].strip(), "<EOS>", at_end=True)
    body = body.replace('<EEVENT>', '</bevent>').replace('<BEVENT>', '<bevent>')
    return script, [text.strip() for text in _cls_event_texts(body)]


def _cls_blanked_context(events, blanked):
    """Render all events in order, replacing the positions in `blanked` by <BLK>."""
    pieces = []
    for position, event in enumerate(events):
        if position in blanked:
            pieces.append("<BEVENT> <BLK> <EEVENT> ")
        else:
            pieces.append("<BEVENT> " + event + " <EEVENT> ")
    return "".join(pieces)


def _cls_answer(earlier, later, label):
    """Render the candidate pair: original order for label 1, swapped for label 0."""
    if label == 1:
        return "</s> " + earlier + " </s> " + later + " "
    return "</s> " + later + " </s> " + earlier + " "


def classification_data_with_num(lines, out_path):
    """Rewrite classification examples with numbered instead of tagged events.

    Each input line is '<script>: <BEVENT> .. <EEVENT> ... </s> a </s> b <label>'.
    The event context is re-rendered as '1. e1 2. e2 ...'; the two '</s>'
    segments (the second one includes the label) are copied through.
    Lines without any parsable event are dropped.

    Raises:
        IndexError: if a non-blank line lacks ': ' or has fewer than two '</s>'.
    """
    with open(out_path, 'w') as o:
        for line in lines:
            if not line.strip():
                continue  # tolerate blank lines instead of failing on them
            parts = line.split(": ", 1)
            script = parts[0].strip()
            segments = parts[1].strip().split("</s>")
            answer = "</s> " + segments[1].strip() + " </s> " + segments[2].strip() + " "
            # '<BLK>' would be parsed as a tag, so protect it while parsing.
            body = segments[0].replace('<EEVENT>','</bevent>').replace('<BEVENT>','<bevent>').replace('<BLK>', '[BLK]')
            events = [text.replace('[BLK]', '<BLK>') for text in _cls_event_texts(body)]
            if events:
                numbered = "".join(
                    str(number) + ". " + event.strip() + " "
                    for number, event in enumerate(events, start=1)
                )
                o.write("{}\n".format(script + ": " + numbered + answer))


def classification_data(in_path, out_path):
    """Write one ordering example per scenario with two random events blanked.

    Output line: prompt + script + ': ' + events (two replaced by <BLK>) +
    '</s> x </s> y ' + label, where label 1 means x, y are in their original
    order and label 0 means they are swapped. Scenarios with fewer than two
    events are skipped (drawing two distinct positions is impossible).
    """
    with open(in_path) as f, open(out_path, 'w') as o:
        for line in f:
            if not line.strip():
                continue
            script, events = _cls_parse_line(line)
            # Drawn for every line, before the length check, so the random
            # stream stays aligned with the original implementation.
            label = np.random.randint(0,2)
            if len(events) < 2:
                continue
            first, second = sorted(np.random.choice(len(events), 2, replace=False))
            context = _CLS_PROMPT + script + ": " + _cls_blanked_context(events, (first, second))
            answer = _cls_answer(events[first], events[second], label)
            o.write("{}\n".format(context + answer + str(label)))


def classification_all_combinations_data(in_path, out_path):
    """Like classification_data, but emits one example for every event pair."""
    with open(in_path) as f, open(out_path, 'w') as o:
        for line in f:
            if not line.strip():
                continue
            script, events = _cls_parse_line(line)
            for first, second in combinations(range(len(events)), 2):
                label = np.random.randint(0,2)
                context = _CLS_PROMPT + script + ": " + _cls_blanked_context(events, (first, second))
                answer = _cls_answer(events[first], events[second], label)
                o.write("{}\n".format(context + answer + str(label)))


def classification_all_combinations_partial_data(in_path, out_path):
    """Emit every event pair with only the script title as context.

    Output line: script + ': ' + '</s> x </s> y ' + label.
    """
    with open(in_path) as f, open(out_path, 'w') as o:
        for line in f:
            if not line.strip():
                continue
            script, events = _cls_parse_line(line)
            for first, second in combinations(range(len(events)), 2):
                label = np.random.randint(0,2)
                answer = _cls_answer(events[first], events[second], label)
                o.write("{}\n".format(script + ": " + answer + str(label)))

In [126]:
# Generate the classification files; only the partial-context training set is
# built on this run, the other variants are kept commented out.
# classification_data("./data/valid_all_tokens.txt","./data/valid_classification.txt")
# classification_data("./data/train_all_tokens.txt","./data/train_classification.txt")
# classification_all_combinations_data("./data/valid_all_tokens.txt","./data/valid_classification_all.txt")
classification_all_combinations_partial_data("./data/train_all_tokens.txt","./data/train_classification_partial_context_all.txt")
# classification_all_combinations_partial_data("./data/valid_all_tokens.txt","./data/valid_classification_partial_context_all.txt")

In [575]:
# Load the training classification examples into the notebook-global `lines`.
with open("./data/train_classification.txt") as f:
    lines = f.readlines()

In [593]:
# Convert every classification split to its numbered-event form.
# `lines` is left holding the last split that was read, as before.
_numbered_conversions = [
    ("./data/train_classification.txt", "./data/train_classification_with_num.txt"),
    ("./data/valid_classification.txt", "./data/valid_classification_with_num.txt"),
    ("./data/test_classification.txt", "./data/test_classification_with_num.txt"),
]
for _source_path, _target_path in _numbered_conversions:
    with open(_source_path) as f:
        lines = f.readlines()
    classification_data_with_num(lines, _target_path)

# Prepare test data for classification

In [171]:
# Load the test references; the later cell iterates it as one collection of
# reference strings per test scenario.
references = read_references("./data/test_tokens_references.txt")

In [179]:
# Read the test prompts, one per line (line-aligned with `references` by index below).
with open("./data/test_tokens.txt") as f:
    scenarios = f.readlines()

In [180]:
# Pair every test prompt with each of its references: "<prompt> <reference>".
lines = [
    scenarios[position].strip() + " " + reference
    for position, reference_group in enumerate(references)
    for reference in reference_group
]

In [181]:
# Inspect the first combined prompt + reference line.
lines[0]

'<BOS> <SCR> baking a cake <ESCR>: <BEVENT> look up a recipe <EEVENT> <BEVENT> purchase ingredients <EEVENT> <BEVENT> set out ingredients <EEVENT> <BEVENT> preheat oven <EEVENT> <BEVENT> being adding ingredients <EEVENT> <BEVENT> mix <EEVENT> <BEVENT> pour batter into pan <EEVENT> <BEVENT> place pan into oven <EEVENT> <BEVENT> set a timer <EEVENT> <BEVENT> remove cake <EEVENT> <BEVENT> frost cake <EEVENT> <BEVENT> set cake to cool <EEVENT> <BEVENT> enjoy a slice of cake <EEVENT> <BEVENT> enjoy another slice of cake <EEVENT> <BEVENT> place remainder in microwave (to store, not to cook) <EEVENT>'

In [283]:
def _eval_cut(text, marker, at_end=False):
    """Remove `marker` from the start (or end) of `text` if present as a whole.

    str.lstrip/str.rstrip strip a *set of characters*; e.g.
    ``'<BEVENT> a'.lstrip('</bevent>')`` removes the leading '<' and breaks
    the first tag. This helper only removes the exact marker.
    """
    if at_end:
        return text[:-len(marker)] if text.endswith(marker) else text
    return text[len(marker):] if text.startswith(marker) else text


def _eval_event_texts(markup):
    """Return the stripped text of every <bevent> element in `markup`.

    Elements whose ``.string`` is None (no single text node) are skipped, in
    line with the masking functions in this notebook.
    """
    texts = [tag.string for tag in BeautifulSoup(markup).find_all('bevent')]
    return [text.strip() for text in texts if text is not None]


def _eval_tagged_events(body):
    """Extract events from '<BEVENT> e <EEVENT> ...' text."""
    body = body.strip()
    body = body.replace('<EEVENT>','</bevent>').replace('<BEVENT>','<bevent>')
    return _eval_event_texts(body)


def _eval_numbered_events(body):
    """Extract events from generated text in numbered form ('1. a 2. b ...').

    '<BEVENT> a <EEVENT>' style output is accepted as well, because
    '<EEVENT>' is mapped to a closing tag.
    """
    # Strip before removing '<EOS>': the trailing newline would otherwise hide
    # the marker, and a leftover '<EOS>' tag would end up inside the last event.
    body = _eval_cut(body.strip(), '<EOS>', at_end=True).rstrip()
    body = re.sub(r'\d+[.]', '</bevent> <bevent>', body)
    body = re.sub(r'<EEVENT>', '</bevent>', body)
    body = (body + '</bevent>').strip()
    # The first number produced a dangling closing tag; drop exactly that one.
    body = _eval_cut(body, '</bevent>')
    return _eval_event_texts(body)


def _eval_script(header, prompt):
    """Recover the script title from the text before ': ' for a prompt style.

    'expect' and 'ordered' map the recovered phrase through the
    notebook-global `dict_script` (defined outside this cell). Any prompt
    value other than the named ones is treated as '<BOS> <SCR> title <ESCR>'.
    """
    text = header.strip()
    if prompt=='direct':
        for marker in ('<BOS>', '<SCR>'):
            text = _eval_cut(text, marker).lstrip()
        text = _eval_cut(text, '<ESCR>', at_end=True)
    elif prompt=='describe':
        text = text.replace("<BOS> describe ","")
        text = text.replace(" in small sequences of short sentences","")
    elif prompt=='expect':
        text = dict_script[text.replace("<BOS> these are the things that happen when you ","")]
    elif prompt=='ordered':
        text = dict_script[text.replace("<BOS> here is an ordered sequence of events that occur when you ","")]
    elif prompt=='basic':
        text = text.replace("<BOS> here is a sequence of events that happen while ","")
    else:
        text = _eval_cut(text, '<ESCR>', at_end=True).rstrip().replace("<BOS> <SCR> ","")
    return text.strip()


def _eval_blanked_context(events, blanked):
    """Render all events in order, replacing the positions in `blanked` by <BLK>."""
    pieces = []
    for position, event in enumerate(events):
        if position in blanked:
            pieces.append("<BEVENT> <BLK> <EEVENT> ")
        else:
            pieces.append("<BEVENT> " + event + " <EEVENT> ")
    return "".join(pieces)


def _eval_answer(earlier, later, label):
    """Render the candidate pair: original order for label 1, swapped for label 0."""
    if label == 1:
        return "</s> " + earlier + " </s> " + later + " "
    return "</s> " + later + " </s> " + earlier + " "


def _eval_write_pairs(out, prefix, events, show_context):
    """Write one randomly-labelled ordering example for every event pair.

    One np.random.randint draw is made per pair. With `show_context` the
    remaining events are written between `prefix` and the candidate pair.
    """
    for first, second in combinations(range(len(events)), 2):
        label = np.random.randint(0,2)
        context = prefix
        if show_context:
            context += _eval_blanked_context(events, (first, second))
        answer = _eval_answer(events[first], events[second], label)
        out.write("{}\n".format(context + answer + str(label)))


def test_classification_data(lines, out_path):
    """Write one ordering example per line with two random events blanked.

    `lines` are '<BOS> <SCR> title <ESCR>: <BEVENT> e <EEVENT> ...' strings
    (test prompt + reference). '<BOS>' is removed, the '<SCR> ... <ESCR>'
    markers are kept in the output. Lines with fewer than two events are
    skipped, since two distinct positions cannot be drawn from them.
    """
    with open(out_path, 'w') as o:
        for line in lines:
            if not line.strip():
                continue  # tolerate blank lines instead of failing on them
            # First ': ' only, so a ': ' inside an event is not a separator.
            parts = line.split(": ", 1)
            script = _eval_cut(parts[0].strip(), '<BOS>').strip()
            events = _eval_tagged_events(parts[1])
            # Drawn before the length check to keep the random stream aligned
            # with the original implementation.
            label = np.random.randint(0,2)
            if len(events) < 2:
                continue
            first, second = sorted(np.random.choice(len(events), 2, replace=False))
            context = script + ": " + _eval_blanked_context(events, (first, second))
            answer = _eval_answer(events[first], events[second], label)
            o.write("{}\n".format(context + answer + str(label)))


def test_classification_all_combinations_data(lines, out_path):
    """Write an ordering example, with full blanked context, for every event pair.

    Same input format as test_classification_data; the output context starts
    with 'here is a sequence of events that happen while ' + script.
    """
    with open(out_path, 'w') as o:
        for line in lines:
            if not line.strip():
                continue
            parts = line.split(": ", 1)
            script = _eval_cut(parts[0].strip(), '<BOS>').strip()
            events = _eval_tagged_events(parts[1])
            prefix = "here is a sequence of events that happen while " + script + ": "
            _eval_write_pairs(o, prefix, events, show_context=True)


def test_classification_all_combinations_partial_data_from_lines(lines, out_path):
    """Write every event pair of tagged reference lines with only the script as context.

    The basic prompt prefix, when present, is removed from the text before
    ': '; any other header text is kept as is.
    """
    with open(out_path, 'w') as o:
        for line in lines:
            if not line.strip():
                continue
            parts = line.strip(' ').split(": ", 1)
            script = parts[0].strip().replace("<BOS> here is a sequence of events that happen while ","").strip()
            events = _eval_tagged_events(parts[1])
            _eval_write_pairs(o, script + ": ", events, show_context=False)


def test_classification_all_combinations_partial_data(in_path, out_path, prompt):
    """Write every event pair of generated scenarios with only the script as context.

    Args:
        in_path: generated output, one scenario per line ('<header>: <events>').
        out_path: receives 'script: </s> x </s> y label' lines.
        prompt: prompt style used for generation; selects how the script title
            is recovered from the header (see _eval_script).
    """
    with open(in_path) as f, open(out_path, 'w') as o:
        for line in f:
            if not line.strip():
                continue
            parts = line.split(": ", 1)
            script = _eval_script(parts[0], prompt)
            events = _eval_numbered_events(parts[1])
            _eval_write_pairs(o, script + ": ", events, show_context=False)


def evaluation_consecutive_ordering_data(in_path, out_path, prompt):
    """Write 'script;event_i;event_i+1' for each pair of neighbouring generated events.

    Args:
        in_path: generated output, one scenario per line ('<header>: <events>').
        out_path: receives one ';'-separated line per consecutive event pair.
        prompt: prompt style used for generation (see _eval_script).
    """
    with open(in_path) as f, open(out_path, 'w') as o:
        for line in f:
            if not line.strip():
                continue
            parts = line.split(": ", 1)
            script = _eval_script(parts[0], prompt)
            events = _eval_numbered_events(parts[1])
            for current, following in zip(events, events[1:]):
                o.write("{}\n".format(script + ";" + current + ";" + following))



In [29]:
# test_classification_data(lines, "./data/test_classification.txt")
# test_classification_data(lines, "./data/test_classification_all.txt")

In [187]:
# The function requires the prompt style; the input file was generated with the
# "basic" prompt (see its name), so pass it explicitly instead of omitting it.
test_classification_all_combinations_partial_data("./outputs/generated_valid_basic_large_g16_epoch1.txt","./data/valid_classification_basic_output.txt", "basic")

In [263]:
# Build pairwise ordering examples from the validation output generated with
# the "describe" prompt; `prompt` selects both the file names and the header parsing.
prompt="describe"
test_classification_all_combinations_partial_data("./outputs/generated_valid_"+prompt+"_large_g16_epoch1_removed_deduplicated.txt","./data/valid_classification_"+prompt+"_output_removed_deduplicated.txt",prompt)

In [125]:
# Build pairwise ordering examples from the notebook-global `lines`.
# NOTE(review): `lines` is reassigned by several cells; make sure it holds the
# test prompt + reference lines when this cell runs.
test_classification_all_combinations_partial_data_from_lines(lines,"./data/test_classification_partial_context_all.txt")

In [271]:
# 'all_tokens' is not one of the named prompt styles, so the function falls
# through to its default '<BOS> <SCR> title <ESCR>' header handling.
prompt='all_tokens'
test_classification_all_combinations_partial_data("./outputs/generated_test_"+prompt+"_large_g16_epoch1_removed_deduplicated.txt","./data/test_classification_"+prompt+"_output_removed_deduplicated.txt", prompt)

# New Scenarios

In [319]:
# NOTE(review): `prompt` is whatever an earlier cell left behind; the input file
# name suggests the "basic" prompt — confirm and consider passing it explicitly.
test_classification_all_combinations_partial_data("./outputs/folds/generated_new_scenarios_basic_large_fold1_g16_epoch1_removed_deduplicated.txt","./data/folds/test_classification_new_scenarios_basic_large_fold1_g16_removed_deduplicated.txt",prompt)

# Manual Evaluation Data

In [131]:
# Build the consecutive-event pairs for manual evaluation. The function needs
# the prompt style to recover the script title, so `prompt` is passed through.
prompt='ordered'
split="test"
evaluation_consecutive_ordering_data("./outputs/generated_"+ split +"_"+prompt+"_large_g16_epoch1_removed_deduplicated_ordered.txt","./data/"+split+"_ordering.txt", prompt)



In [180]:
# NOTE(review): evaluation_consecutive_ordering_data takes a third `prompt`
# argument that is not passed here, so this call fails with the definition
# above — supply the prompt style this file was generated with.
evaluation_consecutive_ordering_data("./outputs/generated_new_scenarios_ordered.txt","./data/new_scenarios_ordering.txt")


# Classification data analysis

In [511]:
def read_data(data_path):
    """Read a classification file whose lines end in a single-digit label.

    Args:
        data_path: text file with one example per line; the last
            non-whitespace character of each line is the label digit.

    Returns:
        (inputs, labels): the example texts without the label (stripped) and
        the labels as ints, in file order. Blank lines are ignored.

    Raises:
        ValueError: if a line's last character is not a digit.
    """
    inputs = []
    labels = []
    with open(data_path, 'r') as f:
        for line in f:
            record = line.strip()
            if not record:
                # e.g. a trailing empty line; indexing [-1] would fail on it
                continue
            inputs.append(record[:-1].strip())
            labels.append(int(record[-1]))
    return inputs, labels

In [514]:
# Only the labels are needed for the class-balance check below.
_, labels = read_data('./data/valid_classification.txt')

In [515]:
# Count how often each label occurs to check that the two classes are balanced.
from collections import Counter
x = Counter(labels)
x.keys(), x.values()

(dict_keys([0, 1]), dict_values([124, 126]))

# Prepare iterative data

In [436]:
# Load the generated validation scenarios (basic prompt) as the starting point
# for iterative infilling.
hypotheses = read_finetuned("./outputs/generated_valid_basic_large_g16_epoch1.txt")

In [438]:
# Inspect the first generated scenario (prompt followed by numbered events).
hypotheses[0]

'<BOS> here is a sequence of events that happen while getting a hair cut: 1. drive to the salon 2. pay for your hair 3. get your haircut 4. get out of car 5. walk to the hair bar 6. sit down 7. wait for your cut 8. get hair cut 9. get shampoo 10. wash hair with shampoo 11. dry hair with towel 12. put hair back on'

In [279]:
import re
def iterative_mask_sentences_num(out_path, lines):
    """Blank one random event in each numbered scenario and write it as an infilling input.

    Args:
        out_path: output file; each line is
            '<header>: 1. a 2. <BLK> 3. c <SEP>'.
        lines: scenarios of the form '<header>: 1. a 2. b ...', optionally
            ending in '<EOS>'.

    Events are renumbered from 1. A scenario in which no event can be parsed
    is written with an empty event list (no random draw is made for it).

    Raises:
        IndexError: if a non-blank line contains no colon.
    """
    with open(out_path, 'w') as o:
        for line in lines:
            if not line.strip():
                continue  # tolerate blank lines instead of failing on them
            # Split on the first colon only so a colon inside an event does
            # not truncate the scenario.
            parts = line.split(":", 1)
            context = parts[0].strip() + ": "

            # Remove the '<EOS>' marker as a whole; str.rstrip('<EOS>') strips
            # a character set and is defeated by a trailing newline.
            body = parts[1].strip()
            if body.endswith('<EOS>'):
                body = body[:-len('<EOS>')].rstrip()
            # Every "N." starts a new event: close the previous one, open the next.
            body = re.sub(r'\d+[.]', '</bevent> <bevent>', body)
            body = (body + '</bevent>').strip()
            # The first number left a dangling closing tag; drop exactly that one.
            if body.startswith('</bevent>'):
                body = body[len('</bevent>'):]
            texts = [tag.string for tag in BeautifulSoup(body).find_all('bevent')]
            # Filter unusable events first so the blank always lands on a real event.
            events = [text.strip() for text in texts if text is not None]

            if events:
                # randint(0, 0) raises ValueError, so only draw when there is a choice.
                blk = np.random.randint(0, len(events))
                for idx, event in enumerate(events):
                    if idx == blk:
                        context += str(idx+1) + ". <BLK> "
                    else:
                        context += str(idx+1) + ". " + event + " "
            o.write("{}\n".format(context + "<SEP>"))

In [451]:
# Write the first round of iterative infilling inputs from the generated scenarios.
iterative_mask_sentences_num("./outputs/iterative/input1.txt", hypotheses)

# Convert into glue

In [561]:
import csv
# Convert the no-context classification file ("<sentence1> </s> <sentence2> <label>")
# into a GLUE RTE-style dev.tsv. The output is opened with newline='' as the
# csv module requires, so the writer controls line endings on every platform.
with open("./data/valid_classification_no_context.txt") as f, open("../GPT/transformers/glue/RTE/dev.tsv", 'w', newline='') as g:
    writer = csv.writer(g, delimiter='\t')
    lines = f.readlines()
    writer.writerow(['index', 'sentence1', 'sentence2', 'label'])
    for idx, line in enumerate(lines):
        sentences_label = line.strip().split(' </s> ')
        sentence1 = sentences_label[0].strip()
        # The last character of the second field is the 0/1 label.
        sentence2 = sentences_label[1].strip()[0:-1].strip()
        label = int(sentences_label[1].strip()[-1])
        label = "not_entailment" if label==0 else "entailment"
        if idx==0:
            # Show the first converted example as a sanity check.
            print(sentence1, sentence2, label)
        writer.writerow([idx+1, sentence1, sentence2, label])

test ride bike. remove wheel. not_entailment


In [643]:
# Convert the MRPC test.tsv into "<sentence1> </s> <sentence2> <first column>" lines.
# The header row is skipped and only rows with exactly five tab-separated fields are kept.
# NOTE(review): the value written last is column 0 of the TSV — confirm that
# this column holds the label rather than a row index.
with open('../GPT/transformers/glue/MRPC/test.tsv') as g, open("./data/mrpc_test.txt", 'w') as f:
    lines = g.readlines()
    for raw_row in lines[1:]:
        fields = raw_row.strip().split('\t')
        if len(fields) != 5:
            continue
        label = int(fields[0].strip())
        first_sentence = fields[3].strip().lower()
        second_sentence = fields[4].strip().lower()
        f.write("{} </s> {} {}\n".format(first_sentence, second_sentence, label))
        

In [512]:
def convert_to_mrpc(in_path='./outputs/generated_test_basic_large_g16_epoch1_removed.txt', out_path_mrpc='../GPT/transformers/glue/MRPC/test.tsv', out_path_qqp='../GPT/transformers/glue/QQP/test_test.tsv'):
    """Export every pair of events of each generated scenario as MRPC and QQP test rows.

    Args:
        in_path: generated scenarios, one per line ('<header>: 1. a 2. b ...').
        out_path_mrpc: TSV with columns index, '#1 ID', '#2 ID', '#1 String',
            '#2 String' (both IDs are written as 0).
        out_path_qqp: TSV with columns id, question1, question2.

    The row index runs continuously over all scenarios, starting at 0. Events
    without a single text node are skipped.

    Raises:
        IndexError: if a non-blank line contains no ': ' separator.
    """
    # newline='' lets the csv writers control the line endings, as the csv
    # module documentation requires.
    with open(in_path) as f, \
            open(out_path_mrpc, 'w', newline='') as mrpc_file, \
            open(out_path_qqp, 'w', newline='') as qqp_file:
        mrpc = csv.writer(mrpc_file, delimiter='\t')
        mrpc.writerow(['index','#1 ID', '#2 ID', '#1 String', '#2 String'])
        qqp = csv.writer(qqp_file, delimiter='\t')
        qqp.writerow(['id','question1','question2'])
        index = 0
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines instead of failing on them
            # First ': ' only, so a ': ' inside an event does not cut the scenario.
            body = line.split(": ", 1)[1].strip()
            # Remove the marker as a whole; rstrip(' <EOS>') would strip a character set.
            if body.endswith('<EOS>'):
                body = body[:-len('<EOS>')].rstrip()
            # Every "N." starts a new event: close the previous one, open the next.
            body = re.sub(r'\d+[.]', '</bevent> <bevent>', body)
            body = (body + '</bevent>').strip()
            # The first number left a dangling closing tag; drop exactly that one.
            if body.startswith('</bevent>'):
                body = body[len('</bevent>'):]
            texts = [tag.string for tag in BeautifulSoup(body).find_all('bevent')]
            events = [text.strip() for text in texts if text is not None]
            for first, second in combinations(range(len(events)), 2):
                mrpc.writerow([index, 0,0, events[first], events[second]])
                qqp.writerow([index, events[first], events[second]])
                index+=1
    

In [513]:
# Run the export with the default input and output paths.
convert_to_mrpc()

# Relevant Classifier Data

In [275]:
random.seed(42)
np.random.seed(42)
def train_relevant_classification_positive_data(in_path, out_path):
    """Write one positive (label 1) relevance example per event and collect events by script.

    Args:
        in_path: token-annotated scenarios, one per line
            ('<BOS> <SCR> script <ESCR>: <BEVENT> e <EEVENT> ... <EOS>').
        out_path: receives 'script: </s> event 1' lines.

    Returns:
        dict mapping each script title to the list of all its event texts, in
        file order (a script whose scenarios yield no events maps to []).

    Raises:
        IndexError: if a non-blank line contains no ': ' separator.
    """
    def cut(text, marker, at_end=False):
        # Whole-marker removal; str.lstrip/rstrip would strip a character set.
        if at_end:
            return text[:-len(marker)] if text.endswith(marker) else text
        return text[len(marker):] if text.startswith(marker) else text

    label = 1 #only positive examples now
    scene_dict = {}
    with open(in_path) as f, open(out_path, 'w') as o:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines instead of failing on them
            # First ': ' only, so a ': ' inside an event does not cut the scenario.
            parts = line.split(": ", 1)
            script = parts[0].strip()
            for marker in ("<BOS>", "<SCR>"):
                script = cut(script, marker).lstrip()
            script = cut(script, "<ESCR>", at_end=True).strip()

            body = cut(parts[1].strip(), "<EOS>", at_end=True)
            body = body.replace('<EEVENT>','</bevent>').replace('<BEVENT>','<bevent>')
            texts = [tag.string for tag in BeautifulSoup(body).find_all('bevent')]

            script_events = scene_dict.setdefault(script, [])
            for text in texts:
                if text is None:
                    continue  # tag without a single text node; nothing to emit
                event = text.strip()
                script_events.append(event)
                o.write("{}\n".format(script + ": " + "</s> " + event + " " + str(label)))
    return scene_dict

def _script_from_prompt_header(header, prompt):
    """Recover the script name from the text that precedes the first ': '.

    ``prompt`` selects which fixed prompt wording is removed. For 'expect' and
    'ordered' the remaining text is looked up in the module-level
    ``dict_script`` mapping (defined elsewhere in the notebook).
    """
    header = header.strip()
    if prompt == 'describe':
        script = header.replace("<BOS> describe ", "")
        script = script.replace(" in small sequences of short sentences", "")
    elif prompt == 'expect':
        script = dict_script[header.replace("<BOS> these are the things that happen when you ", "")]
    elif prompt == 'ordered':
        script = dict_script[header.replace("<BOS> here is an ordered sequence of events that occur when you ", "")]
    elif prompt == 'basic':
        script = header.replace("<BOS> here is a sequence of events that happen while ", "")
    else:
        # 'direct' and any other prompt value: "<BOS> <SCR> script <ESCR>".
        # The markers are removed as whole tokens; str.lstrip/rstrip would
        # treat them as a set of characters and could eat part of the name.
        script = re.sub(r'^(?:<BOS>\s*)?(?:<SCR>\s*)?', '', header)
        script = re.sub(r'\s*<ESCR>$', '', script)
    return script.strip()


def _events_from_generated_text(text):
    """Split generated text ("1. ... 2. ..." and/or <EEVENT>-terminated) into events."""
    # Strip whitespace BEFORE removing <EOS>: lines read from a file still end
    # in "\n", which previously prevented the <EOS> token from being removed.
    text = re.sub(r'\s*<EOS>$', '', text.strip())
    # Every "N." starts a new event and closes the previous one.
    text = re.sub(r'\d+[.]', '</bevent> <bevent>', text)
    text = re.sub(r'<EEVENT>', '</bevent>', text)
    text = text.strip() + ' </bevent>'
    # Drop only the single stray closing tag produced by the first "N.".
    # lstrip('</bevent>') strips a character set and would also destroy a
    # leading <BEVENT>/<bevent> tag, losing the first event.
    if text.startswith('</bevent>'):
        text = text[len('</bevent>'):]
    # Pin the parser so results do not depend on optional installed parsers.
    soup = BeautifulSoup(text, 'html.parser')
    events = []
    for tag in soup.find_all('bevent'):
        # get_text() never returns None (tag.string does for empty or nested
        # tags); empty events are dropped.
        event = tag.get_text().strip()
        if event:
            events.append(event)
    return events


def generated_relevant_classification_positive_data(in_path, out_path, prompt):
    """Convert generated output into relevance-classifier input lines.

    Every line of ``in_path`` is split at its first ": " into a prompt header
    and the generated text. For each extracted event the line
    ``"<script>: </s> <event> 1"`` is written to ``out_path`` (overwritten).

    Args:
        in_path: file with one generated scenario per line.
        out_path: classification file to create.
        prompt: prompt style used for generation ('direct', 'describe',
            'expect', 'ordered', 'basic'); any other value is handled like
            the "<BOS> <SCR> script <ESCR>" header format.
    """
    label = 1  # only positive examples are produced here
    with open(in_path) as f, open(out_path, 'w') as o:
        for line in f:
            # First ": " only, so events containing ": " stay intact; lines
            # without the separator (e.g. blank lines) are skipped.
            header, sep, generated = line.partition(": ")
            if not sep:
                continue
            script = _script_from_prompt_header(header, prompt)
            for event in _events_from_generated_text(generated):
                o.write("{}: </s> {} {}\n".format(script, event, label))


def evaluation_relevant_classification_positive_data(in_path, out_path,prompt):
    """Convert generated output into "<script>;<event> " lines for manual evaluation.

    Parsing is identical to generated_relevant_classification_positive_data;
    only the output format differs (semicolon separator, no "</s>", no label).

    Args:
        in_path: file with one generated scenario per line.
        out_path: evaluation file to create (overwritten).
        prompt: prompt style used for generation (same values as above).
    """
    with open(in_path) as f, open(out_path, 'w') as o:
        for line in f:
            header, sep, generated = line.partition(": ")
            if not sep:
                continue
            script = _script_from_prompt_header(header, prompt)
            for event in _events_from_generated_text(generated):
                o.write("{};{} \n".format(script, event))

def test_relevant_classification_positive_data(lines, out_path):
    """Write one positive relevance example per event of the given reference lines.

    Args:
        lines: iterable of scenario strings, assumed to look like
            ``<BOS> <SCR> script <ESCR>: <BEVENT> e1 <EEVENT> ...`` (the
            original note says they come from the "tokens references with
            text" file -- confirm against the caller).
        out_path: classification file to create (overwritten); each event
            becomes a line ``"<script>: </s> <event> 1"``.
    """
    label = 1  # only positive examples are produced here
    with open(out_path, 'w') as o:
        for line in lines:
            # Split on the FIRST ": " only, so event text containing ": " is
            # kept; entries without the separator (e.g. blank) are skipped.
            header, sep, scenario = line.partition(": ")
            if not sep:
                continue

            # Remove the markers as whole tokens; str.lstrip/rstrip strip a
            # set of characters and could eat part of the script name.
            script = re.sub(r'^(?:<BOS>\s*)?(?:<SCR>\s*)?', '', header.strip())
            script = re.sub(r'\s*<ESCR>$', '', script).strip()

            scenario = scenario.strip()
            scenario = scenario.replace('<EEVENT>', '</bevent>').replace('<BEVENT>', '<bevent>')
            # Pin the parser so results do not depend on optional installed parsers.
            soup = BeautifulSoup(scenario, 'html.parser')

            for tag in soup.find_all('bevent'):
                # get_text() never returns None (tag.string does for empty or
                # nested tags); empty events are dropped.
                event = tag.get_text().strip()
                if event:
                    o.write("{}: </s> {} {}\n".format(script, event, label))



def train_relevant_classification_negative_data(scene_dict, out_path):
    """Append one negative (label 0) example for every positive event.

    For each event of a script, a different script is drawn at random and one
    of that script's events is paired with the current script name, giving the
    line ``"<script>: </s> <foreign event> 0"``.

    Args:
        scene_dict: mapping script name -> list of event strings, as returned
            by train_relevant_classification_positive_data.
        out_path: file to append to (opened with 'a'), so it can extend the
            file the positive examples were written to.

    Raises:
        ValueError: if a script has events but no other script with events
            exists to sample a contrastive event from.

    Sampling uses the global np.random state; seed it beforehand for
    reproducible output. When every script has at least one event, the random
    draws are the same as with the previous implementation.
    """
    label = 0  # negative example
    # Only scripts that actually own events can donate a contrastive event;
    # sampling from an empty event list makes np.random.choice raise.
    donors = [scene for scene, events in scene_dict.items() if len(events) > 0]
    with open(out_path, 'a') as f:
        for scene, events in scene_dict.items():
            if len(events) == 0:
                continue
            # Built once per script instead of deep-copying the list per event.
            candidates = [other for other in donors if other != scene]
            if not candidates:
                raise ValueError(
                    "no other script with events to sample a negative example for {!r}".format(scene))
            for _ in events:
                contrastive_scene = np.random.choice(candidates, 1)[0]
                event = np.random.choice(scene_dict[contrastive_scene], 1)[0]
                f.write("{}: </s> {} {}\n".format(scene.strip(), event.strip(), label))

In [165]:
# Order matters: the positive builder creates/overwrites the file (mode 'w') and returns
# the script -> events mapping; the negative builder then appends (mode 'a') to the same file.
scene_dict = train_relevant_classification_positive_data('./data/train_all_tokens.txt', './data/train_relevant_classification.txt')
train_relevant_classification_negative_data(scene_dict, './data/train_relevant_classification.txt')

In [170]:
# Same two-step build for the validation split: positives overwrite the file,
# negatives are appended to it afterwards.
scene_dict = train_relevant_classification_positive_data('./data/valid_all_tokens.txt', './data/valid_relevant_classification.txt')
train_relevant_classification_negative_data(scene_dict, './data/valid_relevant_classification.txt')

In [168]:
# NOTE(review): this repeats the positive-only step for the validation split. Because the
# positive builder opens its output with 'w', running this cell after the negatives have
# been appended leaves the validation file with positive examples only.
scene_dict = train_relevant_classification_positive_data('./data/valid_all_tokens.txt', './data/valid_relevant_classification.txt')

In [186]:
# NOTE(review): `lines` is not defined in this cell; it must already hold the reference
# scenarios from another cell -- confirm which one before re-running.
test_relevant_classification_positive_data(lines, './data/test_relevant_classification.txt')

In [704]:
# "all_tokens" matches none of the named prompt styles, so the header is parsed by the
# fallback branch that expects the "<BOS> <SCR> script <ESCR>" format.
prompt="all_tokens"
generated_relevant_classification_positive_data('./outputs/generated_test_'+prompt+'_large_g16_epoch1.txt', './data/test_relevant_classification_all_tokens_output.txt',prompt)

# New Scenarios

In [281]:
# 'basic' prompt: the script name is what remains of the header after removing
# "<BOS> here is a sequence of events that happen while ".
prompt="basic"
generated_relevant_classification_positive_data('./outputs/folds/generated_new_scenarios_basic_large_fold1_g16_epoch1.txt', './data/folds/test_relevant_classification_new_scenarios_basic.txt', prompt)

In [38]:
# Load the training scenarios and count the whitespace-separated tokens on each line.
with open('./data/train_all_tokens.txt') as f:
    lines = list(f)
length = [len(text.split()) for text in lines]

# Manual Evaluation Data

In [134]:
split='test'
prompt="ordered"
# The function requires the prompt style as its third argument; without it the call
# raises TypeError. The same value is also part of the input file name.
evaluation_relevant_classification_positive_data(
    './outputs/generated_'+split+'_'+prompt+'_large_g16_epoch1_removed_deduplicated_ordered.txt',
    './data/'+split+'_relevant_evaluation.txt',
    prompt)


In [179]:
# The prompt style is a required third argument (omitting it raises TypeError).
# NOTE(review): "ordered" is inferred from the input file name -- confirm. That style looks
# the script up in dict_script, so the new scenarios must be present in that mapping.
evaluation_relevant_classification_positive_data(
    './outputs/generated_new_scenarios_ordered.txt',
    './data/new_scenarios_ordered_relevant_evaluation.txt',
    "ordered")

# Shuffle and create valid classification data

In [277]:
# Scenario held out as validation data for each of the eight folds (fold number -> script name).
fold_val = {
    1: 'cooking pasta',
    2: 'going bowling',
    3: 'planting a tree',
    4: 'going grocery shopping',
    5: 'taking the underground',
    6: 'paying with a credit card',
    7: 'eating in a fast food restaurant',
    8: 'getting a hair cut',
}
# Fix the shuffle order used when the per-fold training files are rewritten.
random.seed(42)

In [279]:
def _hold_out_scenario(train_path, valid_path, scenario):
    """Move the lines of ``scenario`` from train_path to valid_path; shuffle the rest in place."""
    with open(train_path, 'r') as f:
        lines = [line.strip() for line in f]

    # A line belongs to the scenario when the text before its first ': ' matches.
    held_out = [line for line in lines if line.split(': ')[0] == scenario]
    if not held_out:
        # Nothing to move (typically the split already happened on an earlier
        # run). Leave both files untouched instead of truncating an existing
        # validation file to zero lines.
        print("Skipping {}: no lines for scenario {!r}".format(train_path, scenario))
        return

    kept = [line for line in lines if line.split(': ')[0] != scenario]
    random.shuffle(kept)

    # Write the validation file first so the training file is only rewritten
    # once the held-out lines are safely on disk.
    with open(valid_path, 'w') as g:
        for line in held_out:
            g.write("{}\n".format(line))
    with open(train_path, 'w') as f:
        for line in kept:
            f.write("{}\n".format(line))


def shuffle_create_valid_data():
    """Carve a validation file out of every per-fold training file.

    For folds 1-8, first for the partial-context classification data and then
    for the relevancy classification data, the lines whose scenario equals
    ``fold_val[fold]`` are moved to the matching ``valid_*`` file and the
    remaining training lines are shuffled and written back to the training
    file. The training files are modified in place.

    Shuffling uses the global ``random`` state; seed it beforehand for a
    reproducible order. Re-running is safe: a fold whose training file no
    longer contains the held-out scenario is skipped.
    """
    path_templates = (
        ("./data/folds/train_classification_partial_context_all_fold{}.txt",
         "./data/folds/valid_classification_partial_context_all_fold{}.txt"),
        # relevancy train data
        ("./data/folds/train_relevant_classification_fold{}.txt",
         "./data/folds/valid_relevant_classification_fold{}.txt"),
    )
    for train_template, valid_template in path_templates:
        for fold in range(1, 9):
            _hold_out_scenario(
                train_template.format(fold), valid_template.format(fold), fold_val[fold])



In [280]:
# Rewrites the per-fold training files under ./data/folds in place and creates the
# matching valid_* files.
shuffle_create_valid_data()

In [304]:
# Scratch: all 2-element index combinations of range(8).
# `combinations` is presumably itertools.combinations, imported in an earlier cell -- confirm.
all_combinations = combinations(range(8), 2)

In [305]:
# Collect the index pairs from all_combinations into the list `a`.
a=[]
for eid in all_combinations:
    a.append(eid)

In [307]:
# Shuffle the pairs in place using the `random` module's global state.
random.shuffle(a)

In [309]:
# Draw a single random index into `a` (returned as a length-1 array).
np.random.choice(len(a),1, replace=False)

array([24])

In [310]:
# Look up the pair at index 24, the value shown in the previous cell's output.
a[24]

(3, 6)

In [317]:
# Draw a single random index in range(NUM_SEQUENCES) (returned as a length-1 array).
np.random.choice(NUM_SEQUENCES, 1, replace=False)

array([0])