In [1]:
from rouge import rouge_n_sentence_level
from nltk.translate.bleu_score import SmoothingFunction

import numpy as np 
import pandas as pd
import pickle as pkl
import nltk
import os
import json

In [2]:
# Seed NumPy's global random state so that later np.random draws
# (e.g. the sampling in gen_examples_qualitative) repeat across runs.
np.random.seed(138290)

In [3]:
def avg_n_rouge(list1, list2, n=2):
    '''
    -----------------------
    Get average sentence-level
    ROUGE-n score
    -----------------------
    For every index i of list1, calls
    rouge_n_sentence_level(list1[i], list2[i], n) and averages the
    third element of the returned tuple.

    Parameters:
        list1: indexable collection passed as the first argument of
               rouge_n_sentence_level (get_results passes the
               generated texts here).
        list2: indexable collection passed as the second argument
               (get_results passes the labels here). Must have at
               least len(list1) entries.
        n:     n-gram order, 2 by default.

    Returns:
        The mean score over list1, or 0.0 when list1 is empty.
    '''
    # An empty input used to end in ZeroDivisionError; report 0.0 instead.
    if len(list1) == 0:
        return 0.0

    rouge_sum = 0

    for i in range(len(list1)):

        # NOTE(review): items are forwarded untouched. If they are plain
        # strings (as produced by process_checklist_output), the n-grams are
        # presumably built over characters rather than words - confirm
        # against the rouge package before relying on these numbers.
        _, _, rouge = rouge_n_sentence_level(list1[i], list2[i], n)

        rouge_sum += rouge

    return rouge_sum / len(list1)

In [4]:
def get_avg_bleu(list1,list2):
    '''
    -----------------------
    Get smoothed average
    BLEU score
    -----------------------
    Parameters:
        list1: hypothesis sentences (strings); each one is split on
               single spaces and scored.
        list2: reference sentences (strings), aligned with list1 by
               index; each one is the only reference for its
               hypothesis.

    Returns:
        Mean sentence-level BLEU over list1 using unigram and bigram
        weights of 0.5 each and SmoothingFunction().method5, or 0.0
        when list1 is empty.
    '''
    # An empty input used to end in ZeroDivisionError; report 0.0 instead.
    if len(list1) == 0:
        return 0.0

    sum_bleu = 0

    smoothie = SmoothingFunction().method5

    for i in range(len(list1)):

        hypothesis = list1[i].split(' ')

        reference = list2[i].split(' ')

        # Only unigrams and bigrams are scored, so the weight is split
        # evenly between the two orders.
        score = nltk.translate.bleu_score.sentence_bleu([reference],
                                                        hypothesis,
                                                        weights = (0.5, 0.5),
                                                        smoothing_function = smoothie)
        sum_bleu += score

    avg_bleu = sum_bleu/len(list1)

    return avg_bleu

In [5]:
def get_number_of_ingredient_used(ingredient_list, text):
    '''
    ------------
    Get fraction of ingredients that appear in text
    ------------
    Parameters:
        ingredient_list: collection of ingredients; each entry is
                         tested as a whole with `ing in text`.
        text:            container searched for each ingredient. When
                         it is a string the test is a substring match,
                         so "oil" is also found inside "boil".

    Returns:
        Number of ingredients found divided by len(ingredient_list),
        or 0.0 when ingredient_list is empty.
    '''
    # A recipe without ingredients used to end in ZeroDivisionError.
    if len(ingredient_list) == 0:
        return 0.0

    ing_count = sum(1 for ing in ingredient_list if ing in text)

    return ing_count/len(ingredient_list)
 

In [6]:
def get_number_of_ingredient_used_2(ingredient_list, text):
    '''
    ------------
    Get fraction of ingredient words that appear in text
    ------------
    Each ingredient is split on single spaces and every resulting
    word is tested separately with `word in text`.

    Parameters:
        ingredient_list: collection of ingredient strings.
        text:            container searched for each word. When it is
                         a string the test is a substring match.

    Returns:
        Number of words found divided by the total number of words
        over all ingredients, or 0.0 when ingredient_list is empty.
    '''
    ing_count = 0

    total_words = 0

    for ing in ingredient_list :

        words = ing.split(' ')

        total_words += len(words)

        ing_count += sum(1 for word in words if word in text)

    # split(' ') always yields at least one piece, so the total is zero
    # only for an empty ingredient list; that used to raise
    # ZeroDivisionError.
    if total_words == 0:
        return 0.0

    return ing_count/total_words

In [7]:
def make_ing_vocab(x):
    '''
    ----------
    Build the ingredient vocabulary: the set of every
    distinct entry found in the ingredient lists at x[1]
    ----------
    '''
    vocab = set()

    for ing_list in x[1]:
        vocab.update(ing_list)

    return vocab

In [8]:
def get_extra_ingredient_used(ingredient_list, ing_vocab, text):
    '''
    ------------
    Get the number of extra ingredient mentions in text,
    relative to the size of the ingredient list
    ------------
    A word counts as "extra" when it is in ing_vocab but not in
    ingredient_list. Every occurrence is counted, so the result can
    exceed 1.

    Parameters:
        ingredient_list: ingredients the text is supposed to use.
        ing_vocab:       collection of all known ingredients.
        text:            iterable of words, or a string, which is
                         split on whitespace first.

    Returns:
        Extra mentions divided by len(ingredient_list), or 0.0 when
        ingredient_list is empty (the ratio is undefined there).
    '''
    # Iterating a raw string yields single characters, which can never
    # match multi-character ingredient names; tokenise it first.
    if isinstance(text, str):
        text = text.split()

    # An empty ingredient list used to end in ZeroDivisionError.
    if len(ingredient_list) == 0:
        return 0.0

    extra_ing_count = sum(
        1 for word in text
        if word not in ingredient_list and word in ing_vocab
    )

    return extra_ing_count/len(ingredient_list)

In [9]:
def ingredients_overlap(txt, label, vocab):
    '''
    ------------
    Count the distinct vocabulary entries that occur
    in both txt and label
    ------------
    Occurrence is decided with the `in` operator, so for string
    inputs it is a substring match.
    '''
    shared = set()

    for term in vocab:

        found_in_txt = term in txt
        found_in_label = term in label

        if found_in_txt and found_in_label:
            shared.add(term)

    return len(shared)

In [10]:
def get_results(f, results_path = "results/", checklist=False):
    '''
    Load one results file and compute all evaluation metrics for it.

    Parameters:
        f:            file name inside results_path. The name is also
                      parsed for the masking level ('0.8' / '0.5') and
                      the model type ('frozen_encoder' / 'all_layers').
        results_path: directory containing the file.
        checklist:    when True the file is read as JSON and converted
                      with process_checklist_output; otherwise it is
                      unpickled.

    Returns:
        (results, ing_fracs) where results is the summary row
        [model type, masking, ROUGE-2, ROUGE-4, BLEU, output coherence,
        target coherence, overall coherence, output coherence (again),
        average ingredient overlap] and ing_fracs is
        [per-example output fractions, per-example target fractions].

    The loaded object is indexed as x[1] = ingredient lists,
    x[2] = generated texts, x[3] = labels. That layout is what
    process_checklist_output produces; pickled files are assumed to
    follow it too - TODO confirm.
    '''
    path = os.path.join(results_path, f)

    if checklist:
        # Load file (closed again as soon as it has been parsed)
        with open(path, "r") as fh:
            x = json.load(fh)
        # Process
        x = process_checklist_output(x)
    else:
        # NOTE: unpickling can execute arbitrary code; only load result
        # files that come from a trusted source.
        with open(path, "rb") as fh:
            x = pkl.load(fh)

    # Masking column description. Stays None when the file name carries
    # neither marker (this used to raise UnboundLocalError further down).
    masking = None
    if '0.8' in f:
        masking = 0.8
    if '0.5' in f:
        masking = 0.5

    # Model type description
    if 'frozen_encoder' in f:
        model_type = 'Encoder frozen'
    elif 'all_layers' in f:
        model_type = 'All layers'
    else:
        model_type = 'Checklist'

    # Rouge 2 score
    rouge_2_avg = avg_n_rouge(x[2],x[3])

    # Rouge 4 Score
    rouge_4_avg = avg_n_rouge(x[2], x[3], n=4)

    # BLEU score: get_avg_bleu expects (hypotheses, references), i.e. the
    # generated text first and the label second. The arguments used to be
    # passed the other way round.
    bleu_avg = get_avg_bleu(x[2],x[3])

    # Coherence of output
    ing_frac = [get_number_of_ingredient_used(x[1][i],x[2][i]) for i in range(len(x[1]))]

    overall_frac_1 = sum(ing_frac)/len(ing_frac)

    # Coherence of target
    # NOTE(review): this uses the word-level variant while the output
    # coherence above matches whole ingredients - confirm the two are meant
    # to be compared.
    ing_frac_2 = [get_number_of_ingredient_used_2(x[1][i],x[3][i]) for i in range(len(x[1]))]

    overall_frac_2 = sum(ing_frac_2)/len(ing_frac_2)

    # Compute overall coherence; the ratio is undefined when the targets
    # mention no ingredient words at all.
    if overall_frac_2:
        overall_coherence = overall_frac_1/overall_frac_2
    else:
        overall_coherence = float('nan')

    # Ingredients Intersection
    ing_vocab = make_ing_vocab(x)
    overlap = [ingredients_overlap(x[2][i], x[3][i], ing_vocab) for i in range(len(x[1]))]
    avg_overlap = sum(overlap)/len(overlap)

    # Put results together
    # NOTE(review): overall_frac_1 appears twice; the second occurrence
    # fills the "Avg Ingredients" column of the summary table - confirm
    # which quantity was intended there.
    results = [model_type, masking, rouge_2_avg, rouge_4_avg, bleu_avg,
               overall_frac_1, overall_frac_2, overall_coherence, overall_frac_1, avg_overlap]

    ing_fracs = [ing_frac, ing_frac_2]

    # Return statement
    return(results, ing_fracs)

In [11]:
def gen_examples_qualitative(x):
    '''
    Pick 100 random positions in x for qualitative inspection.

    Positions are drawn from 0 .. len(x) - 1 with np.random.choice
    using its default settings, so the same position can come up more
    than once. Returns the array of drawn positions.
    '''
    candidates = np.arange(len(x))

    return np.random.choice(candidates, size=100)

In [12]:
def get_weird_examples(f, results_path = "/Users/akshatgoel/Desktop/results/", ing_fracs=None, n=10):
    '''
    Print the examples of a pickled results file whose ingredient
    fraction is exactly zero.

    Parameters:
        f:            name of the pickled results file.
        results_path: directory containing the file.
                      NOTE(review): the default is an absolute path on
                      one machine; callers elsewhere should pass their
                      own directory.
        ing_fracs:    nested sequence; ing_fracs[0][1] must hold one
                      fraction per example. When omitted, the
                      module-level name `ing` is used, as the original
                      code did. Presumably this is the list of second
                      return values of get_results, one per file -
                      TODO confirm, and confirm that entry 0 belongs
                      to file f.
        n:            maximum number of examples to print (None prints
                      all). This name was previously undefined.

    For every selected index i, prints x[0][i], x[1][i], x[2][i] and
    x[3][i] of the loaded object. Returns nothing.
    '''
    # Print the file name
    print(f)

    # Load the file.
    # NOTE: unpickling can execute arbitrary code; only load result files
    # that come from a trusted source.
    with open(os.path.join(results_path, f), "rb") as fh:
        x = pkl.load(fh)

    if ing_fracs is None:
        ing_fracs = ing

    test = np.array(ing_fracs[0][1])

    # np.where(...)[0] is already a flat index array. The former np.squeeze
    # collapsed a single match into a 0-d array, which cannot be sliced.
    test_indices = np.where(test == 0)[0]

    for i in test_indices[:n]:

        print(x[0][i])

        print(x[1][i])

        print(x[2][i])

        print(x[3][i])

In [13]:
def process_checklist_output(x):
    '''
    Convert a checklist output dict into a positional list:
    [goal, ingredients, generated texts, labels].

    'goal' and 'ingredients' are passed through unchanged; every
    token list under 'generated_text' and 'label' is joined into a
    single space-separated string.
    '''
    def _join_tokens(token_lists):
        return [' '.join(tokens) for tokens in token_lists]

    goal = x['goal']
    ingredients = x['ingredients']
    generated = _join_tokens(x['generated_text'])
    labels = _join_tokens(x['label'])

    return [goal, ingredients, generated, labels]

In [14]:
if __name__ == '__main__':

    # Directory that is scanned for result files
    results_path = "results/"

    # Pickled results (*.pkl) and checklist results (*.json) share the directory
    files = [name for name in os.listdir(results_path) if name.endswith(".pkl")]
    chk_files = [name for name in os.listdir(results_path) if name.endswith(".json")]

    # Column headers, in the order of the summary row built by get_results
    columns = [
        "Model type", "Masking level", "Rouge 2 Score", "Rouge 4 Score", "BLEU Score",
        "Coherence: output", "Coherence: target", "Coherence: Overall", "Avg Ingredients",
        "Avg Overlap",
    ]

    # Evaluate every checklist file; each entry is (summary row, ingredient fractions)
    chk_results = [get_results(name, checklist=True) for name in chk_files]

    # One table row per checklist file
    chk_res = pd.DataFrame([summary for summary, _ in chk_results], columns = columns)

    # The pickle-based evaluation and the qualitative sampling are currently disabled:
    # x = pkl.load(open(os.path.join(results_path, files[0]), "rb" ))
    # results = [get_results(f) for f in files]
    # res = pd.DataFrame([r[0] for r in results], columns = columns)
    # ing = [r[1] for r in results]
    # indices = gen_examples_qualitative(x[0])
    # pd.DataFrame(indices).to_csv(os.path.join(results_path, "indices.csv"))