In [1]:
from rouge import rouge_n_sentence_level
from nltk.translate.bleu_score import SmoothingFunction

import numpy as np 
import pandas as pd
import pickle as pkl
import nltk
import os

In [2]:
def avg_rouge(list1, list2):
    '''
    -----------------------
    Get average sentence-level
    ROUGE-4 score

    list1: candidate texts, one per example
    list2: reference texts, index-aligned with list1

    Each text may be a whitespace-separated string or an
    already-tokenised sequence of words.

    Returns the mean of the third value returned by
    rouge_n_sentence_level (presumably the combined F-measure;
    confirm against the rouge package), or 0.0 when list1 is empty.
    -----------------------
    '''
    if len(list1) == 0:
        # Nothing to average; avoid dividing by zero
        return 0.0

    def as_tokens(text):
        # The scorer builds n-grams over the items of the sequence it
        # receives, so a raw string would be scored on character 4-grams
        # rather than word 4-grams. Split strings into words first.
        return text.split() if isinstance(text, str) else text

    rouge_sum = 0

    for i in range(len(list1)):

        _, _, rouge = rouge_n_sentence_level(as_tokens(list1[i]), as_tokens(list2[i]), 4)

        rouge_sum += rouge

    rouge_avg = rouge_sum / len(list1)

    return rouge_avg

In [3]:
def get_avg_bleu(list1,list2):
    '''
    -----------------------
    Mean smoothed sentence-level
    BLEU over paired texts

    list1: hypothesis strings
    list2: reference strings,
           index-aligned with list1
    -----------------------
    '''
    smoothing = SmoothingFunction().method5

    # Only unigrams and bigrams are scored, each carrying half the weight
    ngram_weights = (0.5, 0.5)

    total = 0

    for idx, candidate in enumerate(list1):

        # Both sides are tokenised on single spaces
        hyp_tokens = candidate.split(' ')
        ref_tokens = list2[idx].split(' ')

        total += nltk.translate.bleu_score.sentence_bleu([ref_tokens],
                                                         hyp_tokens,
                                                         weights = ngram_weights,
                                                         smoothing_function = smoothing)

    return total/len(list1)

In [4]:
def get_number_of_ingredient_used(ingredient_list, text):
    '''
    ------------
    Get fraction of ingredients that appear in text

    ingredient_list: ingredient names for one example
    text: text to search; membership is tested with `in`,
          so for a string this is a substring match

    Returns a value between 0 and 1, or 0.0 when
    ingredient_list is empty
    ------------
    '''
    if len(ingredient_list) == 0:
        # No ingredients to look for; avoid dividing by zero
        return 0.0

    ing_count = sum(1 for ing in ingredient_list if ing in text)

    ing_frac = ing_count/len(ingredient_list)

    return ing_frac
 

In [5]:
def get_number_of_ingredient_used_2(ingredient_list, text):
    '''
    ------------
    Get fraction of ingredient words that appear in text

    Each ingredient is split on single spaces and every
    resulting word is checked on its own, so a multi-word
    ingredient can be partially matched.

    ingredient_list: ingredient names for one example
    text: text to search; membership is tested with `in`,
          so for a string this is a substring match

    Returns matched words / total words, or 0.0 when
    ingredient_list is empty
    ------------
    '''
    ing_count = 0

    total_words = 0

    for ing in ingredient_list :

        words = ing.split(' ')

        total_words += len(words)

        ing_count += sum(1 for word in words if word in text)

    if total_words == 0:
        # Empty ingredient list; avoid dividing by zero
        return 0.0

    ing_frac = ing_count/total_words

    return ing_frac

In [36]:
def make_ing_vocab(x):
    '''
    ----------
    Flatten the ingredient lists stored
    in x[1] into a single list
    (order and duplicates are kept)
    ----------
    '''
    vocab = []

    for ingredients in x[1]:

        for ingredient in ingredients:

            vocab.append(ingredient)

    return vocab

In [47]:
def get_extra_ingredient_used(ingredient_list, ing_vocab, text):
    '''
    ------------
    Get number of extra ingredient words in text, relative
    to the number of listed ingredients

    A word is "extra" when it is in ing_vocab but not in
    ingredient_list. Every occurrence is counted, so the
    result can exceed 1.

    ingredient_list: ingredient names for one example
    ing_vocab: collection of all known ingredient names
    text: whitespace-separated string or sequence of words

    Returns extra count / len(ingredient_list), or 0.0 when
    ingredient_list is empty
    ------------
    '''
    if len(ingredient_list) == 0:
        # The ratio is undefined without listed ingredients; avoid dividing by zero
        return 0.0

    # Iterating a raw string would yield characters, not words
    words = text.split() if isinstance(text, str) else text

    extra_ing_count = 0

    for word in words:

        if word not in ingredient_list and word in ing_vocab:

            extra_ing_count += 1

    extra_ing_frac = extra_ing_count/len(ingredient_list)

    return extra_ing_frac

In [6]:
def get_results(f, results_path = "/Users/akshatgoel/Desktop/results/"):
    '''
    Compute all evaluation metrics for one pickled results file

    f: file name of the pickle inside results_path. The masking level
       ('0.8' / '0.5') and model type ('frozen_encoder' / 'all_layers')
       are read from this name; each is None when no pattern matches.
    results_path: directory that contains f

    The pickle is indexed as x[1] (ingredient lists), x[2] (model
    outputs) and x[3] (targets) -- presumably one entry per example;
    confirm against the code that writes these files.

    Returns (results, ing_fracs) where
      results   = [model_type, masking, rouge_avg, bleu_avg,
                   overall_frac_1, overall_frac_2, overall_coherence]
      ing_fracs = [per-example output fractions, per-example target fractions]
    overall_coherence is NaN when the target fraction is 0.
    '''
    # Load files
    # NOTE: unpickling can execute arbitrary code; only use trusted files
    with open(os.path.join(results_path, f), "rb") as handle:
        x = pkl.load(handle)

    # Masking column description (None when the name carries no known level)
    masking = None
    if '0.8' in f:
        masking = 0.8
    if '0.5' in f:
        masking = 0.5

    # Model type description (None when the name carries no known type)
    model_type = None
    if 'frozen_encoder' in f:
        model_type = 'Encoder frozen'
    if 'all_layers' in f:
        model_type = 'All layers'

    # Rouge score: output is the candidate, target is the reference
    rouge_avg = avg_rouge(x[2],x[3])

    # BLEU score: get_avg_bleu takes (hypotheses, references), so the
    # output goes first here as well
    bleu_avg = get_avg_bleu(x[2],x[3])

    # Coherence of output
    ing_frac = [get_number_of_ingredient_used(x[1][i],x[2][i]) for i in range(len(x[1]))]

    overall_frac_1 = sum(ing_frac)/len(ing_frac)

    # Coherence of target
    # NOTE(review): this uses the word-level variant while the output uses
    # the whole-ingredient variant -- confirm the mismatch is intended
    ing_frac_2 = [get_number_of_ingredient_used_2(x[1][i],x[3][i]) for i in range(len(x[1]))]

    overall_frac_2 = sum(ing_frac_2)/len(ing_frac_2)

    # Compute overall coherence; undefined when no target mentions any ingredient word
    if overall_frac_2 == 0:
        overall_coherence = float('nan')
    else:
        overall_coherence = overall_frac_1/overall_frac_2

    # Put results together
    results = [model_type, masking, rouge_avg, bleu_avg, overall_frac_1, overall_frac_2, overall_coherence]

    ing_fracs = [ing_frac, ing_frac_2]

    # Return statement
    return(results, ing_fracs)

In [71]:
def gen_examples_qualitative(x):
    '''
    Placeholder: not implemented yet.
    Ignores x and returns None.
    '''
    return None

In [None]:
def get_weird_examples(f, results_path = "/Users/akshatgoel/Desktop/results/", n = 5, ing_fracs = None):
    '''
    Print up to n examples from results file f whose ingredient
    fraction is exactly 0

    f: file name of the pickle inside results_path
    results_path: directory that contains f
    n: maximum number of examples to print
    ing_fracs: per-example fractions, index-aligned with the pickle's
               entries. When None they are computed from the file with
               get_number_of_ingredient_used_2(x[1][i], x[3][i]), so the
               function does not depend on notebook-level state.

    For each selected index i, prints x[0][i], x[1][i], x[2][i] and
    x[3][i] in that order. Returns None.
    '''
    # Print the file name
    print(f)

    # Load the file
    # NOTE: unpickling can execute arbitrary code; only use trusted files
    with open(os.path.join(results_path, f), "rb") as handle:
        x = pkl.load(handle)

    if ing_fracs is None:
        ing_fracs = [get_number_of_ingredient_used_2(x[1][i], x[3][i]) for i in range(len(x[1]))]

    test = np.asarray(ing_fracs)

    # flatnonzero always gives a 1-D index array, even for a single hit,
    # so the slice below stays valid
    test_indices = np.flatnonzero(test == 0)

    for i in test_indices[:n]:

        print(x[0][i])

        print(x[1][i])

        print(x[2][i])

        print(x[3][i])

In [7]:
if __name__ == '__main__':

    # Single place to change where the pickled results live
    results_dir = "/Users/akshatgoel/Desktop/results/"

    # os.listdir returns entries in arbitrary order; sort so the row order
    # of `res` and the positions in `ing` are the same on every run
    files = sorted(f for f in os.listdir(results_dir) if f.endswith(".pkl"))

    columns = ["Model type", "Masking level", "Rouge Score", "BLEU Score", 
               "Coherence: output", "Coherence: target", "Coherence: Overall"]

    # One (summary row, per-example fractions) pair per results file
    results = [get_results(f, results_path = results_dir) for f in files]

    # Summary table: one row per file
    res = pd.DataFrame([r[0] for r in results], columns = columns)

    # Per-example ingredient fractions, index-aligned with `files`
    ing = [r[1] for r in results]