In [4]:
from nltk.stem import PorterStemmer
import pickle
import nltk
nltk.download('punkt')
nltk.download('wordnet')
from nltk.tokenize import word_tokenize 
import string
from nltk.translate.meteor_score import meteor_score

[nltk_data] Downloading package punkt to
[nltk_data]     /home/ct2020dl5787/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /home/ct2020dl5787/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [5]:
def get_ingred_f1(pred, label):
    '''
    input: 
        pred: a list of predicted ingredients
        label: a list of label ingredients
    output: 
        F-1 score of the prediction
    
    i.e.  
    in: 
        pred = ["tomato", "sugar", "beef"]
        label = ["potato", "tomato"]
     
    out: 
        0.4
    '''
    intersection = list(set(pred) & set(label))
    precision = len(intersection) / len(pred)
    recall = len(intersection) / len(label)
    if precision + recall == 0:
        return 0
    f1 = 2 * precision * recall / (precision + recall)
    return f1


def get_ingred_IOU(pred, label):
    '''
    input: 
        pred: a list of predicted ingredients
        label: a list of label ingredients
    output: 
        IOU of the prediction
    
    i.e.  
    in: 
        pred = ["tomato", "sugar", "beef"]
        label = ["potato", "tomato"]
     
    out: 
        0.25
    '''
    intersection = len(list(set(pred) & set(label)))
    union = len(list(set(pred) | set(label)))
    iou = intersection / union
    return iou





def get_bleu_n_score(pred, label, n = 4):
    
    '''
    TODO: STEM not added
    input: 
        pred: One string of predict recipe 
        label: One string of reference recipe
        n(optional): up to n-gram.
    output: 
        bleu score
    
    i.e.  
    in: 
        pred = "Add the buttter"
        label = "Add half butter and mix well"
    out: 
        0.25
    '''
    weights = [1/n] * n
    pred_list = pred.translate(str.maketrans('', '', string.punctuation)).split()
    pred_list = [ps.stem(word) for word in pred_list]
    label_list = label.translate(str.maketrans('', '', string.punctuation)).split()
    label_list = [ps.stem(word) for word in label_list]
    BLEUscore = nltk.translate.bleu_score.sentence_bleu([label_list], pred_list, weights)
    return BLEUscore




def get_meteor_score(pred, label):
    return meteor_score([label], pred)

def read_ingre_vocab(filepath):
    '''
    input: 
        filepath of recipe1m_vocab_ingrs.pkl
    output: 
        a list of stemed ingre
    
    i.e.  
    in: 
        "4 large baking potatoes (2 lb./900 g) Safeway 2 pkg For $5.00 thru 02/09""
    out: 
        ["potato"]
    '''
    ingre = pickle.load(open(filepath, "rb" ))
    result = []
    for i in range(1, len(ingre.idx2word)):
        word = min(ingre.idx2word[i], key=len)
        if "_" not in word:
            result.append(ps.stem(word))
    return result

In [6]:
class Vocabulary(object):
    """Simple vocabulary wrapper."""
    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.idx = 0

    def add_word(self, word, idx=None):
        if idx is None:
            if not word in self.word2idx:
                self.word2idx[word] = self.idx
                self.idx2word[self.idx] = word
                self.idx += 1
            return self.idx
        else:
            if not word in self.word2idx:
                self.word2idx[word] = idx
                if idx in self.idx2word.keys():
                    self.idx2word[idx].append(word)
                else:
                    self.idx2word[idx] = [word]

                return idx

    def __call__(self, word):
        if not word in self.word2idx:
            return self.word2idx['<pad>']
        return self.word2idx[word]

    def __len__(self):
        return len(self.idx2word)



In [7]:
def score_ingre_f1(results, ref):
    '''
    input: 
        results: a list of candidates ingre list
        ref: a list of reference ingre
    output: 
        f1 score
    '''
    score = 0
    for result in results:
        score = max(score, get_ingred_f1(result, ref))
            
    return score


def score_ingre_IOU(results, ref):
    '''
    input: 
        results: a list of candidates ingre list
        ref: a list of reference ingre
    output: 
        IOU score
    '''
    score = 0
    for result in results:
        score = max(score, get_ingred_IOU(result, ref))
            
    return score

def show_ingre(df, i):
    print (df.iloc[i]["ingredients"])
    print (df.iloc[i]["generate_ingre"])
    
    
    
def score_recipe_meteor(generate_recipes, ref):
    score = 0
    ref = " ".join(ref)
    for recipe in generate_recipes:
        joined_recipe = " ".join(recipe)
        score = max(score, get_meteor_score(joined_recipe, ref))
    return score


def score_blue_n(generate_recipes, ref, n):
    score = 0
    ref = " ".join(ref)
    for recipe in generate_recipes:
        joined_recipe = " ".join(recipe)
        score = max(score, get_bleu_n_score(joined_recipe, ref, n))
    return score





def get_tokenized_and_stemmed_list(ingre_lists, ingre_vocab):
    '''
    input: 
        a list of ingredient str
        a list of stemmed reference ingre vocab
    output: 
        a set of indredient
    
    i.e.  
    in: 
        "4 large baking potatoes (2 lb./900 g) Safeway 2 pkg For $5.00 thru 02/09""
    out: 
        ["potato"]
    '''
    result = set()
    for ingre_list in ingre_lists:
        words = word_tokenize(ingre_list) 
        for word in words: 
            result.add(ps.stem(word))
            
    return result & set(ingre_vocab)


def is_recipe_validate(generate_score):
    count = 0
    for j in range (4):
#         if results.iloc[i]["generate_score"][j][0] == False:
        if generate_score[j][0] == False:
            count += 1
    if count == 4:
        return False
    return True

In [6]:
# import string
# "apple?! jfs".translate(str.maketrans('', '', string.punctuation)).split()

In [9]:
ps = PorterStemmer()
# results = pickle.load(open( "df_recipe1023_all.pkl", "rb" ))
results = pickle.load(open( "chinese172sample_allinfo.pkl", "rb" ))
stemmed_ingre = read_ingre_vocab("../data/recipe1m_vocab_ingrs.pkl")

In [11]:
results = results.rename(columns={'true_ingredients':'ingredients'}, inplace=False)


In [12]:
results

Unnamed: 0,img_names,ingredients,ingredients_Chinese,generate_ingre,generate_reci,generate_title,generate_score
0,92_075811n2tr1trtvr21vvao.jpg,"[scallion, pepper, beef]","[Minced green onion, Shredded pepper, Shredded...","[[pepper, oil, onion, clove, soy_sauce, sugar,...",[[Heat oil in a large skillet over medium heat...,"[Spicy bean stir-fry, Stir-fried vegetables an...","[[True, 0.5], [True, 0.5540540540540541], [Tru..."
1,105_09-49-04-39-1.jpg,"[black_sesame_seed, yam, sugar]","[Black sesame, Yam chunks, Crystal sugar]","[[soy_sauce, sugar, sake, oil, mirin, chicken]...",[[Cut the chicken wings into bite-sized pieces...,"[Teriyaki chicken wings, Delicious simmered sa...","[[True, 0.5918367346938775], [True, 0.58490566..."
2,149_01300534137870134686102049450.jpg,[scallion],[Scallion pancake],"[[salt, oil, flour, water, egg, pepper], [salt...","[[In a large bowl, mix together the flour, sal...","[Easy homemade pasta, Homemade pancake, Homema...","[[True, 0.603448275862069], [True, 0.625], [Tr..."
3,119_0013d350e0e80956a9ca0f.jpg,"[dates, water, nuts, mushroom]","[Red dates, Water, Lotus seeds, White fungus]","[[sugar, milk, banana], [sugar, milk, banana],...","[[In a medium saucepan, combine the milk, suga...","[Banana pudding, Banana ice cream, Microwaved ...","[[True, 0.6229508196721312], [True, 0.63953488..."
4,49_01200000023782135247544658087.jpg,"[diced_green_chilis, egg, gourd]","[Crushed pepper, Scrambled egg, Loofah]","[[pepper, oil, onion, salt, soy_sauce, clove],...",[[Heat oil in a wok or large skillet over medi...,"[Stir-fried peppers and onions, Quick garlic s...","[[True, 0.543859649122807], [True, 0.626666666..."
...,...,...,...,...,...,...,...
167,26_-14a0b3561215bc87dbb-7d8b.jpg,"[chicken, parsley]","[Chicken Wings, Parsley]","[[sugar, egg, butter, milk, extract], [sugar, ...","[[In a medium saucepan, combine the sugar, but...","[Easy caramel sauce, Baked vanilla pound cake,...","[[True, 0.6346153846153846], [True, 0.62337662..."
168,41_0023ae99e1440e9a332d05.jpg,"[scallion, shrimp, egg]","[Minced green onion, Dried sea shrimp, Steamed...","[[milk, sugar, onion, pepper, butter, salt, fl...",[[Melt butter in a large saucepan over medium ...,"[Creamy onion soup, Creamy green onion soup mi...","[[True, 0.5376344086021505], [True, 0.48148148..."
169,164_01300000245880122362433364658.jpg,"[chicken, water, herbs]","[Whole black chicken, Water, Codonopsis pilosula]","[[water, salt, butter, sugar], [water, salt, b...","[[In a medium saucepan, combine the peanut but...","[Peanut butter sauce, Hot water, Caramel sauce...","[[True, 0.5769230769230769], [True, 0.57142857..."
170,89_07c47c849ee611e3b4a6e0db5512b209.jpg,"[scallion, egg, seaweed, water]","[Minced green onion, Egg drop, Laver, Water]","[[soy_sauce, sugar, seaweed, salt, mirin], [so...","[[Rehydrate the hijiki seaweed in water., Drai...","[Hijiki seaweed and aburaage, Hijiki seaweed w...","[[True, 0.45977011494252873], [True, 0.5581395..."


In [35]:
results.iloc[2]["generate_score"]

[[False, 0.47333333333333333],
 [True, 0.6266666666666667],
 [False, 0.47333333333333333],
 [True, 0.6875]]

In [15]:
# For 172 only

f1_summary = []
IOU_summary = []
for i in range(0, len(results)):
    
    # validate
    generate_score = results.iloc[i]["generate_score"]
    if not is_recipe_validate(generate_score):
        continue
        
    # Evaluate the ingredient
    generate_ingre = results.iloc[i]["generate_ingre"]
    reference_ingre = get_tokenized_and_stemmed_list(results.iloc[i]["ingredients"], stemmed_ingre)

    try:
        f1_score = score_ingre_f1(generate_ingre, reference_ingre)
        iou_score = score_ingre_IOU(generate_ingre, reference_ingre)

        f1_summary.append(f1_score)
        IOU_summary.append(iou_score)
    except:
        print (i)

    
print(sum(f1_summary)/len(f1_summary))
print(sum(IOU_summary)/len(IOU_summary))

21
93
0.08284862196626902
0.04812447356565002


In [13]:
# For Recipe 1M only

n_bleu = 3 # bleu-n score, 4 is default
f1_summary = []
IOU_summary = []
meteor_summary = []
bleu_summary = []
for i in range(0, len(results)):
    
    # validate
    generate_score = results.iloc[i]["generate_score"]
    if not is_recipe_validate(generate_score):
        continue
        
    # Evaluate the ingredient
    generate_ingre = results.iloc[i]["generate_ingre"]
    reference_ingre = get_tokenized_and_stemmed_list(results.iloc[i]["ingredients"], stemmed_ingre)
    f1_score = score_ingre_f1(generate_ingre, reference_ingre)
    iou_score = score_ingre_IOU(generate_ingre, reference_ingre)
    
#     print("F-1 score", f1_score)
#     print("IOU score", iou_score)
    
    # Evaluate the recipe
    generate_recipes = results.iloc[i]["generate_reci"]
    ref_recipe = results.iloc[i]["instructions"]
    m_score = score_recipe_meteor(generate_recipes, ref_recipe)
    bleu_score = score_blue_n(generate_recipes, ref_recipe, n_bleu)
#     print("Meteor score", m_score)
#     print("Bleu score", bleu_score)

#     if(bleu_score < 0.05):
#         print (i)
#         print (bleu_score)
#         print (generate_recipes)
#         print (ref_recipe)
#         break
    
    
    f1_summary.append(f1_score)
    IOU_summary.append(iou_score)
    meteor_summary.append(m_score)
    bleu_summary.append(bleu_score)
    
print(sum(f1_summary)/len(f1_summary))
print(sum(IOU_summary)/len(IOU_summary))
print(sum(meteor_summary)/len(meteor_summary))
print(sum(bleu_summary)/len(bleu_summary))

KeyError: 'instructions'

In [49]:
results.iloc[2]["generate_reci"]

[['Preheat oven to 350 degrees f (175 degrees c).',
  'Grease a 9-inch pie pan.',
  'In a large bowl, mix together the flour, sugar, salt, and cinnamon.',
  'Cut in the butter until the mixture resembles coarse crumbs.',
  'Stir in the water and egg until the dough forms a ball.',
  'Press the dough into the bottom and up the sides of the prepared pie pan.',
  'Bake in the preheated oven for 15 minutes.',
  'Reduce heat to 350 degrees f (175 degrees c) and bake for an additional 30 minutes, or until the crust is golden brown.',
  'Allow to cool.',
  'In a large bowl, mix together the rhubarb, strawberries, and sugar.',
  'Pour into the cooled crust.'],
 ['Combine sugar and flour.',
  'Add melted butter and mix until crumbly.',
  'Add water and beaten egg, mix again.',
  'Place in a greased baking dish and cover with strawberries and rhubarb.',
  'Mix brown sugar, flour, cinnamon and salt in bowl, then sprinkle over top of berries.',
  'Bake in a 400 degree f oven for 35 minutes.'],
 ['

In [50]:
results.iloc[2]["instructions"]

['Put ingredients in a buttered 9 x 12 x 2-inch pan in even layers in the order that they are given - DO NOT MIX.',
 'Bake in a 350 oven for 1 hour.']