In [18]:
from nltk.stem import PorterStemmer
import pickle
import nltk
nltk.download('punkt')
nltk.download('wordnet')
from nltk.tokenize import word_tokenize 
import string
from nltk.translate.meteor_score import meteor_score

[nltk_data] Downloading package punkt to
[nltk_data]     /home/ct2020dl5787/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /home/ct2020dl5787/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [19]:
def get_ingred_f1(pred, label):
    '''
    Set-based F-1 score between predicted and reference ingredients.

    input: 
        pred: a list of predicted ingredients
        label: a list (or set) of label ingredients
    output: 
        F-1 score of the prediction. Returns 0 when either input is
        empty or when the two share no ingredient.
    
    Duplicated entries are counted once on both sides, so the precision
    and recall denominators agree with the de-duplicated intersection.
    
    i.e.  
    in: 
        pred = ["tomato", "sugar", "beef"]
        label = ["potato", "tomato"]
     
    out: 
        0.4
    '''
    pred_set = set(pred)
    label_set = set(label)

    # Guard: an empty prediction or reference would divide by zero below.
    if not pred_set or not label_set:
        return 0

    overlap = len(pred_set & label_set)
    if overlap == 0:
        return 0

    precision = overlap / len(pred_set)
    recall = overlap / len(label_set)
    return 2 * precision * recall / (precision + recall)


def get_ingred_IOU(pred, label):
    '''
    Intersection-over-union between predicted and reference ingredients.

    input: 
        pred: a list of predicted ingredients
        label: a list (or set) of label ingredients
    output: 
        IOU of the prediction. Returns 0 when both inputs are empty
        (the union is empty, so the ratio is undefined).
    
    i.e.  
    in: 
        pred = ["tomato", "sugar", "beef"]
        label = ["potato", "tomato"]
     
    out: 
        0.25
    '''
    pred_set = set(pred)
    label_set = set(label)
    union = pred_set | label_set

    # Guard: two empty inputs would divide by zero below.
    if not union:
        return 0

    return len(pred_set & label_set) / len(union)





def get_bleu_n_score(pred, label, n = 4):
    '''
    Sentence-level BLEU of a predicted recipe against one reference.

    Both strings have punctuation removed, are split on whitespace and
    stemmed with the module-level stemmer `ps` before scoring. N-gram
    orders 1..n are weighted uniformly (1/n each).

    input: 
        pred: One string of predict recipe 
        label: One string of reference recipe
        n(optional): up to n-gram.
    output: 
        bleu score
    
    i.e.  
    in: 
        pred = "Add the buttter"
        label = "Add half butter and mix well"
    '''
    uniform_weights = [1/n] * n
    strip_punctuation = str.maketrans('', '', string.punctuation)

    def _stemmed_tokens(text):
        # Drop punctuation, split on whitespace, stem every token.
        return [ps.stem(token) for token in text.translate(strip_punctuation).split()]

    hypothesis = _stemmed_tokens(pred)
    reference = _stemmed_tokens(label)
    return nltk.translate.bleu_score.sentence_bleu([reference], hypothesis, uniform_weights)




def get_meteor_score(pred, label):
    '''
    METEOR score of one prediction against a single reference.

    input: 
        pred: the predicted recipe
        label: the reference recipe
    output: 
        whatever nltk's meteor_score returns for ([label], pred)

    NOTE(review): the callers in this notebook pass plain strings; some
    NLTK releases expect pre-tokenized input here - confirm against the
    installed NLTK version.
    '''
    references = [label]
    return meteor_score(references, pred)

def read_ingre_vocab(filepath):
    '''
    Load the pickled ingredient vocabulary and return stemmed entries.

    input: 
        filepath of recipe1m_vocab_ingrs.pkl
    output: 
        a list of stemed ingre

    For every index from 1 up to len(idx2word) - 1 (index 0 is skipped),
    the shortest element of idx2word[i] is taken; entries containing "_"
    are dropped and the rest are stemmed with the module-level `ps`.
    Presumably each idx2word[i] is a list of alternative spellings of
    one ingredient - verify against the pickle.
    '''
    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only use this on trusted, locally produced vocab files.
    # The context manager closes the handle, which the previous
    # pickle.load(open(...)) form never did.
    with open(filepath, "rb") as vocab_file:
        ingre = pickle.load(vocab_file)

    result = []
    for i in range(1, len(ingre.idx2word)):
        word = min(ingre.idx2word[i], key=len)
        if "_" not in word:
            result.append(ps.stem(word))
    return result

In [20]:
class Vocabulary(object):
    """Simple vocabulary wrapper mapping words to indices and back."""

    def __init__(self):
        # word -> index
        self.word2idx = {}
        # index -> word (auto-index mode) or index -> list of words (explicit-index mode)
        self.idx2word = {}
        # next index handed out in auto-index mode
        self.idx = 0

    def add_word(self, word, idx=None):
        """Register `word`.

        Without `idx`, an unseen word gets the next free index and the
        counter advances; the counter value (i.e. the *next* index) is
        returned either way.

        With `idx`, an unseen word is appended to the list stored under
        that index and `idx` is returned; a word that is already known
        is left alone and None is returned.
        """
        known = word in self.word2idx

        if idx is None:
            if not known:
                self.word2idx[word] = self.idx
                self.idx2word[self.idx] = word
                self.idx += 1
            return self.idx

        if known:
            return None

        self.word2idx[word] = idx
        # Several words may share one explicit index.
        self.idx2word.setdefault(idx, []).append(word)
        return idx

    def __call__(self, word):
        """Return the index of `word`, falling back to the index of '<pad>'."""
        if word in self.word2idx:
            return self.word2idx[word]
        return self.word2idx['<pad>']

    def __len__(self):
        """Number of distinct indices stored."""
        return len(self.idx2word)



In [21]:
def score_ingre_f1(results, ref):
    '''
    Best F-1 score over all candidate ingredient lists.

    input: 
        results: a list of candidates ingre list
        ref: a list of reference ingre
    output: 
        the highest get_ingred_f1(candidate, ref), or 0 when there are
        no candidates
    '''
    candidate_scores = [get_ingred_f1(candidate, ref) for candidate in results]
    # The leading 0 is the floor used when `results` is empty.
    return max([0] + candidate_scores)


def score_ingre_IOU(results, ref):
    '''
    Best IOU over all candidate ingredient lists.

    input: 
        results: a list of candidates ingre list
        ref: a list of reference ingre
    output: 
        the highest get_ingred_IOU(candidate, ref), or 0 when there are
        no candidates
    '''
    candidate_scores = [get_ingred_IOU(candidate, ref) for candidate in results]
    # The leading 0 is the floor used when `results` is empty.
    return max([0] + candidate_scores)

def show_ingre(df, i):
    '''
    Print the "ingredients" and "generate_ingre" fields of row i of df,
    one per line, for a quick side-by-side look.
    '''
    row = df.iloc[i]
    for column in ("ingredients", "generate_ingre"):
        print(row[column])
    
    
    
def score_recipe_meteor(generate_recipes, ref):
    '''
    Best METEOR score over all generated recipes.

    input: 
        generate_recipes: a list of candidate recipes, each a list of
            instruction strings
        ref: the reference recipe as a list of instruction strings
    output: 
        the highest get_meteor_score over the candidates, or 0 when
        there are none. Instructions are joined with single spaces
        before scoring.
    '''
    reference_text = " ".join(ref)
    candidate_scores = [
        get_meteor_score(" ".join(steps), reference_text)
        for steps in generate_recipes
    ]
    return max([0] + candidate_scores)


def score_blue_n(generate_recipes, ref, n):
    '''
    Best BLEU-n score over all generated recipes.

    input: 
        generate_recipes: a list of candidate recipes, each a list of
            instruction strings
        ref: the reference recipe as a list of instruction strings
        n: highest n-gram order, forwarded to get_bleu_n_score
    output: 
        the highest get_bleu_n_score over the candidates, or 0 when
        there are none. Instructions are joined with single spaces
        before scoring.
    '''
    reference_text = " ".join(ref)
    candidate_scores = [
        get_bleu_n_score(" ".join(steps), reference_text, n)
        for steps in generate_recipes
    ]
    return max([0] + candidate_scores)





def get_tokenized_and_stemmed_list(ingre_lists, ingre_vocab):
    '''
    Reduce raw ingredient lines to the known ingredient stems they mention.

    input: 
        ingre_lists: a list of ingredient str
        ingre_vocab: a list of stemmed reference ingre vocab
    output: 
        a set of stemmed tokens that also appear in ingre_vocab

    Every line is tokenized with word_tokenize and every token is
    stemmed with the module-level `ps`; tokens outside the vocabulary
    are discarded.
    
    i.e.  
    in: 
        ["4 large baking potatoes (2 lb./900 g) Safeway 2 pkg For $5.00 thru 02/09"]
    out: 
        {"potato"}   (per the original example; depends on the vocabulary)
    '''
    stemmed_tokens = {
        ps.stem(token)
        for ingredient_text in ingre_lists
        for token in word_tokenize(ingredient_text)
    }
    return stemmed_tokens & set(ingre_vocab)


def is_recipe_validate(generate_score):
    '''
    Decide whether a row has at least one usable generated candidate.

    input: 
        generate_score: one entry per generated candidate; the first
            element of each entry is a boolean flag (the notebook
            outputs show entries shaped like [True, 0.55]).
            Presumably the flag marks the candidate as valid - confirm
            against the generation code.
    output: 
        False when every candidate's flag equals False (or when there
        are no candidates at all), True otherwise.

    The previous version looped over range(1) but compared the counter
    with 4, so the False branch could never be reached and nothing was
    ever filtered. All candidates are now inspected, however many
    there are.
    '''
    # `== False` (not `is False`) is kept on purpose so that flags such
    # as numpy booleans or 0 are still treated as "invalid".
    return any(not (entry[0] == False) for entry in generate_score)

In [6]:
# import string
# "apple?! jfs".translate(str.maketrans('', '', string.punctuation)).split()

In [45]:
# Shared stemmer used by the metric helpers defined in earlier cells.
ps = PorterStemmer()


def _load_pickle(path):
    # Read one pickled object and close the file afterwards.
    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only use this on trusted, locally produced files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# results = pickle.load(open( "df_recipe1023_all.pkl", "rb" ))
# results = pickle.load(open( "chinese1720sample_allinfo.pkl", "rb" ))
results = _load_pickle("output/transform.pkl")
stemmed_ingre = read_ingre_vocab("../data/recipe1m_vocab_ingrs.pkl")
df_origin = _load_pickle("df_recipe1023_all.pkl")
# Same file as `results`, loaded separately so the two objects stay independent.
df_ablation = _load_pickle("output/transform.pkl")
true_ingre_dict = _load_pickle("true_ingre_1k.pickle")

In [38]:
# Rename the 'true_ingredients' column to 'ingredients', the column name
# the evaluation cells read. rename() returns a new frame (inplace=False).
results = results.rename(columns={'true_ingredients':'ingredients'}, inplace=False)


In [39]:
# Show the "url" field of the first row of results.
results.iloc[0]["url"]

'http://img.sndimg.com/food/image/upload/w_512,h_512,c_fit,fl_progressive,q_95/v1/img/recipes/47/91/49/picaYYmb9.jpg'

In [40]:
# Show the "generate_score" field of row 2: one [flag, score] pair per candidate.
results.iloc[2]["generate_score"]

[[True, 0.5567010309278351],
 [True, 0.6268656716417911],
 [True, 0.52],
 [True, 0.5196850393700787]]

In [64]:
def show_ablation_study(df_origin, df_ablation, i, true_ingre_dict):
    '''
    Print, for row i, the ingredients and recipes of the two runs side by side.

    input: 
        df_origin: frame whose "generate_ingre", "url" and
            "generate_reci" fields are read
        df_ablation: frame whose "generate_reci" field is read
        i: positional row index used on both frames
        true_ingre_dict: mapping from a row's "url" to its true ingredients
    output: 
        nothing is returned; four labelled sections are printed
    '''
    origin_row = df_origin.iloc[i]

    # Ingredient comparison: first predicted list vs. the true list keyed by url.
    first_prediction = origin_row["generate_ingre"][0]
    ground_truth = true_ingre_dict[origin_row["url"]]

    # Instruction comparison: recipes from df_origin vs. recipes from df_ablation.
    recipe_from_prediction = origin_row["generate_reci"]
    recipe_from_truth = df_ablation.iloc[i]["generate_reci"]

    sections = (
        ("predicted_ingre-----------------------------------------", first_prediction),
        ("true_ingre----------------------------------------------", ground_truth),
        ("generate_recipe_with_predicted_ingre--------------------", recipe_from_prediction),
        ("generate_recipe_with_true_ingre-------------------------", recipe_from_truth),
    )
    for header, content in sections:
        print(header)
        print(content)

In [89]:
# Print the ablation comparison for row 21.
show_ablation_study(df_origin, df_ablation, 21, true_ingre_dict)

predicted_ingre-----------------------------------------
['potato', 'oil', 'pepper', 'squash', 'salt', 'onion']
true_ingre----------------------------------------------
['appl', 'broth', 'chicken', 'oil', 'oliv', 'parsley', 'onion', 'squash', 'crouton', 'thyme', 'cream']
generate_recipe_with_predicted_ingre--------------------
[['Preheat oven to 425 degrees f (220 degrees c).', 'Place sweet potatoes and onion in a large bowl.', 'Drizzle olive oil over the vegetables and toss to coat.', 'Spread vegetables onto a baking sheet.', 'Season with salt and pepper.', 'Roast in the preheated oven until vegetables are tender and lightly browned, about 45 minutes.'], ['Preheat the oven to 400.', 'Peel the squash and slice it diagonally into 1/4 inch cubes.', 'In a bowl, toss the squash and the onions with 2 teaspoons of the salt.', 'Spread into a single layer on a baking sheet and roast until the vegetables are browned and tender, 35 to 40 minutes.', 'Meanwhile, put the oil in a large skillet and 

In [90]:
# Show the "url" field of row 21 of df_origin.
df_origin.iloc[21]["url"]

'http://img.sndimg.com/food/image/upload/w_512,h_512,c_fit,fl_progressive,q_95/v1/img/recipes/26/09/29/piclnaUDO.jpg'

In [63]:
# Display the ablation frame (the rendering is truncated to the first and last rows).
df_ablation

Unnamed: 0,id,ingredients,instructions,url,generate_ingre,generate_reci,generate_title,generate_score
0,00003a70b1,"[2 12 cups milk, 1 12 cups water, 14 cup butte...","[Preheat oven to 350 degrees Fahrenheit., Spra...","http://img.sndimg.com/food/image/upload/w_512,...","[[cheese, onion, pepper, soup, milk, salt, but...",[[Preheat oven to 350 degrees f (175 degrees c...,"[Cheesy hash browns casserole, Easy scalloped ...","[[True, 0.6138613861386139], [True, 0.74468085..."
1,000075604a,"[2 Chicken thighs, 2 tsp Kombu tea, 1 White pe...",[Pierce the skin of the chicken with a fork or...,https://img-global.cpcdn.com/001_recipes/58069...,"[[chicken, pepper, salt, lemon, oil, onion, ju...",[[Preheat oven to 350 degrees f (175 degrees c...,"[Lemon chicken, Easy lemon chicken, Lemon pepp...","[[True, 0.6756756756756757], [True, 0.57843137..."
2,00007bfd16,"[6 -8 cups fresh rhubarb, or, 6 -8 cups frozen...",[Put ingredients in a buttered 9 x 12 x 2-inch...,"http://img.sndimg.com/food/image/upload/w_512,...","[[sugar, strawberries, water, egg, rhubarb, sa...",[[Preheat oven to 350 degrees f (175 degrees c...,"[Strawberry rhubarb pie, Rhubarb pie, Rhubarb ...","[[True, 0.5567010309278351], [True, 0.62686567..."
3,000095fc1d,"[8 ounces, weight Light Fat Free Vanilla Yogur...",[Layer all ingredients in a serving dish.],http://tastykitchen.com/recipes/wp-content/upl...,"[[strawberries, yogurt, blueberries, honey, al...","[[Layer half each of the strawberries, blueber...","[Berry parfait, Fresh berry crunch parfait, St...","[[True, 0.7419354838709677], [True, 0.73076923..."
4,0000b1e2b5,"[1 teaspoon fennel seeds, 1 pound pork tenderl...","[Preheat oven to 350F with rack in middle., Cr...",http://assets.epicurious.com/photos/5609a4d662...,"[[oil, pepper, potato, clove, salt, rosemary, ...","[[Preheat oven to 425 degrees f., In a large b...","[Roasted potatoes with herbs, Roasted garlic f...","[[True, 0.6833333333333333], [True, 0.58024691..."
...,...,...,...,...,...,...,...,...
987,00a2b3f1e4,"[4 whole Fresh Alaskan Halibut Fillets, Skin R...","[Salt and pepper the halibut steaks., Sprinkle...",http://tastykitchen.com/recipes/wp-content/upl...,"[[pepper, tomato, chicken, cheese, oil, salt, ...","[[Preheat oven to 350 degrees f., In a large s...","[Chicken with tomatoes and feta, Tomato chicke...","[[True, 0.5384615384615384], [True, 0.45833333..."
988,00a2dbf7a0,"[4 c. low-sodium chicken broth, 2 c. arborio r...",[Heat oven to 400 degrees and arrange a rack i...,http://del.h-cdn.co/assets/cm/15/10/54f6786832...,"[[oil, flour, salt, egg, baking_powder, sugar,...","[[Mix all ingredients together., Heat skillet ...","[Easy pancakes, Pancakes, Homemade yellow cake...","[[True, 0.7027027027027027], [True, 0.77272727..."
989,00a31da144,"[1/2 cup sugar, Juice of 1 orange (or 1/4 cup ...","[1., In a deep saucepan, bring the sugar, oran...",http://assets.epicurious.com/photos/560d99d97b...,"[[sugar, beet, water, blueberries, salt, butte...","[[In a large saucepan, combine the beets, wate...","[Blueberry beets, Beet blueberry coulis, Beets...","[[True, 0.46218487394957986], [True, 0.4551724..."
990,00a3285c25,"[7 roma tomatoes, 1 tbsp chicken flavor bouill...","[put tomatoes in pot of water, boil until toma...",https://img-global.cpcdn.com/001_photo_reports...,"[[cheese, onion, pepper, chicken, salt, tomato...","[[Preheat oven to 350, Mix all ingredients tog...","[Chicken enchiladas, Baked rotel chicken, Bake...","[[True, 0.7777777777777778], [True, 0.73809523..."


In [60]:
# Show every field of row 1 of df_origin.
df_origin.iloc[1]

id                                                       000075604a
ingredients       [2 Chicken thighs, 2 tsp Kombu tea, 1 White pe...
instructions      [Pierce the skin of the chicken with a fork or...
url               https://img-global.cpcdn.com/001_recipes/58069...
generate_ingre    [[chicken, pepper, salt, lemon, oil, paprika, ...
generate_reci     [[Preheat oven to 350 degrees f (175 degrees c...
generate_title    [Lemon chicken, Grilled paprika chicken, Lemon...
generate_score    [[True, 0.6756756756756757], [True, 0.59259259...
Name: 1, dtype: object

In [33]:
# For 172 only
# Ingredient-only evaluation: mean best-F1 and mean best-IOU over all rows
# that pass is_recipe_validate.

f1_summary = []
IOU_summary = []
for i in range(0, len(results)):
    row = results.iloc[i]

    # validate
    generate_score = row["generate_score"]
    if not is_recipe_validate(generate_score):
        continue

    # Evaluate the ingredient
    generate_ingre = row["generate_ingre"]
    reference_ingre = get_tokenized_and_stemmed_list(row["ingredients"], stemmed_ingre)

    try:
        f1_score = score_ingre_f1(generate_ingre, reference_ingre)
        iou_score = score_ingre_IOU(generate_ingre, reference_ingre)
    except ZeroDivisionError:
        # An empty prediction or reference makes the metric undefined:
        # report the row index and leave the row out of both averages.
        # Any other exception is a real bug and is allowed to propagate
        # (the former bare `except:` also swallowed KeyboardInterrupt).
        print (i)
        continue

    f1_summary.append(f1_score)
    IOU_summary.append(iou_score)


print(sum(f1_summary)/len(f1_summary))
print(sum(IOU_summary)/len(IOU_summary))

0.36873641832573967
0.2425339539461238


In [43]:
# For Recipe 1M only
# Full evaluation: for every row that passes is_recipe_validate, record the
# best ingredient F1 / IOU and the best recipe METEOR / BLEU-n over the
# generated candidates, then print the four means in that order.

n_bleu = 3 # bleu-n score, 4 is default
f1_summary, IOU_summary, meteor_summary, bleu_summary = [], [], [], []

for i in range(len(results)):
    row = results.iloc[i]

    # validate
    generate_score = row["generate_score"]
    if not is_recipe_validate(generate_score):
        continue

    # Evaluate the ingredient
    generate_ingre = row["generate_ingre"]
    reference_ingre = get_tokenized_and_stemmed_list(row["ingredients"], stemmed_ingre)
    f1_score = score_ingre_f1(generate_ingre, reference_ingre)
    iou_score = score_ingre_IOU(generate_ingre, reference_ingre)

    # Evaluate the recipe
    generate_recipes = row["generate_reci"]
    ref_recipe = row["instructions"]
    m_score = score_recipe_meteor(generate_recipes, ref_recipe)
    bleu_score = score_blue_n(generate_recipes, ref_recipe, n_bleu)

    # Record the row only after all four metrics were computed.
    for summary, value in (
        (f1_summary, f1_score),
        (IOU_summary, iou_score),
        (meteor_summary, m_score),
        (bleu_summary, bleu_score),
    ):
        summary.append(value)

for summary in (f1_summary, IOU_summary, meteor_summary, bleu_summary):
    print(sum(summary)/len(summary))

0.3822714563075838
0.2520162645266425
0.2126551174956491
0.0824011311040622


In [None]:
0.22894143727771604
0.30131453447328344
0.16368592972933632
0.09660006694603887
0.05463204076204984

In [28]:
# Show the generated recipes (lists of instruction strings) of row 2.
results.iloc[2]["generate_reci"]

[['Preheat oven to 350 degrees f (175 degrees c).',
  'Grease a 9x13 inch baking dish.',
  'In a large bowl, mix together the rhubarb, sugar, and water.',
  'Pour into the prepared baking dish.',
  'In a small bowl, mix together the cake mix and melted butter.',
  'Sprinkle over the rhubarb mixture.',
  'Bake in the preheated oven for 45 minutes, or until a toothpick inserted into the center of the cake comes out clean.'],
 ['Preheat oven to 350*.',
  'In a small bowl, mix together the dry jello, dry cake mix and water.',
  'Spread evenly on bottom of an ungreased 9x13 pan.',
  'In a medium sized bowl combine the rhubarb, sugar and butter.',
  'Pour evenly over dry cake mixture.',
  'Bake for 30-35 minutes or until a knife inserted in the center comes out clean.',
  'Remove from oven, cool for 10 minutes on a wire rack before serving.'],
 ["Place rhubarb in 9x13'' baking pan.",
  'Mix jello, sugar and butter.',
  'Add boiling water, and stir till melted.',
  'Pour over rhubarb.',
  'Ch

In [11]:
# Show the reference instructions of row 2.
results.iloc[2]["instructions"]

['Put ingredients in a buttered 9 x 12 x 2-inch pan in even layers in the order that they are given - DO NOT MIX.',
 'Bake in a 350 oven for 1 hour.']

In [14]:
transformation:
0.38209121434442705
0.2518988218773229
0.21262867201302285
0.045752426936059604

In [15]:
rotation:
0.36885635410182627
0.24263463759368842
0.2030833555305791
0.03971467392930373

In [16]:
original version:
0.3849248128279484
0.2549482903374174
0.21105063107297653
0.04279394272598328

In [11]:
# Extract token of true ingre for 1000
# Builds url -> list of vocabulary stems found in that row's ingredient lines.
# A url that occurs more than once keeps the entry of its last row.
true_ingre_1k = {}
for i in range(len(results)):
    row = results.iloc[i]
    true_ingre_1k[row["url"]] = list(
        get_tokenized_and_stemmed_list(row["ingredients"], stemmed_ingre)
    )
        

In [12]:
# Persist the url -> true-ingredient mapping. The context manager makes sure
# the file is flushed and closed, which pickle.dump(..., open(...)) did not.
with open("true_ingre_1k.pickle", "wb") as out_file:
    pickle.dump(true_ingre_1k, out_file)