In [1]:
from nltk.stem import PorterStemmer
import pickle
import nltk
nltk.download('punkt')
nltk.download('wordnet')
from nltk.tokenize import word_tokenize 
import string
from nltk.translate.meteor_score import meteor_score

[nltk_data] Downloading package punkt to
[nltk_data]     /home/ct2020dl5787/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /home/ct2020dl5787/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
def get_ingred_f1(pred, label):
    '''
    input: 
        pred: a list of predicted ingredients
        label: a list of label ingredients
    output: 
        F-1 score of the prediction
    
    i.e.  
    in: 
        pred = ["tomato", "sugar", "beef"]
        label = ["potato", "tomato"]
     
    out: 
        0.4
    '''
    intersection = list(set(pred) & set(label))
    precision = len(intersection) / len(pred)
    recall = len(intersection) / len(label)
    if precision + recall == 0:
        return 0
    f1 = 2 * precision * recall / (precision + recall)
    return f1


def get_ingred_IOU(pred, label):
    '''
    input: 
        pred: a list of predicted ingredients
        label: a list of label ingredients
    output: 
        IOU of the prediction
    
    i.e.  
    in: 
        pred = ["tomato", "sugar", "beef"]
        label = ["potato", "tomato"]
     
    out: 
        0.25
    '''
    intersection = len(list(set(pred) & set(label)))
    union = len(list(set(pred) | set(label)))
    iou = intersection / union
    return iou





def get_bleu_n_score(pred, label, n = 4):
    
    '''
    TODO: STEM not added
    input: 
        pred: One string of predict recipe 
        label: One string of reference recipe
        n(optional): up to n-gram.
    output: 
        bleu score
    
    i.e.  
    in: 
        pred = "Add the buttter"
        label = "Add half butter and mix well"
    out: 
        0.25
    '''
    weights = [1/n] * n
    pred_list = pred.translate(str.maketrans('', '', string.punctuation)).split()
    pred_list = [ps.stem(word) for word in pred_list]
    label_list = label.translate(str.maketrans('', '', string.punctuation)).split()
    label_list = [ps.stem(word) for word in label_list]
    BLEUscore = nltk.translate.bleu_score.sentence_bleu([label_list], pred_list, weights)
    return BLEUscore




def get_meteor_score(pred, label):
    return meteor_score([label], pred)

def read_ingre_vocab(filepath):
    '''
    input: 
        filepath of recipe1m_vocab_ingrs.pkl
    output: 
        a list of stemed ingre
    
    i.e.  
    in: 
        "4 large baking potatoes (2 lb./900 g) Safeway 2 pkg For $5.00 thru 02/09""
    out: 
        ["potato"]
    '''
    ingre = pickle.load(open(filepath, "rb" ))
    result = []
    for i in range(1, len(ingre.idx2word)):
        word = min(ingre.idx2word[i], key=len)
        if "_" not in word:
            result.append(ps.stem(word))
    return result

In [3]:
class Vocabulary(object):
    """Simple vocabulary wrapper."""
    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.idx = 0

    def add_word(self, word, idx=None):
        if idx is None:
            if not word in self.word2idx:
                self.word2idx[word] = self.idx
                self.idx2word[self.idx] = word
                self.idx += 1
            return self.idx
        else:
            if not word in self.word2idx:
                self.word2idx[word] = idx
                if idx in self.idx2word.keys():
                    self.idx2word[idx].append(word)
                else:
                    self.idx2word[idx] = [word]

                return idx

    def __call__(self, word):
        if not word in self.word2idx:
            return self.word2idx['<pad>']
        return self.word2idx[word]

    def __len__(self):
        return len(self.idx2word)



In [4]:
def score_ingre_f1(results, ref):
    '''
    input: 
        results: a list of candidates ingre list
        ref: a list of reference ingre
    output: 
        f1 score
    '''
    score = 0
    for result in results:
        score = max(score, get_ingred_f1(result, ref))
            
    return score


def score_ingre_IOU(results, ref):
    '''
    input: 
        results: a list of candidates ingre list
        ref: a list of reference ingre
    output: 
        IOU score
    '''
    score = 0
    for result in results:
        score = max(score, get_ingred_IOU(result, ref))
            
    return score

def show_ingre(df, i):
    print (df.iloc[i]["ingredients"])
    print (df.iloc[i]["generate_ingre"])
    
    
    
def score_recipe_meteor(generate_recipes, ref):
    score = 0
    ref = " ".join(ref)
    for recipe in generate_recipes:
        joined_recipe = " ".join(recipe)
        score = max(score, get_meteor_score(joined_recipe, ref))
    return score


def score_blue_n(generate_recipes, ref, n):
    score = 0
    ref = " ".join(ref)
    for recipe in generate_recipes:
        joined_recipe = " ".join(recipe)
        score = max(score, get_bleu_n_score(joined_recipe, ref, n))
    return score





def get_tokenized_and_stemmed_list(ingre_lists, ingre_vocab):
    '''
    input: 
        a list of ingredient str
        a list of stemmed reference ingre vocab
    output: 
        a set of indredient
    
    i.e.  
    in: 
        "4 large baking potatoes (2 lb./900 g) Safeway 2 pkg For $5.00 thru 02/09""
    out: 
        ["potato"]
    '''
    result = set()
    for ingre_list in ingre_lists:
        words = word_tokenize(ingre_list) 
        for word in words: 
            result.add(ps.stem(word))
            
    return result & set(ingre_vocab)


def is_recipe_validate(generate_score):
    count = 0
    for j in range (1):
#     for j in range (4):
        if generate_score[j][0] == False:
            count += 1
    if count == 4:
        return False
    return True

In [6]:
ps = PorterStemmer()
results = pickle.load(open( "output/ingr_only_1720.pkl", "rb" ))
stemmed_ingre = read_ingre_vocab("../data/recipe1m_vocab_ingrs.pkl")

In [7]:
results = results.rename(columns={'true_ingredients':'ingredients'}, inplace=False)


In [8]:
results

Unnamed: 0,img_names,ingredients,ingredients_Chinese,generate_ingre
0,92_075811n2tr1trtvr21vvao.jpg,"[scallion, pepper, beef]","[Minced green onion, Shredded pepper, Shredded...","[pepper, oil, onion, clove, soy_sauce, sugar, ..."
1,92_075817xlk22xxxagbk296x.jpg,"[scallion, pepper, beef]","[Minced green onion, Shredded pepper, Shredded...","[pepper, oil, onion, clove, beans, salt, tomato]"
2,92_100053030.1.jpg,"[scallion, pepper, beef]","[Minced green onion, Shredded pepper, Shredded...","[pepper, onion, oil, chicken, mushroom, salt, ..."
3,92_10_0.jpg,"[Parsley, beef]","[Chinese Parsleycoriander, Shredded beef tripe]","[pepper, oil, salt, potato, clove, onion, pars..."
4,92_10_1.jpg,"[Parsley, chili, garlic, beef]","[Chinese Parsleycoriander, Crushed hot and dry...","[oil, onion, pepper, soy_sauce, sugar, chicken..."
...,...,...,...,...
1715,63_10_14.jpg,"[tofu, chili]","[Tofu chunks, Chili oil]","[onion, pepper, oil, chicken, clove, tomato, w..."
1716,63_10_16.jpg,"[scallion, tofu, chili]","[Minced green onion, Tofu chunks, Chili oil]","[onion, pepper, oil, soy_sauce, clove, broth, ..."
1717,63_10_18.jpg,"[scallion, pork, tofu, chili]","[Minced green onion, Minced pork, Tofu chunks,...","[onion, pepper, potato, clove, oil, tomato, sa..."
1718,63_10_19.jpg,"[scallion, pepper, tofu]","[Minced green onion, Hot and dry pepper, Tofu ...","[onion, oil, soy_sauce, pepper, chicken, water..."


In [20]:
# For 172 only

f1_summary = []
IOU_summary = []
for i in range(0, len(results)):
    
    # validate
#     generate_score = results.iloc[i]["generate_score"]
#     if not is_recipe_validate(generate_score):
#         continue
        
    # Evaluate the ingredient
    generate_i = results.iloc[i]["generate_ingre"]
    generate_ingre = [generate_i]
    reference_ingre = get_tokenized_and_stemmed_list(results.iloc[i]["ingredients"], stemmed_ingre)
    if i < 10:
        print(generate_ingre,reference_ingre)

    try:    
        f1_score = score_ingre_f1(generate_ingre, reference_ingre)
        iou_score = score_ingre_IOU(generate_ingre, reference_ingre)
        f1_summary.append(f1_score)
        IOU_summary.append(iou_score)
    except:
         print (i)

    
print(sum(f1_summary)/len(f1_summary))
print(sum(IOU_summary)/len(IOU_summary))

[['pepper', 'oil', 'onion', 'clove', 'soy_sauce', 'sugar', 'beans', 'carrot']] {'beef', 'pepper', 'scallion'}
0.18181818181818182 0.1
[['pepper', 'oil', 'onion', 'clove', 'beans', 'salt', 'tomato']] {'beef', 'pepper', 'scallion'}
0.2 0.1111111111111111
[['pepper', 'onion', 'oil', 'chicken', 'mushroom', 'salt', 'soy_sauce', 'clove']] {'beef', 'pepper', 'scallion'}
0.18181818181818182 0.1
[['pepper', 'oil', 'salt', 'potato', 'clove', 'onion', 'parsley']] {'beef', 'parsley'}
0.22222222222222224 0.125
[['oil', 'onion', 'pepper', 'soy_sauce', 'sugar', 'chicken', 'vinegar']] {'garlic', 'beef', 'parsley', 'chili'}
0 0
[['pepper', 'oil', 'onion', 'salt', 'soy_sauce', 'clove']] {'beef', 'parsley', 'cucumb'}
0 0
[['oil', 'pepper', 'onion', 'clove', 'soy_sauce', 'ginger', 'sugar']] {'beef', 'pepper'}
0.22222222222222224 0.125
[['onion', 'soy_sauce', 'sugar', 'sake', 'water', 'mirin', 'stock', 'oil']] {'beef', 'parsley'}
0 0
[['oil', 'pepper', 'soy_sauce', 'clove', 'onion', 'ginger', 'sugar']] {'c

In [9]:
# For Recipe 1M only

n_bleu = 4 # bleu-n score, 4 is default
f1_summary = []
IOU_summary = []
meteor_summary = []
bleu_summary = []
for i in range(0, len(results)):
    
    # validate
    generate_score = results.iloc[i]["generate_score"]
    if not is_recipe_validate(generate_score):
        continue
        
    # Evaluate the ingredient
    generate_ingre = results.iloc[i]["generate_ingre"]
    reference_ingre = get_tokenized_and_stemmed_list(results.iloc[i]["ingredients"], stemmed_ingre)
    f1_score = score_ingre_f1(generate_ingre, reference_ingre)
    iou_score = score_ingre_IOU(generate_ingre, reference_ingre)
    
#     print("F-1 score", f1_score)
#     print("IOU score", iou_score)
    
    # Evaluate the recipe
    generate_recipes = results.iloc[i]["generate_reci"]
    ref_recipe = results.iloc[i]["instructions"]
    m_score = score_recipe_meteor(generate_recipes, ref_recipe)
    bleu_score = score_blue_n(generate_recipes, ref_recipe, n_bleu)
#     print("Meteor score", m_score)
#     print("Bleu score", bleu_score)

#     if(bleu_score < 0.05):
#         print (i)
#         print (bleu_score)
#         print (generate_recipes)
#         print (ref_recipe)
#         break
    
    
    f1_summary.append(f1_score)
    IOU_summary.append(iou_score)
    meteor_summary.append(m_score)
    bleu_summary.append(bleu_score)
    
print(sum(f1_summary)/len(f1_summary))
print(sum(IOU_summary)/len(IOU_summary))
print(sum(meteor_summary)/len(meteor_summary))
print(sum(bleu_summary)/len(bleu_summary))

The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


0.2725867782706981
0.1717123872443988
0.1220253762729777
0.014463251029724216


In [13]:
results.iloc[2]["generate_reci"]

[['Preheat oven to 350 degrees.',
  'In a large bowl, combine flour, baking soda and salt.',
  'In a separate bowl, beat together peanut butter, oil, sugar and egg.',
  'Add to dry ingredients and mix well.',
  'Drop by rounded teaspoonfuls onto ungreased cookie sheets.',
  'Bake for 8-10 minutes.']]

In [14]:
results.iloc[2]["instructions"]

['Put ingredients in a buttered 9 x 12 x 2-inch pan in even layers in the order that they are given - DO NOT MIX.',
 'Bake in a 350 oven for 1 hour.']