In [1]:
from nltk.stem import PorterStemmer
import pickle
import nltk
nltk.download('punkt')
nltk.download('wordnet')
from nltk.tokenize import word_tokenize 
import string
from nltk.translate.meteor_score import meteor_score

[nltk_data] Downloading package punkt to
[nltk_data]     /home/ct2020dl5787/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /home/ct2020dl5787/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
def get_ingred_f1(pred, label):
    '''
    input: 
        pred: a list of predicted ingredients
        label: a list of label ingredients
    output: 
        F-1 score of the prediction
    
    i.e.  
    in: 
        pred = ["tomato", "sugar", "beef"]
        label = ["potato", "tomato"]
     
    out: 
        0.4
    '''
    intersection = list(set(pred) & set(label))
    precision = len(intersection) / len(pred)
    recall = len(intersection) / len(label)
    if precision + recall == 0:
        return 0
    f1 = 2 * precision * recall / (precision + recall)
    return f1


def get_ingred_IOU(pred, label):
    '''
    input: 
        pred: a list of predicted ingredients
        label: a list of label ingredients
    output: 
        IOU of the prediction
    
    i.e.  
    in: 
        pred = ["tomato", "sugar", "beef"]
        label = ["potato", "tomato"]
     
    out: 
        0.25
    '''
    intersection = len(list(set(pred) & set(label)))
    union = len(list(set(pred) | set(label)))
    iou = intersection / union
    return iou





def get_bleu_n_score(pred, label, n = 4):
    
    '''
    TODO: STEM not added
    input: 
        pred: One string of predict recipe 
        label: One string of reference recipe
        n(optional): up to n-gram.
    output: 
        bleu score
    
    i.e.  
    in: 
        pred = "Add the buttter"
        label = "Add half butter and mix well"
    out: 
        0.25
    '''
    weights = [1/n] * n
    pred_list = pred.translate(str.maketrans('', '', string.punctuation)).split()
    pred_list = [ps.stem(word) for word in pred_list]
    label_list = label.translate(str.maketrans('', '', string.punctuation)).split()
    label_list = [ps.stem(word) for word in label_list]
    BLEUscore = nltk.translate.bleu_score.sentence_bleu([label_list], pred_list, weights)
    return BLEUscore




def get_meteor_score(pred, label):
    return meteor_score([label], pred)

def read_ingre_vocab(filepath):
    '''
    input: 
        filepath of recipe1m_vocab_ingrs.pkl
    output: 
        a list of stemed ingre
    
    i.e.  
    in: 
        "4 large baking potatoes (2 lb./900 g) Safeway 2 pkg For $5.00 thru 02/09""
    out: 
        ["potato"]
    '''
    ingre = pickle.load(open(filepath, "rb" ))
    result = []
    for i in range(1, len(ingre.idx2word)):
        word = min(ingre.idx2word[i], key=len)
        if "_" not in word:
            result.append(ps.stem(word))
    return result

In [3]:
class Vocabulary(object):
    """Simple vocabulary wrapper."""
    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.idx = 0

    def add_word(self, word, idx=None):
        if idx is None:
            if not word in self.word2idx:
                self.word2idx[word] = self.idx
                self.idx2word[self.idx] = word
                self.idx += 1
            return self.idx
        else:
            if not word in self.word2idx:
                self.word2idx[word] = idx
                if idx in self.idx2word.keys():
                    self.idx2word[idx].append(word)
                else:
                    self.idx2word[idx] = [word]

                return idx

    def __call__(self, word):
        if not word in self.word2idx:
            return self.word2idx['<pad>']
        return self.word2idx[word]

    def __len__(self):
        return len(self.idx2word)



In [4]:
def score_ingre_f1(results, ref):
    '''
    input: 
        results: a list of candidates ingre list
        ref: a list of reference ingre
    output: 
        f1 score
    '''
    score = 0
    for result in results:
        score = max(score, get_ingred_f1(result, ref))
            
    return score


def score_ingre_IOU(results, ref):
    '''
    input: 
        results: a list of candidates ingre list
        ref: a list of reference ingre
    output: 
        IOU score
    '''
    score = 0
    for result in results:
        score = max(score, get_ingred_IOU(result, ref))
            
    return score

def show_ingre(df, i):
    print (df.iloc[i]["ingredients"])
    print (df.iloc[i]["generate_ingre"])
    
    
    
def score_recipe_meteor(generate_recipes, ref):
    score = 0
    ref = " ".join(ref)
    for recipe in generate_recipes:
        joined_recipe = " ".join(recipe)
        score = max(score, get_meteor_score(joined_recipe, ref))
    return score


def score_blue_n(generate_recipes, ref, n):
    score = 0
    ref = " ".join(ref)
    for recipe in generate_recipes:
        joined_recipe = " ".join(recipe)
        score = max(score, get_bleu_n_score(joined_recipe, ref, n))
    return score





def get_tokenized_and_stemmed_list(ingre_lists, ingre_vocab):
    '''
    input: 
        a list of ingredient str
        a list of stemmed reference ingre vocab
    output: 
        a set of indredient
    
    i.e.  
    in: 
        "4 large baking potatoes (2 lb./900 g) Safeway 2 pkg For $5.00 thru 02/09""
    out: 
        ["potato"]
    '''
    result = set()
    for ingre_list in ingre_lists:
        words = word_tokenize(ingre_list) 
        for word in words: 
            result.add(ps.stem(word))
            
    return result & set(ingre_vocab)


def is_recipe_validate(generate_score):
    count = 0
    for j in range (1):
#         if results.iloc[i]["generate_score"][j][0] == False:
        if generate_score[j][0] == False:
            count += 1
    if count == 4:
        return False
    return True

In [6]:
# import string
# "apple?! jfs".translate(str.maketrans('', '', string.punctuation)).split()

In [None]:
vocab_file = '/home/ct2020dl5787/VireoFood172/vocab172_mapping.csv'
chinese_vocab = []
mapped_vocab = []
with open(vocab_file) as file:
    csv_reader = csv.reader(file, delimiter=',')
    for row in csv_reader:
        chinese_vocab.append(row[0])
        mapped_vocab.append(row[1])

In [6]:
ps = PorterStemmer()
# results = pickle.load(open( "df_recipe1023_all.pkl", "rb" ))
# results = pickle.load(open( "chinese1720sample_allinfo.pkl", "rb" ))
results = pickle.load(open( "output/transform.pkl", "rb" ))
stemmed_ingre = read_ingre_vocab("../data/recipe1m_vocab_ingrs.pkl")
df_origin = pickle.load(open( "df_recipe1023_all.pkl", "rb" ))
df_ablation = pickle.load(open( "output/transform.pkl", "rb" ))
true_ingre_dict = pickle.load(open( "true_ingre_1k.pickle", "rb" ))
df_1720 = pickle.load(open( "output/finetune_1720.pkl", "rb" ))

In [7]:
df_1720

Unnamed: 0,img_names,true_onehot,true_top20,ingredients_Chinese,generate_ingre
0,86_10102721546691.jpg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[345, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...",[Crab],"[Parsley, Chinese Parsleycoriander, Dumplings,..."
1,140_10_14.jpg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[15, 95, 58, 174, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[Crushed pepper, Crushed hot and dry chili, Cr...","[Crushed pepper, Minced green onion, Minced gr..."
2,100_10_2.jpg,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[99, 7, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[Sliced ham, Egg cake, Brunoise diced lentinus...","[Minced green onion, Minced green onion, Mince..."
3,64_0fd9b207887e2199159d73db9cc5a803.jpg,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[125, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[Seared green onion, Shredded pepper, Tofu chu...","[Crushed pepper, Minced green onion, Minced gr..."
4,160_10_13.jpg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...","[13, 146, 15, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[Black sesame, Crushed pepper, Chinese Parsley...","[Minced green onion, Minced green onion, Mince..."
...,...,...,...,...,...
1715,109_10_24.jpg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...","[323, 21, 13, 46, 58, 0, 0, 0, 0, 0, 0, 0, 0, ...","[Black sesame, Chinese Parsleycoriander, Groun...","[Crushed pepper, Crushed hot and dry chili, Mi..."
1716,84_10_2.jpg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[27, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[Green vegetables, Stinky tofu]","[Crushed pepper, Lettuce, Minced green onion, ..."
1717,111_10_21.jpg,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...","[0, 121, 63, 12, 58, 21, 135, 0, 0, 0, 0, 0, 0...","[Minced green onion, Hob blocks of carrot, Chi...","[Minced green onion, Minced green onion, Mince..."
1718,135_10_10.jpg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[265, 27, 266, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[Green vegetables, Rice noodle, Fried yuba skin]","[Minced green onion, Minced green onion, Mince..."


In [38]:
results = results.rename(columns={'true_ingredients':'ingredients'}, inplace=False)


In [39]:
results.iloc[0]["url"]

'http://img.sndimg.com/food/image/upload/w_512,h_512,c_fit,fl_progressive,q_95/v1/img/recipes/47/91/49/picaYYmb9.jpg'

In [14]:
# results.iloc[2]["generate_score"]

In [64]:
def show_ablation_study(df_origin, df_ablation, i, true_ingre_dict):
    # Compare two ingre
    predicted_ingre = df_origin.iloc[i]["generate_ingre"][0]
    true_ingre = true_ingre_dict[df_origin.iloc[i]["url"]]
    
    # Compare two instruction
    generate_recipe_with_predicted_ingre = df_origin.iloc[i]["generate_reci"]
    generate_recipe_with_true_ingre = df_ablation.iloc[i]["generate_reci"]
    print("predicted_ingre-----------------------------------------")
    print(predicted_ingre)
    print("true_ingre----------------------------------------------")
    print(true_ingre)
    print("generate_recipe_with_predicted_ingre--------------------")
    print(generate_recipe_with_predicted_ingre)
    print("generate_recipe_with_true_ingre-------------------------")
    print(generate_recipe_with_true_ingre)

In [13]:
# show_ablation_study(df_origin, df_ablation, 21, true_ingre_dict)

In [90]:
df_origin.iloc[21]["url"]

'http://img.sndimg.com/food/image/upload/w_512,h_512,c_fit,fl_progressive,q_95/v1/img/recipes/26/09/29/piclnaUDO.jpg'

In [8]:
# df_ablation

In [9]:
# df_origin.iloc[1]

In [33]:
# For 172 only

f1_summary = []
IOU_summary = []
for i in range(0, len(results)):
    
    # validate
    generate_score = results.iloc[i]["generate_score"]
    if not is_recipe_validate(generate_score):
        continue
        
    # Evaluate the ingredient
    generate_ingre = results.iloc[i]["generate_ingre"]
    reference_ingre = get_tokenized_and_stemmed_list(results.iloc[i]["ingredients"], stemmed_ingre)

    try:
        f1_score = score_ingre_f1(generate_ingre, reference_ingre)
        iou_score = score_ingre_IOU(generate_ingre, reference_ingre)

        f1_summary.append(f1_score)
        IOU_summary.append(iou_score)
    except:
        print (i)

    
print(sum(f1_summary)/len(f1_summary))
print(sum(IOU_summary)/len(IOU_summary))

0.36873641832573967
0.2425339539461238


In [43]:
# For Recipe 1M only

n_bleu = 3 # bleu-n score, 4 is default
f1_summary = []
IOU_summary = []
meteor_summary = []
bleu_summary = []
for i in range(0, len(results)):
    
    # validate
    generate_score = results.iloc[i]["generate_score"]
    if not is_recipe_validate(generate_score):
        continue
        
    # Evaluate the ingredient
    generate_ingre = results.iloc[i]["generate_ingre"]
    reference_ingre = get_tokenized_and_stemmed_list(results.iloc[i]["ingredients"], stemmed_ingre)
    f1_score = score_ingre_f1(generate_ingre, reference_ingre)
    iou_score = score_ingre_IOU(generate_ingre, reference_ingre)
    
#     print("F-1 score", f1_score)
#     print("IOU score", iou_score)
    
    # Evaluate the recipe
    generate_recipes = results.iloc[i]["generate_reci"]
    ref_recipe = results.iloc[i]["instructions"]
    m_score = score_recipe_meteor(generate_recipes, ref_recipe)
    bleu_score = score_blue_n(generate_recipes, ref_recipe, n_bleu)
#     print("Meteor score", m_score)
#     print("Bleu score", bleu_score)

#     if(bleu_score < 0.05):
#         print (i)
#         print (bleu_score)
#         print (generate_recipes)
#         print (ref_recipe)
#         break
    
    
    f1_summary.append(f1_score)
    IOU_summary.append(iou_score)
    meteor_summary.append(m_score)
    bleu_summary.append(bleu_score)
    
print(sum(f1_summary)/len(f1_summary))
print(sum(IOU_summary)/len(IOU_summary))
print(sum(meteor_summary)/len(meteor_summary))
print(sum(bleu_summary)/len(bleu_summary))

0.3822714563075838
0.2520162645266425
0.2126551174956491
0.0824011311040622


In [10]:
# 0.22894143727771604
# 0.30131453447328344
# 0.16368592972933632
# 0.09660006694603887
# 0.05463204076204984

In [11]:
# results.iloc[2]["generate_reci"]

In [12]:
# results.iloc[2]["instructions"]

In [14]:
transformation:
0.38209121434442705
0.2518988218773229
0.21262867201302285
0.045752426936059604

In [15]:
rotation:
0.36885635410182627
0.24263463759368842
0.2030833555305791
0.03971467392930373

In [16]:
原版：
0.3849248128279484
0.2549482903374174
0.21105063107297653
0.04279394272598328

In [11]:
# Extract token of true ingre for 1000
true_ingre_1k = {}
for i in range(0, len(results)):
    reference_ingre = get_tokenized_and_stemmed_list(results.iloc[i]["ingredients"], stemmed_ingre)
    url = results.iloc[i]["url"]
    true_ingre_1k[url] = list(reference_ingre)
        

In [12]:
pickle.dump(true_ingre_1k, open( "true_ingre_1k.pickle", "wb" ) )