In [None]:
! pip install scikit-learn nltk rouge-score bert-score

In [24]:
import json
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import jaccard_score
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from bert_score import score as bert_score

# Load data
def load_json(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        return {item['id']: item for item in json.load(f)}  #回傳一個dictionary，key是id，內容是料理的dictionary

# 1. 食材評估：F1-score、Jaccard (IOU)
def evaluate_ingredients(gt_data, pred_data):
    ids = list(gt_data.keys())
    all_gt = [gt_data[i]['ingredients'] for i in ids]
    all_pred = [pred_data[i]['ingredients'] for i in ids]

    mlb = MultiLabelBinarizer()
    all_labels = all_gt + all_pred
    mlb.fit(all_labels)

    y_true = mlb.transform(all_gt)
    y_pred = mlb.transform(all_pred)

    precision = precision_score(y_true, y_pred, average='micro')
    recall = recall_score(y_true, y_pred, average='micro')
    f1 = f1_score(y_true, y_pred, average='micro')
    jaccard = jaccard_score(y_true, y_pred, average='samples')

    return precision, recall, f1, jaccard

# 2. 步驟評估：BLEU、ROUGE、BERTScore
def evaluate_steps(gt_data, pred_data):
    bleu_scores = []
    rouge_l_scores = []

    gt_texts = []
    pred_texts = []

    smoothie = SmoothingFunction().method4
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

    for i in gt_data:
        reference = gt_data[i]['cooking_instructions']
        hypothesis = pred_data[i]['cooking_instructions']

        bleu = sentence_bleu([reference], hypothesis, smoothing_function=smoothie)
        rouge_l = scorer.score(reference, hypothesis)['rougeL'].fmeasure

        bleu_scores.append(bleu)
        rouge_l_scores.append(rouge_l)
        gt_texts.append(reference)
        pred_texts.append(hypothesis)

    # BERTScore（中文模型）
    P, R, F1 = bert_score(pred_texts, gt_texts, lang="zh", verbose=False)

    return np.mean(bleu_scores), np.mean(rouge_l_scores), float(F1.mean())


gt_file = 'ground_truth.json'
pred_file = 'model_ouput.json'
gt_data = load_json(gt_file)
pred_data = load_json(pred_file)
print(gt_data)

# print("📌 食材辨識評估")
# precision, recall, f1, jaccard = evaluate_ingredients(gt_data, pred_data)
# print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}, Jaccard: {jaccard:.4f}")

print("instruction metrics:\n")
bleu, rouge_l, bert = evaluate_steps(gt_data, pred_data)
print(f"BLEU: {bleu:.4f}, ROUGE-L: {rouge_l:.4f}, BERTScore(F1): {bert:.4f}")




{1: {'id': 1, 'fake_id': 100, 'cuisine_img_url': 'https://icook.tw/recipes/455986', 'cuisine_name': '滷肉飯', 'ingredients': [{'ingredient': '五花肉', 'amount': '2000', 'unit': '克'}, {'ingredient': '洋蔥', 'amount': '2', 'unit': '顆'}, {'ingredient': '紅蔥頭末', 'amount': '50', 'unit': '克'}, {'ingredient': '蒜末', 'amount': '50', 'unit': '克'}, {'ingredient': '五香粉', 'amount': '1', 'unit': '小匙'}, {'ingredient': '醬油', 'amount': '1', 'unit': '杯'}, {'ingredient': '油膏', 'amount': '0.5', 'unit': '杯'}, {'ingredient': '紹興酒', 'amount': '4', 'unit': '大匙'}, {'ingredient': '冰糖', 'amount': '50', 'unit': '克'}], 'cooking_instructions': '小黃瓜洗淨後，去除頭尾，然後以菜刀拍碎; 將拍碎的小黃瓜切小段; 加入細砂糖拌勻，去菁15分鐘; 辣椒清洗後切成辣椒圈; 蒜頭清洗後切成蒜末; 用手將小黃瓜多餘的水份擠出，然後倒掉; 加入辣椒圈和蒜末; 加入醬油; 加入烏醋; 加入鹽; 淋上加熱後的植物油(淋熱油); 將所有食材拌勻即可食用; 於冰箱醃漬2小時以上會更入味更好吃; '}, 2: {'id': 2, 'fake_id': 200, 'cuisine_img_url': 'https://icook.tw/recipes/455986', 'cuisine_name': '滷肉飯', 'ingredients': [{'ingredient': '五花肉', 'amount': '2000', 'unit': '克'}, {'ingredient': '洋蔥', 'amount': '2', 'un

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/624 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/269k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/412M [00:00<?, ?B/s]

BLEU: 1.0000, ROUGE-L: 1.0000, BERTScore(F1): 1.0000
