In [None]:
import json
def load_json(file_path):
    """Read a JSON list of records and index it by each record's ``cuisine_id``.

    Args:
        file_path: Path to a UTF-8 encoded JSON file whose top-level value is
            an iterable of objects that each carry a ``cuisine_id`` key.

    Returns:
        dict mapping ``cuisine_id`` to the complete record. If several records
        share an id, the last one in the file wins.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        records = json.load(handle)

    indexed = {}
    for record in records:
        indexed[record['cuisine_id']] = record
    return indexed

# Input files: the reference recipes plus one response file per evaluated model.
gt_file, gpt_file = 'ground_truth.json', 'response_gpt.json'
gemini_file, claude_file = 'response_gemini.json', 'response_claude.json'

# Each *_data dict is keyed by cuisine_id (see load_json).
gt_data = load_json(file_path=gt_file)
gpt_data = load_json(file_path=gpt_file)
gemini_data = load_json(file_path=gemini_file)
claude_data = load_json(file_path=claude_file)

# Dump every loaded dataset for a quick visual check.
print(gt_data, gpt_data, gemini_data, claude_data, sep='\n')

## 資料整理

### Ingredients 變成 sentence

In [None]:
# Layout of each mapping: {id: "ingredient sentence"}.
# These start out empty and are rebound to the real mappings later in this cell.
gt_ing_sentence_dic, gpt_ing_sentence_dic = {}, {}
gemini_ing_sentence_dic, claude_ing_sentence_dic = {}, {}

def ingredients_to_sentence(data):
    """Flatten every record's ingredient list into one space-separated string.

    Args:
        data: dict mapping an id to a record. Each record has an
            ``'ingredients'`` list whose entries are dicts with an
            ``'ingredient'`` string.

    Returns:
        dict mapping the same ids to a string in which every ingredient name
        is followed by a single space (so a non-empty result ends with a
        trailing space and an empty ingredient list yields ``""``).
    """
    return {
        key: "".join(entry['ingredient'] + " " for entry in record['ingredients'])
        for key, record in data.items()
    }

# Build the ingredient sentence mapping for the ground truth and for each model.
gt_ing_sentence_dic = ingredients_to_sentence(data=gt_data)
gpt_ing_sentence_dic = ingredients_to_sentence(data=gpt_data)
gemini_ing_sentence_dic = ingredients_to_sentence(data=gemini_data)
claude_ing_sentence_dic = ingredients_to_sentence(data=claude_data)

### Instruction 變成 sentence

In [None]:
# Layout of each mapping: {id: "cooking instructions"}.
# These start out empty and are rebound to the real mappings later in this cell.
gt_instrc_sentence_dic, gpt_instrc_sentence_dic = {}, {}
gemini_instrc_sentence_dic, claude_instrc_sentence_dic = {}, {}

def instructions_to_sentence(data):
    """Pull the ``'cooking_instructions'`` value out of every record.

    Prints the whole input first, then each extracted value as it is
    collected.

    Args:
        data: dict mapping an id to a record that has a
            ``'cooking_instructions'`` key.

    Returns:
        dict mapping the same ids to the record's ``'cooking_instructions'``
        value, unchanged.
    """
    print(data)
    collected = {}
    for key, record in data.items():
        steps = record['cooking_instructions']
        print(steps)
        collected[key] = steps
    return collected

# Build the instruction mapping for the ground truth and for each model.
gt_instrc_sentence_dic = instructions_to_sentence(data=gt_data)
gpt_instrc_sentence_dic = instructions_to_sentence(data=gpt_data)
gemini_instrc_sentence_dic = instructions_to_sentence(data=gemini_data)
claude_instrc_sentence_dic = instructions_to_sentence(data=claude_data)


## 食材種類正確性

### bert_base_chinese

In [None]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F


In [None]:
# Per-model lists of ingredient similarity scores; they start empty and are
# rebound to the computed scores later in this cell.
gpt_ing_bert_base_chinese, gemini_ing_bert_base_chinese, claude_ing_bert_base_chinese = [], [], []

# Holds the loaded (tokenizer, model) pair so the weights are read only once
# instead of on every similarity call.
_BERT_BASE_CHINESE_CACHE = {}


def _get_bert_base_chinese():
    """Return the bert-base-chinese (tokenizer, model) pair, loading it on first use."""
    if "pair" not in _BERT_BASE_CHINESE_CACHE:
        _BERT_BASE_CHINESE_CACHE["pair"] = (
            AutoTokenizer.from_pretrained("bert-base-chinese"),
            AutoModel.from_pretrained("bert-base-chinese"),
        )
    return _BERT_BASE_CHINESE_CACHE["pair"]


def _bert_first_token_embedding(tokenizer, model, text):
    """Encode ``text`` and return the hidden state at sequence position 0."""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**inputs)
    # Position 0 of the last hidden layer; shape (batch, hidden_size).
    return outputs.last_hidden_state[:, 0, :]


def bert_base_chinese_for_ing_sentence(gt_ing_sentence, model_ing_sentence):
    """Cosine similarity between the bert-base-chinese embeddings of two texts.

    Both texts are printed, embedded with the same tokenizer/model pair, and
    compared using the hidden state at sequence position 0. The tokenizer and
    model are loaded once and reused across calls; previously they were
    re-created on every call.

    Args:
        gt_ing_sentence: Reference text (a single string).
        model_ing_sentence: Candidate text to compare against the reference.

    Returns:
        float: cosine similarity in [-1, 1]; closer to 1 means more similar.
    """
    print(gt_ing_sentence)
    print(model_ing_sentence)

    tokenizer, model = _get_bert_base_chinese()

    gt_embedding = _bert_first_token_embedding(tokenizer, model, gt_ing_sentence)
    model_embedding = _bert_first_token_embedding(tokenizer, model, model_ing_sentence)

    # Cosine similarity (range -1 to 1; the closer to 1, the more similar).
    similarity = F.cosine_similarity(gt_embedding, model_embedding)
    score = similarity.item()
    print(f"相似度: {score:.4f}")
    return score

def get_ing_bert_base_chinese(gt_ing_sentence_dic, model_ing_sentence_dic):
    """Score every model ingredient sentence against its ground-truth counterpart.

    Sentences are paired by dictionary key whenever both mappings contain
    exactly the same keys, so a model file whose records are stored in a
    different order is still compared dish-for-dish. If the key sets differ,
    a warning is issued and the sentences are paired by position instead
    (the shorter mapping determines how many pairs are scored).

    Args:
        gt_ing_sentence_dic: dict mapping an id to the reference sentence.
        model_ing_sentence_dic: dict mapping an id to the model's sentence.

    Returns:
        list of similarity scores, in the iteration order of
        ``gt_ing_sentence_dic``.
    """
    import warnings  # local import: only needed on the fallback path

    if gt_ing_sentence_dic.keys() == model_ing_sentence_dic.keys():
        pairs = [
            (gt_ing_sentence_dic[key], model_ing_sentence_dic[key])
            for key in gt_ing_sentence_dic
        ]
    else:
        warnings.warn(
            "Ground-truth and model ingredient ids differ; "
            "falling back to pairing sentences by position.",
            stacklevel=2,
        )
        pairs = list(zip(gt_ing_sentence_dic.values(), model_ing_sentence_dic.values()))

    ing_bert_base_chinese = []
    for sentence1, sentence2 in pairs:
        print(sentence1)
        print(sentence2)
        ing_bert_base_chinese.append(bert_base_chinese_for_ing_sentence(sentence1, sentence2))
    return ing_bert_base_chinese

# Score each model's ingredient sentences against the ground truth.
gpt_ing_bert_base_chinese = get_ing_bert_base_chinese(
    gt_ing_sentence_dic=gt_ing_sentence_dic,
    model_ing_sentence_dic=gpt_ing_sentence_dic,
)
gemini_ing_bert_base_chinese = get_ing_bert_base_chinese(
    gt_ing_sentence_dic=gt_ing_sentence_dic,
    model_ing_sentence_dic=gemini_ing_sentence_dic,
)
claude_ing_bert_base_chinese = get_ing_bert_base_chinese(
    gt_ing_sentence_dic=gt_ing_sentence_dic,
    model_ing_sentence_dic=claude_ing_sentence_dic,
)

# Show the score lists, one line per model.
print(f'gpt_ing_bert_base_chinese = {gpt_ing_bert_base_chinese}')
print(f'gemini_ing_bert_base_chinese = {gemini_ing_bert_base_chinese}')
print(f'claude_ing_bert_base_chinese = {claude_ing_bert_base_chinese}')

In [None]:
import matplotlib.pyplot as plt
# Bar positions: one integer slot per sample (1..15), with the neighbouring
# series shifted 0.2 to the right (x2) and 0.2 to the left (x3).
x = list(range(1, 16))
x2 = [pos + 0.2 for pos in x]
x3 = [pos - 0.2 for pos in x]

fig, ax = plt.subplots(figsize=(12, 6))

# Background colour for each band of five samples, keyed by the band's offset.
color_map = {0: "#e9f1c3", 5: "#c4edfb", 10: "#f7dfcb"}

for i in color_map:
    ax.axvspan(i + 0.5, i + 5.5, facecolor=color_map[i], alpha=0.99)

# Draw the grouped bars: Claude on the left, Gemini centred, GPT on the right.
ax.bar(x3, claude_ing_bert_base_chinese, width=0.2, color='g', label='Claude')
ax.bar(x, gemini_ing_bert_base_chinese, width=0.2, color='r', label='Gemini')
ax.bar(x2, gpt_ing_bert_base_chinese, width=0.2, color='b', label='GPT')

# Legend, ticks, labels, title and y-axis range.
ax.legend()
ax.set_xticks(x)
ax.set_xlabel('Samples', fontsize=12)
ax.set_ylabel('Bert', fontsize=12)
ax.set_title('Ingredients bert', fontsize=14)
ax.set_ylim(0.5, 1.0)

plt.tight_layout()
plt.savefig('ing_bert.png')

## Cooking Instructions

In [None]:
# Per-model lists of instruction similarity scores; they start empty and are
# rebound to the computed scores later in this cell.
gpt_instrc_bert_base_chinese, gemini_instrc_bert_base_chinese, claude_instrc_bert_base_chinese = [], [], []

def bert_base_chinese_for_instrc_sentence(gt_instrc_sentence, model_instrc_sentence):
    """Cosine similarity between the bert-base-chinese embeddings of two instruction texts.

    The steps are exactly those of ``bert_base_chinese_for_ing_sentence``
    defined earlier in this notebook (print both texts, embed each one,
    print and return the cosine similarity), so this function delegates to it
    rather than keeping a second copy of the same code.

    Args:
        gt_instrc_sentence: Reference cooking-instruction text.
        model_instrc_sentence: Candidate cooking-instruction text.

    Returns:
        float: cosine similarity in [-1, 1]; closer to 1 means more similar.
    """
    return bert_base_chinese_for_ing_sentence(gt_instrc_sentence, model_instrc_sentence)

def get_instrc_bert_base_chinese(gt_instrc_sentence_dic, model_instrc_sentence_dic):
    """Score every model instruction text against its ground-truth counterpart.

    Texts are paired by dictionary key whenever both mappings contain exactly
    the same keys, so a model file whose records are stored in a different
    order is still compared dish-for-dish. If the key sets differ, a warning
    is issued and the texts are paired by position instead (the shorter
    mapping determines how many pairs are scored).

    Args:
        gt_instrc_sentence_dic: dict mapping an id to the reference text.
        model_instrc_sentence_dic: dict mapping an id to the model's text.

    Returns:
        list of similarity scores, in the iteration order of
        ``gt_instrc_sentence_dic``.
    """
    import warnings  # local import: only needed on the fallback path

    if gt_instrc_sentence_dic.keys() == model_instrc_sentence_dic.keys():
        pairs = [
            (gt_instrc_sentence_dic[key], model_instrc_sentence_dic[key])
            for key in gt_instrc_sentence_dic
        ]
    else:
        warnings.warn(
            "Ground-truth and model instruction ids differ; "
            "falling back to pairing texts by position.",
            stacklevel=2,
        )
        pairs = list(zip(gt_instrc_sentence_dic.values(), model_instrc_sentence_dic.values()))

    instrc_bert_base_chinese = []
    for sentence1, sentence2 in pairs:
        print(sentence1)
        print(sentence2)
        instrc_bert_base_chinese.append(bert_base_chinese_for_instrc_sentence(sentence1, sentence2))
    return instrc_bert_base_chinese

# Score each model's cooking instructions against the ground truth.
gpt_instrc_bert_base_chinese = get_instrc_bert_base_chinese(
    gt_instrc_sentence_dic=gt_instrc_sentence_dic,
    model_instrc_sentence_dic=gpt_instrc_sentence_dic,
)
gemini_instrc_bert_base_chinese = get_instrc_bert_base_chinese(
    gt_instrc_sentence_dic=gt_instrc_sentence_dic,
    model_instrc_sentence_dic=gemini_instrc_sentence_dic,
)
claude_instrc_bert_base_chinese = get_instrc_bert_base_chinese(
    gt_instrc_sentence_dic=gt_instrc_sentence_dic,
    model_instrc_sentence_dic=claude_instrc_sentence_dic,
)

# Show the score lists, one line per model.
print(f'gpt_instrc_bert_base_chinese = {gpt_instrc_bert_base_chinese}')
print(f'gemini_instrc_bert_base_chinese = {gemini_instrc_bert_base_chinese}')
print(f'claude_instrc_bert_base_chinese = {claude_instrc_bert_base_chinese}')

In [None]:
# Bar positions: one integer slot per sample (1..15), with the neighbouring
# series shifted 0.2 to the right (x2) and 0.2 to the left (x3).
x = list(range(1, 16))
x2 = [pos + 0.2 for pos in x]
x3 = [pos - 0.2 for pos in x]

fig, ax = plt.subplots(figsize=(12, 6))

# Background colour for each band of five samples, keyed by the band's offset.
color_map = {0: "#e9f1c3", 5: "#c4edfb", 10: "#f7dfcb"}

for i in color_map:
    ax.axvspan(i + 0.5, i + 5.5, facecolor=color_map[i], alpha=0.99)

# Draw the grouped bars: Claude on the left, Gemini centred, GPT on the right.
ax.bar(x3, claude_instrc_bert_base_chinese, width=0.2, color='g', label='Claude')
ax.bar(x, gemini_instrc_bert_base_chinese, width=0.2, color='r', label='Gemini')
ax.bar(x2, gpt_instrc_bert_base_chinese, width=0.2, color='b', label='GPT')

# Legend, ticks, labels, title and y-axis range.
ax.legend()
ax.set_xticks(x)
ax.set_xlabel('Samples', fontsize=12)
ax.set_ylabel('Bert', fontsize=12)
ax.set_title('Cooking Instructions bert', fontsize=14)
ax.set_ylim(0.5, 1.0)

plt.tight_layout()
plt.savefig('instrc_bert.png')