In [1]:
import unittest
from sentence_transformers import SentenceTransformer
from rag import embed_rag
from algorithm import matching_algorithm
import json

  from .autonotebook import tqdm as notebook_tqdm


## Declaration

In [2]:
embed_dim = 384  # for MiniLM-L12-v2
# multilingual lm
embeding_model = SentenceTransformer(
    'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
# import recipes from json file
top_k = 10
json_file_path = './data/raw/cookpad_recipe_ayam.json'
with open(json_file_path, 'r', encoding='utf-8') as f:
    recipes = json.load(f)

In [3]:
recipes = recipes[:10]
recipe_texts = [
    recipe['title'] + ' ' +
    " ".join(recipe['ingredients']) + ' ' +
    " ".join(step['text'] for step in recipe["steps"])
    for recipe in recipes
]
embeddings = embeding_model.encode(recipe_texts)
print("test embed recipes", embeddings.shape)

test embed recipes (10, 384)


In [4]:
embeddings

array([[-0.11901397, -0.0949651 ,  0.08674426, ...,  0.13223359,
         0.27163774, -0.09754443],
       [ 0.0968091 ,  0.11048147,  0.05694484, ..., -0.00428091,
         0.10568769, -0.07021818],
       [ 0.07361859, -0.01513905,  0.07011578, ...,  0.03816275,
         0.19945344,  0.01410835],
       ...,
       [ 0.06101205,  0.07363529,  0.06166223, ...,  0.14631297,
         0.22188115,  0.06272151],
       [ 0.12363438,  0.05425766,  0.05966412, ...,  0.09552623,
         0.18661752, -0.08534338],
       [ 0.09856094, -0.00779507,  0.05609968, ...,  0.1521916 ,
        -0.0293845 ,  0.01670752]], shape=(10, 384), dtype=float32)

## Development Algorithm Matching

In [None]:
import numpy as np
# from sklearn.metrics.pairwise import cosine_similarity

# --------------------------
# Helper functions
# --------------------------


def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

def update_user_pref(user_pref, item_embedding, rating, lr=0.5):
    """Update preferensi user berdasarkan rating (0-1)."""
    # print(item_embedding)
    # print(user_pref)
    return user_pref + lr * rating * (item_embedding - user_pref)

def mmr_rerank(user_pref, candidates, lambd=0.7, top_k=1, selected=[]):
    """Maximal Marginal Relevance Reranking."""
    bests = []
    while len(bests) < top_k and candidates:
        scores = []
        for name, emb in candidates:
            sim_to_user = cosine_similarity(user_pref, emb)
            sim_to_selected = max([cosine_similarity(emb, s[1]) for s in selected], default=0)
            score = lambd * sim_to_user - (1 - lambd) * sim_to_selected
            scores.append((score, (name, emb)))
        scores.sort(key=lambda x: x[0], reverse=True)
        best = scores[0][1]
        selected.append(best)
        bests.append(best)
        candidates.remove(best)
    return bests

In [None]:
# --------------------------
# Simulasi
# --------------------------

user_pref = embeddings[0]   # preferensi awal user
imgs = {f"ayam_{i}": embedding for i, embedding in enumerate(embeddings[1:])}
candidates = list(imgs.items())

selected = []
for step in range(5):  # iterasi 5x
    # print("selected:", selected)
    reranked = mmr_rerank(user_pref, candidates, lambd=0.7, top_k=1, selected=selected)
    print(f"\nIterasi {step+1}")
    for rank, (name, _) in enumerate(reranked, 1):
        print(f"{rank}. {name}")

    # hanya rating top-1
    top1_name, top1_emb = reranked[0]
    rating = float(input(f"Berikan rating untuk {top1_name} (-5->5): "))
    user_pref = update_user_pref(user_pref, top1_emb, rating, lr=0.8)



Iterasi 1
1. ayam_2

Iterasi 2
1. ayam_0

Iterasi 3
1. ayam_6

Iterasi 4
1. ayam_5

Iterasi 5
1. ayam_3


In [60]:
def rerank_ingredients(embed_all, embed_ingredients, lambd=0.7):
    return embed_ingredients * lambd + embed_all * (1 - lambd)

In [62]:
embeding_ingredients = embeding_model.encode(
    [" ".join(recipe['ingredients']) for recipe in recipes])

print("test embed recipes", embeding_ingredients.shape)

test embed recipes (10, 384)


In [65]:
# --------------------------
# Simulasi
# --------------------------

user_pref = embeddings[0]   # preferensi awal user
imgs_all = {f"ayam_{i}": embedding for i, embedding in enumerate(embeddings[1:])}
imgs_ingredients = {f"ayam_{i}": embedding for i, embedding in enumerate(embeding_ingredients[1:])}
imgs = {name: rerank_ingredients(imgs_all[name], imgs_ingredients[name], lambd=0.7) for name in imgs_all}
candidates = list(imgs.items())

selected = []
for step in range(5):  # iterasi 5x
    # print("selected:", selected)
    reranked = mmr_rerank(user_pref, candidates, lambd=0.7, top_k=1, selected=selected)

    # hanya rating top-1
    top1_name, top1_emb = reranked[0]
    print(f"\nIterasi {step+1}")
    for rank, (name, _) in enumerate(reranked, 1):
        print(f"{rank}. {name}")
    rating = float(input(f"Berikan rating untuk {top1_name} (-5->5): "))
    user_pref = update_user_pref(user_pref, top1_emb, rating, lr=0.8)



Iterasi 1
1. ayam_4

Iterasi 2
1. ayam_5

Iterasi 3
1. ayam_2

Iterasi 4
1. ayam_3

Iterasi 5
1. ayam_7
