In [None]:
from transformers import TFBertModel, BertTokenizer
import tensorflow as tf
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
from tensorflow.keras import layers, Model
from sklearn.metrics.pairwise import cosine_similarity



In [None]:
def _parse_embedding(raw):
    """Convert one cell of the 'embedding' column into a 1-D float32 vector.

    A vector stored in a single CSV column is read back by ``pd.read_csv`` as text,
    so string cells are parsed here. Both the list repr ("[0.1, 0.2]") and the
    NumPy repr ("[0.1 0.2]") are accepted.
    NOTE(review): the exact cell format is assumed to be bracketed numbers separated
    by commas and/or whitespace -- confirm against the code that wrote the CSV.

    Args:
        raw: A string cell, or an already-numeric sequence.

    Returns:
        np.ndarray: 1-D float32 array.

    Raises:
        ValueError: If a string cell contains a token that is not a number.
    """
    if isinstance(raw, str):
        tokens = raw.replace('[', ' ').replace(']', ' ').replace(',', ' ').split()
        return np.array([float(token) for token in tokens], dtype=np.float32)
    return np.asarray(raw, dtype=np.float32)


recipes_df = pd.read_csv("recipes_with_final_embeddings.csv")
# Numeric (n_recipes, embedding_dim) matrix; row i corresponds to recipes_df.iloc[i].
# The raw column cannot be handed to NumPy/Keras as-is because its cells are text.
# np.stack raises if the rows do not all have the same length.
embeddings = np.stack(recipes_df['embedding'].map(_parse_embedding).to_numpy())

In [None]:
def common_category(categories1, categories2):
    """Report whether two category collections have at least one element in common.

    Each argument is either an iterable of categories or a string containing a
    Python expression that evaluates to one (for example "['soup', 'vegan']").

    Args:
        categories1: Categories of the first recipe.
        categories2: Categories of the second recipe.

    Returns:
        bool: True if the two collections overlap, False otherwise.
    """
    # NOTE(review): eval() executes whatever expression the string holds. If these
    # strings can come from an untrusted file, ast.literal_eval would be the safer way
    # to decode them.
    if isinstance(categories1, str):
        first = set(eval(categories1))
    else:
        first = set(categories1)

    if isinstance(categories2, str):
        second = set(eval(categories2))
    else:
        second = set(categories2)

    # Sharing at least one element is the same as not being disjoint.
    return not first.isdisjoint(second)

def generate_pairs(recipes_df, embeddings, num_pairs=1000, seed=None):
    """Sample positive and negative embedding pairs for contrastive training.

    ``num_pairs`` times, a random anchor recipe is drawn. One random recipe that
    shares a category with the anchor (according to ``common_category``) yields a
    positive pair, and one random recipe that shares none yields a negative pair.
    A draw for which no candidate of a given kind exists adds no pair of that kind,
    so either result may contain fewer than ``num_pairs`` pairs.

    Rows are addressed by position throughout, so the function also works when
    ``recipes_df`` does not carry the default 0..n-1 index.

    Args:
        recipes_df: DataFrame with a 'categories' column.
        embeddings: One embedding per recipe (pandas object or array-like).
            Assumed to be row-aligned with ``recipes_df`` -- TODO confirm at call site.
        num_pairs: Number of anchors to draw.
        seed: Optional integer seed for a private random generator. With the default
            ``None`` the global ``np.random`` state is used, so a prior
            ``np.random.seed(...)`` still controls the result.

    Returns:
        tuple: ``(positive_pairs, negative_pairs)``, each ``np.array`` built from a
        list of ``[anchor_embedding, other_embedding]`` entries.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Extract the column once: rebuilding a row with recipes_df.iloc[i]['categories']
    # for every candidate of every anchor dominates the running time.
    categories = recipes_df['categories'].tolist()
    vectors = embeddings.to_numpy() if hasattr(embeddings, 'to_numpy') else embeddings
    n_recipes = len(categories)

    positive_pairs, negative_pairs = [], []

    for _ in range(num_pairs):
        anchor = rng.randint(n_recipes)
        anchor_categories = categories[anchor]

        # A single scan splits every other recipe into exactly one of the two groups.
        positive_candidates, negative_candidates = [], []
        for other, other_categories in enumerate(categories):
            if other == anchor:
                continue
            if common_category(anchor_categories, other_categories):
                positive_candidates.append(other)
            else:
                negative_candidates.append(other)

        if positive_candidates:
            partner = rng.choice(positive_candidates)
            positive_pairs.append([vectors[anchor], vectors[partner]])

        if negative_candidates:
            partner = rng.choice(negative_candidates)
            negative_pairs.append([vectors[anchor], vectors[partner]])

    return np.array(positive_pairs), np.array(negative_pairs)

# Pair sampling draws from NumPy's global random state; fix the seed so that a
# Restart & Run All produces the same training pairs every time.
PAIR_SAMPLING_SEED = 42
np.random.seed(PAIR_SAMPLING_SEED)
positive_pairs, negative_pairs = generate_pairs(recipes_df, embeddings)



In [None]:

def contrastive_loss(y_true, y_pred, margin=1.0):
    """Contrastive loss over predicted pair distances.

    Computes ``mean(y * d**2 + (1 - y) * max(margin - d, 0)**2)`` where ``d`` is
    ``y_pred`` and ``y`` is ``y_true``.

    Args:
        y_true: Pair labels; 1 penalises distance, 0 penalises distances below
            ``margin``. Must have as many elements as ``y_pred``.
        y_pred: Predicted distance for each pair.
        margin: Distance at or above which a 0-labelled pair contributes no loss.

    Returns:
        Scalar tensor with the mean loss over all pairs.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Align the labels with the distances before mixing them: integer labels would
    # fail the multiplication, and (batch,) labels against (batch, 1) distances would
    # silently broadcast to a (batch, batch) matrix.
    y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), tf.shape(y_pred))

    positive_loss = y_true * tf.square(y_pred)
    negative_loss = (1 - y_true) * tf.square(tf.maximum(margin - y_pred, 0))
    return tf.reduce_mean(positive_loss + negative_loss)

def euclidean_distance(embedding_pair):
    """Row-wise Euclidean distance between two batches of embeddings.

    Args:
        embedding_pair: Sequence ``[first, second]`` of tensors with equal shape.

    Returns:
        Tensor with one distance per row, kept as a trailing axis of size 1.
    """
    first, second = embedding_pair
    squared_distance = tf.reduce_sum(tf.square(first - second), axis=1, keepdims=True)
    # The square root has an unbounded slope at 0, so two identical embeddings would
    # produce NaN gradients; clamp to a tiny positive value before taking the root.
    return tf.sqrt(tf.maximum(squared_distance, 1e-7))


# Dimensionality of the input embeddings fed to both branches.
input_shape = (768,)
input_a = layers.Input(shape=input_shape)
input_b = layers.Input(shape=input_shape)

# One encoder instance is applied to both inputs, so the two branches share weights.
shared_network = tf.keras.Sequential([
    layers.InputLayer(shape=input_shape),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu')
])
processed_a = shared_network(input_a)
processed_b = shared_network(input_b)

# The model outputs the distance between the two encoded inputs; contrastive_loss
# consumes that distance as y_pred.
distance = layers.Lambda(euclidean_distance)([processed_a, processed_b])
siamese_model = Model(inputs=[input_a, input_b], outputs=distance)
siamese_model.compile(optimizer='adam', loss=contrastive_loss)


In [None]:
# Train on positive and negative pairs together. Fitting on positives alone rewards
# mapping every recipe to the same point (label 1 only penalises distance), and a
# later negatives-only run then pushes everything apart again, so each batch must
# see both kinds of pairs.
pair_inputs = np.concatenate([positive_pairs, negative_pairs], axis=0)
pair_labels = np.concatenate([
    np.ones(len(positive_pairs), dtype=np.float32),   # 1 = pair shares a category
    np.zeros(len(negative_pairs), dtype=np.float32),  # 0 = pair shares no category
])

# shuffle=True mixes the two label groups, which are stored back to back above.
siamese_model.fit([pair_inputs[:, 0], pair_inputs[:, 1]], pair_labels,
                  epochs=10, batch_size=32, shuffle=True)


In [None]:
def recommend_similar(liked_recipe_indices, top_n=5):
    """Recommend recipes whose encoded embeddings are closest to a set of liked recipes.

    Every recipe embedding is passed through ``shared_network``; the encoded vectors
    of the liked recipes are averaged and all recipes are ranked by cosine similarity
    to that average. The liked recipes themselves are never recommended. The liked
    recipes are printed as a side effect.

    Args:
        liked_recipe_indices: Positional row indices (into ``recipes_df`` and
            ``embeddings``) of the recipes the user liked. Must not be empty.
        top_n: Maximum number of recommendations. Fewer rows are returned when there
            are not enough other recipes.

    Returns:
        DataFrame with the 'title', 'categories' and 'ingredients' columns of the
        recommended recipes, most similar first.

    Raises:
        ValueError: If ``liked_recipe_indices`` is empty.
    """
    liked_positions = np.asarray(liked_recipe_indices, dtype=int)
    if liked_positions.size == 0:
        raise ValueError("liked_recipe_indices must contain at least one recipe index")

    # predict() needs one 2-D numeric array; a pandas object holding one vector per
    # row is stacked into that form first.
    if hasattr(embeddings, 'to_numpy'):
        embedding_matrix = np.stack(embeddings.to_numpy())
    else:
        embedding_matrix = np.asarray(embeddings)

    # Encode everything in one call; the liked vectors are rows of the same result.
    all_embeddings = np.asarray(shared_network.predict(embedding_matrix))
    average_embedding = all_embeddings[liked_positions].mean(axis=0, keepdims=True)

    similarities = np.array(cosine_similarity(average_embedding, all_embeddings)[0], dtype=float)

    # Exclude the liked recipes: they are trivially the closest to their own average.
    is_liked = np.zeros(similarities.size, dtype=bool)
    is_liked[liked_positions] = True
    similarities[is_liked] = -np.inf

    # Clamp so that top_n=0 yields nothing and excluded recipes never fill the tail.
    n_recommend = max(0, min(top_n, int((~is_liked).sum())))
    similar_indices = np.argsort(similarities)[::-1][:n_recommend]

    print("Liked Recipes:")
    liked_recipes = recipes_df.iloc[liked_positions]
    print(liked_recipes[['title', 'categories', 'ingredients']])
    print("\nRecommended Recipes:")

    recommended_recipes = recipes_df.iloc[similar_indices]
    return recommended_recipes[['title', 'categories', 'ingredients']]

# Demo: ask for five recommendations based on three liked recipes and show the result.
# The indices are passed to recipes_df.iloc inside recommend_similar, so they are
# positional row numbers.
liked_recipe_indices = [7, 20, 50]  # example list of liked recipes
recommended_recipes = recommend_similar(liked_recipe_indices, top_n=5)
print(recommended_recipes)
