In [None]:
!pip install -U spacy sentence-transformers faiss-cpu
!python -m spacy download en_core_web_sm

import pandas as pd

# Load the dataset (upload the CSV to Google Colab / the notebook runtime
# first so that it exists at this path).
csv_file_path = '/content/Food Ingredients and Recipe Dataset with Image Name Mapping.csv'
text_columns = ['Title', 'Ingredients', 'Cleaned_Ingredients']

# Read the file and drop every row that is missing one of the text fields
# used by the rest of the pipeline.
df = pd.read_csv(csv_file_path).dropna(subset=text_columns)

# Cast the text columns to plain Python strings.
for column_name in text_columns:
    df[column_name] = df[column_name].astype(str)

df.head()

import spacy
from tqdm import tqdm

# Load the spaCy English pipeline (fetched by the `spacy download` step at the
# top of the notebook). lemmatize_text() uses this module-level `nlp` object.
nlp = spacy.load("en_core_web_sm")

# Lemmatization helper.
def lemmatize_text(text):
    """Return the lemmas of *text* joined by single spaces.

    The text is run through the module-level spaCy pipeline ``nlp``; tokens
    flagged as stop words or punctuation are left out of the result.
    """
    kept_lemmas = []
    for token in nlp(text):
        if token.is_stop or token.is_punct:
            continue
        kept_lemmas.append(token.lemma_)
    return " ".join(kept_lemmas)

# Lemmatize both ingredient columns; each result is stored in a new column
# named "Lemmatized_<source column>".
print("Processing ingredients...")
for source_col in ('Ingredients', 'Cleaned_Ingredients'):
    df[f'Lemmatized_{source_col}'] = df[source_col].apply(lemmatize_text)

df[['Lemmatized_Ingredients', 'Lemmatized_Cleaned_Ingredients']].head()

def merge_recipe(row):
    """Combine a recipe's title and both lemmatized ingredient fields.

    Args:
        row: Any mapping-like object (e.g. a DataFrame row) indexable by
            'Title', 'Lemmatized_Ingredients' and
            'Lemmatized_Cleaned_Ingredients'.

    Returns:
        The three values formatted as text and separated by single spaces.
    """
    parts = (
        row['Title'],
        row['Lemmatized_Ingredients'],
        row['Lemmatized_Cleaned_Ingredients'],
    )
    return "{} {} {}".format(*parts)

# Build the text that gets embedded for each recipe. Rows are handed to
# merge_recipe as plain dicts instead of going through df.apply(axis=1):
# on an empty frame (e.g. dropna removed every row) apply returns a DataFrame
# rather than a Series, which makes this column assignment fail, and it also
# constructs a pandas Series for every row.
df['full_recipe'] = [merge_recipe(record) for record in df.to_dict('records')]

df[['Title', 'full_recipe']].head()

from sentence_transformers import SentenceTransformer
import numpy as np

print("Generating embeddings...")
model_bi_encoder = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every merged recipe text with the bi-encoder, in DataFrame row order.
recipe_texts = df['full_recipe'].tolist()
embeddings = model_bi_encoder.encode(recipe_texts, show_progress_bar=True)

# Save the results: the embedding matrix and the processed table.
np.save('recipe_embeddings.npy', embeddings)
df.to_csv('lemmatized_recipes.csv', index=False)

import faiss

# Reload the embeddings that were written to disk.
embedding_matrix = np.load('recipe_embeddings.npy')

# Create the index: a flat L2 index sized to the embedding width, then add
# every recipe vector to it.
embedding_dim = embedding_matrix.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embedding_matrix)

def search_recipes(query, top_k=10):
    """Return the recipes whose embeddings are nearest to *query*.

    The query is lemmatized with lemmatize_text (the same preprocessing
    applied to the recipe text), encoded with the bi-encoder and looked up
    in the FAISS index.

    Args:
        query: Free-text search string.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A DataFrame with the 'Title' and 'full_recipe' columns of the
        matched rows, in the order returned by the index. It has fewer than
        ``top_k`` rows when the index holds fewer vectors than requested.
    """
    query_vec = model_bi_encoder.encode([lemmatize_text(query)])
    # FAISS expects a C-contiguous float32 matrix.
    query_vec = np.ascontiguousarray(query_vec, dtype='float32')
    _, neighbor_ids = index.search(query_vec, top_k)
    # FAISS pads missing results with -1 when fewer than top_k vectors are
    # available; df.iloc would treat -1 as "last row", so drop those slots.
    row_positions = neighbor_ids[0]
    row_positions = row_positions[row_positions >= 0]
    return df.iloc[row_positions][['Title', 'full_recipe']]

from sentence_transformers import CrossEncoder

# Load the cross-encoder that rerank() uses to score (query, recipe) pairs.
print("Loading Cross-Encoder...")
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

def rerank(query, candidates_df):
    """Score candidate recipes against *query* and sort them by score.

    Args:
        query: The search string, passed to the cross-encoder as-is.
        candidates_df: DataFrame with a 'full_recipe' column.

    Returns:
        A copy of ``candidates_df`` with an added 'score' column holding the
        cross-encoder prediction for each row, sorted by descending score.
        The input frame is not modified.
    """
    pairs = [(query, recipe_text) for recipe_text in candidates_df['full_recipe']]
    scores = cross_encoder.predict(pairs)
    scored = candidates_df.assign(score=scores)
    return scored.sort_values(by='score', ascending=False)

query = "How to cook spicy chicken with rice?"

# Search: retrieve candidates from the index, then rerank them.
candidates = search_recipes(query, top_k=10)
results = rerank(query, candidates)

# Show the best results (five highest-scoring recipes).
print("\nTop Recipes:")
top_results = results.head(5)
for title, score in zip(top_results['Title'], top_results['score']):
    print(f"\nTitle: {title}\nScore: {score:.4f}\n")