In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from collections import defaultdict

ModuleNotFoundError: No module named 'surprise'

In [2]:
! pip install surprise

Collecting surprise
  Obtaining dependency information for surprise from https://files.pythonhosted.org/packages/61/de/e5cba8682201fcf9c3719a6fdda95693468ed061945493dea2dd37c5618b/surprise-0.1-py2.py3-none-any.whl.metadata
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
     ---------------------------------------- 0.0/772.0 kB ? eta -:--:--
      --------------------------------------- 10.2/772.0 kB ? eta -:--:--
      --------------------------------------- 10.2/772.0 kB ? eta -:--:--
     - ----------------------------------- 30.7/772.0 kB 259.2 kB/s eta 0:00:03
     - ----------------------------------- 30.7/772.0 kB 259.2 kB/s eta 0:00:03
     ----- ------------------------------ 112.6/772.0 kB 467.6 kB/s eta 0:00:02
     ----- ------------------------------ 112.6/772.0 kB 467.6 kB/s eta 0:00:02
     ------ ----------------------------- 143.4/772.0 kB 472.1 kB/s eta 

In [None]:


# Sample dataset of dishes and ingredients
data = {
    'Dish': ['Spaghetti Bolognese', 'Chicken Curry', 'Caprese Salad', 'Beef Tacos'],
    'Ingredients': ['spaghetti, beef, tomato sauce, onion',
                    'chicken, curry paste, coconut milk, vegetables',
                    'tomato, mozzarella cheese, basil, balsamic vinegar',
                    'beef, tortillas, lettuce, tomato, cheese']
}

df = pd.DataFrame(data)

# Content-Based Filtering
# Each dish's ingredient string becomes one row of the TF-IDF matrix.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['Ingredients'])

# Dish-to-dish similarity. TfidfVectorizer L2-normalises rows by default, so
# the dot product computed by linear_kernel is the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Collaborative Filtering
# Surprise's Dataset.load_from_df requires exactly three columns, in the order
# (user id, item id, rating). The dish/ingredient table holds no ratings, so
# feeding it to load_from_df fails. The ratings below are ILLUSTRATIVE sample
# data only -- replace them with real user feedback.
# 'dish_id' values are row labels of `df`; user 0 rates every dish.
ratings_df = pd.DataFrame({
    'user_id': [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3],
    'dish_id': [0, 1, 2, 3, 0, 1, 3, 0, 2, 3, 1, 2, 3],
    'rating':  [5, 3, 2, 4, 4, 5, 3, 2, 5, 1, 4, 4, 2],
})

reader = Reader(rating_scale=(1, 5))
data_surprise = Dataset.load_from_df(ratings_df[['user_id', 'dish_id', 'rating']], reader)
# Fixed seeds make the split and the SVD initialisation repeatable across
# "Restart & Run All".
trainset, testset = train_test_split(data_surprise, test_size=0.2, random_state=42)

# Fit SVD algorithm
algo = SVD(random_state=42)
algo.fit(trainset)

def hybrid_recommendations(ingredients, num_recommendations=5, user_id=0, content_weight=0.5):
    """Rank the dishes in ``df`` by a blend of content and collaborative scores.

    Parameters
    ----------
    ingredients : str
        Free-text ingredient list, vectorised with the already-fitted ``tfidf``.
    num_recommendations : int, default 5
        Maximum number of dish names to return.
    user_id : default 0
        Raw (not inner) user id handed to ``algo.predict``.
    content_weight : float, default 0.5
        Weight of the content-based score; the collaborative score gets
        ``1 - content_weight``. Expected to lie in [0, 1].

    Returns
    -------
    list
        Values of the ``'Dish'`` column, best hybrid score first.

    Notes
    -----
    Relies on the module-level ``df``, ``tfidf``, ``tfidf_matrix``,
    ``trainset`` and ``algo``. The raw item id passed to ``algo.predict`` is
    the row label of ``df`` -- this assumes the ratings were loaded with those
    same labels as item ids (TODO confirm against the ratings data).
    """
    # Content-Based Filtering: similarity of the query to every dish row.
    query_vector = tfidf.transform([ingredients])
    content_scores = linear_kernel(query_vector, tfidf_matrix)[0]

    # Collaborative Filtering.
    # algo.predict takes RAW ids and handles unknown users/items itself, so the
    # user id is passed straight through instead of being mapped to an inner id
    # (which also raised for users missing from the trainset).
    lowest, highest = trainset.rating_scale
    scale_span = (highest - lowest) or 1  # guard against a degenerate scale

    scored = []
    for position, dish_label in enumerate(df.index):
        estimated_rating = algo.predict(user_id, dish_label).est
        # Rescale the rating to [0, 1] so it is comparable with the similarity
        # score; otherwise the rating term dominates the blend.
        collaborative_score = (estimated_rating - lowest) / scale_span
        blended = (content_weight * content_scores[position]
                   + (1 - content_weight) * collaborative_score)
        scored.append((position, blended))

    # Sort recommendations by hybrid score
    scored.sort(key=lambda item: item[1], reverse=True)
    top_scored = scored[:max(num_recommendations, 0)]

    # `position` is positional, so iloc is the matching accessor.
    return [df.iloc[position]['Dish'] for position, _ in top_scored]

# Example usage: ask for dishes that match a handful of ingredients and print
# them as a 1-based ranked list.
ingredients_input = 'beef, tomato sauce, onion'
recommendations = hybrid_recommendations(ingredients_input)
print("Recommended dishes based on input ingredients:")
for rank, dish_name in enumerate(recommendations, start=1):
    print(f"{rank}. {dish_name}")
