In [1]:
import pandas as pd
import numpy as np

In [2]:
recipes = pd.read_csv('recipes/recipes.csv')
recipes.head(2)

Unnamed: 0,id,title,description,recipeLink,keywords,recipeType,ratingCount,ratingValue,authorType,authorName,prepTime,cookTime,recipeCategory,recipeCuisine,serving,nutrition,suitableForDiet,gotImage
0,0,Chickpeas with harissa and yoghurt,"Chickpeas are filling, delicious, easy to cook...",https://www.bbc.co.uk/food/recipes/chickpeas_w...,"quick, 10-minute meals, easy chickpea, afforda...",Recipe,16.0,4875.0,Person,Justine Pattison,0.00:30:00,0.00:30:00,Light meals & snacks,,Serves 2,,"['http://schema.org/GlutenFreeDiet', 'http://s...",FALSO
1,1,"Chickpea, spinach and egg curry",This vegetarian low-calorie chickpea curry mak...,https://www.bbc.co.uk/food/recipes/chickpea_sp...,"quick, easy chickpea, 500-calorie dinners, 6 q...",Recipe,51.0,4313730000000000.0,Person,Sunil Vijayakar,0.00:30:00,0.00:30:00,Main course,Indian,Serves 2,"{'calories': '441kcal', 'carbohydrateContent':...","['http://schema.org/LowLactoseDiet', 'http://s...",FALSO


In [3]:
recipes.shape[0]

5611

In [4]:
recipes.drop_duplicates(inplace=True, subset = ['title', 'description'])

In [5]:
recipes.shape[0]

2732

In [6]:
recipes.dropna(axis='rows', inplace=True, subset=['ratingValue', 'ratingCount'])

In [7]:
recipes.shape[0]

2258

In [8]:
def get_rating(number):
    string = str(number)[:2]
    rating = float(string)/10
    return rating

recipes['ratingValue'] = recipes['ratingValue'].apply(get_rating)

count_treshold = 10
value_treshold = 3.0

recipes = recipes[recipes['ratingCount']>count_treshold]
recipes = recipes[recipes['ratingValue']>value_treshold]

recipes.reset_index(drop=True, inplace=True)

In [9]:
recipes.shape[0]

503

In [10]:
indices = pd.Series(recipes.index, index=recipes['title'])

In [11]:
recipes.drop(columns=['recipeLink', 'authorType', 'prepTime', 'cookTime', 'serving', 'nutrition', 'suitableForDiet', 'gotImage', 'ratingCount', 'ratingValue', 'recipeType', 'description', 'authorName'], inplace=True)


recipes.head(2)

Unnamed: 0,id,title,keywords,recipeCategory,recipeCuisine
0,0,Chickpeas with harissa and yoghurt,"quick, 10-minute meals, easy chickpea, afforda...",Light meals & snacks,
1,1,"Chickpea, spinach and egg curry","quick, easy chickpea, 500-calorie dinners, 6 q...",Main course,Indian


In [12]:
recipes.shape[0]

503

In [13]:
def reduce_string(string):
    if type(string) != 'str': string = str(string)
    return string.lower().replace(' ', '')

def reduce_keywords(string):
    str_list = string.split(',')
    reduced = [reduce_string(key) for key in str_list]
    return ' '.join(reduced)

In [14]:
recipes.keywords = recipes.keywords.apply(reduce_keywords)
features = ['recipeCategory', 'recipeCuisine']


recipes.head(2)

Unnamed: 0,id,title,keywords,recipeCategory,recipeCuisine
0,0,Chickpeas with harissa and yoghurt,quick 10-minutemeals easychickpea affordable30...,Light meals & snacks,
1,1,"Chickpea, spinach and egg curry",quick easychickpea 500-caloriedinners 6quickan...,Main course,Indian


In [15]:
for feature in features:
    recipes[feature] = recipes[feature].apply(reduce_string)

recipes.head(2)

Unnamed: 0,id,title,keywords,recipeCategory,recipeCuisine
0,0,Chickpeas with harissa and yoghurt,quick 10-minutemeals easychickpea affordable30...,lightmeals&snacks,
1,1,"Chickpea, spinach and egg curry",quick easychickpea 500-caloriedinners 6quickan...,maincourse,indian


In [16]:
# creating soup
def create_soup(x):
    labels = ['recipeCategory', 'recipeCuisine','keywords']
    return ' '.join([x[label] for label in labels])

In [17]:
recipes['soup'] = recipes.drop(columns = ['title']).apply(create_soup, axis=1)

recipes.head()

Unnamed: 0,id,title,keywords,recipeCategory,recipeCuisine,soup
0,0,Chickpeas with harissa and yoghurt,quick 10-minutemeals easychickpea affordable30...,lightmeals&snacks,,lightmeals&snacks nan quick 10-minutemeals eas...
1,1,"Chickpea, spinach and egg curry",quick easychickpea 500-caloriedinners 6quickan...,maincourse,indian,maincourse indian quick easychickpea 500-calor...
2,2,"Tomato, chickpea and pasta soup",quick easychickpea 15minutemeals 20minutemeals...,lightmeals&snacks,italian,lightmeals&snacks italian quick easychickpea 1...
3,3,Tom Kerridge's beef stroganoff,quick freshstartfor1-2 luxuryweeknightmeals mo...,maincourse,easteuropean,maincourse easteuropean quick freshstartfor1-2...
4,14,Apple crumble,makethemostofapples traditionalroastbeefdinner...,desserts,british,desserts british makethemostofapples tradition...


In [18]:
from sklearn.feature_extraction.text import CountVectorizer

count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(recipes['soup'])

In [19]:
count_matrix.shape

(503, 1519)

In [20]:
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(count_matrix, count_matrix)

In [21]:
cosine_sim.shape

(503, 503)

In [22]:
def get_recommendations(title, cosine_sim=cosine_sim):
        # Get the index of the movie that matches the title
    idx = indices[title]

    # Get the pairwsie similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    recipes_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar movies
    return recipes['title'].iloc[recipes_indices]

In [23]:
get_recommendations('Tomato, chickpea and pasta soup')

368                          Parsnip soup
0      Chickpeas with harissa and yoghurt
21                        Beetroot hummus
415                Carrot and lentil soup
262             Carrot and coriander soup
433                     Corned beef hash 
394                 Lentil and bacon soup
472              Carrot and ginger soup  
129                  Leek and potato soup
116           5-ingredient sausage pasta 
Name: title, dtype: object

In [24]:
recipes['title']

0      Chickpeas with harissa and yoghurt
1         Chickpea, spinach and egg curry
2         Tomato, chickpea and pasta soup
3          Tom Kerridge's beef stroganoff
4                           Apple crumble
                      ...                
498                    Thai chicken curry
499                     Blueberry muffins
500       Tuscan beans on sourdough toast
501                     Sourdough starter
502                         Custard slice
Name: title, Length: 503, dtype: object

In [25]:
classified_recipes = {
    0:[1, 3, 6, 34, 35, 235, 354],
    1:[0, 2, 4, 65, 346, 53, 100]
}

input_recipes = [1, 3, 0, 2, 4]

In [26]:
def get_user_recomendation(classified_recipes, input_recipes, cosine_sim = cosine_sim):

    pos_indexes = classified_recipes.get(1) #indexes of the liked recipes
    neg_indexes = classified_recipes.get(0) #indexes of disliked recipes
    input_indexes = input_recipes

    cos_matrix = np.array(cosine_sim)
    cos_matrix[:,neg_indexes] = - cos_matrix[:,neg_indexes] #flipping sign of disliked recipes
    reduced_matrix = cos_matrix[:,pos_indexes + pos_indexes][input_indexes,:]

    sum_vec = reduced_matrix.sum(axis=1)

    index = input_recipes[sum_vec.argmax()]
    recipe_name = recipes.at[index,'title']

    return index, recipe_name

get_user_recomendation(classified_recipes, input_recipes)

(0, 'Chickpeas with harissa and yoghurt')