## Import

In [2]:
import pandas as pd
import numpy as np
import pickle

from lightfm import LightFM
from lightfm.data import Dataset

## Load data

In [3]:

raw_interactions = pd.read_csv("C:/Users/Paul/Desktop/TechLaps/Projekt/data/RAW_interactions.csv", sep=",")
raw_interactions = raw_interactions[["user_id", "recipe_id"]]

## Build model

In [4]:
# LightFM will not use our IDs, but rather internal indices
# It does this, because it needs consecutive non-negative integers (but the input could be anything)
# Therefore, we need a mapping between our IDs and the internal indices
# E.g. user_id = 38094 -> internal_user_id = 1, user_id = 1293707 -> internal_user_id = 2
# Very well explained: https://making.lyst.com/lightfm/docs/examples/dataset.html?highlight=dataset#building-the-id-mappings

# We can do this easily by using the Dataset class from the LightFM package
dataset = Dataset()
dataset.fit(users=raw_interactions["user_id"], items=raw_interactions["recipe_id"])

In [5]:
# To have an easy way of converting input ids to internal ids, we define a mappings class
class Mappings:
    def __init__(self, dataset: Dataset) -> None:
        """
        userid: user_id
        row: internal user id
        itemid: recipe_id
        column: internal recipe id
        """
        userid2row, _, itemid2col, _ = dataset.mapping()
        self.userid2row = userid2row
        self.itemid2col = itemid2col
        # Invert dictionaries to get mapping in other direction
        self.row2userid = {value: key for key, value in self.userid2row.items()}
        self.col2itemid = {v: k for k, v in self.itemid2col.items()}
        # Use like this: 
        # mappings = Mappings(dataset)
        # mappings.userid2row["axfafe24"]

# And use it:
mappings = Mappings(dataset)
# Example. This returns the internal user id of user_id=38094
mappings.userid2row[38094]

0

In [6]:
# Then we build the interaction matrix (a table with users as rows and recipes as columns, and a 1 in the cell if the user rated the recipe)
interactions, _ = dataset.build_interactions(raw_interactions.to_records(index=False))

In [7]:
# We could also add item_features (like the ratings)
# https://making.lyst.com/lightfm/docs/examples/dataset.html?highlight=dataset#building-the-interactions-matrix

In [8]:
# Now we can raw_datae model
# This might take a few minutes
model = LightFM(loss="warp", learning_rate=0.05, random_state=42)
model.fit(interactions=interactions, epochs=100)

# Save model to pickle file
# filename = "recommendation_model.pkl"
# with open(filename, 'wb') as file:  
#     pickle.dump(model, file)

<lightfm.lightfm.LightFM at 0x2084f4bd1c0>

In [10]:
# Load model from pickle file
filename = "recommendation_model.pkl" 
with open(filename, 'rb') as file:  
    model = pickle.load(file)

model

<lightfm.lightfm.LightFM at 0x208475e07f0>

## Test model
Get recommendations for one sample user and check if the recommendations make sense

In [11]:
# Specify the user for which predictions should be made
user_id = 38094
# Get the internal id (or: row) for this user
user_row = mappings.userid2row[user_id]
# Get the number of items in the dataset
_, n_items = dataset.interactions_shape()
# Get an array with all internal item ids
item_columns = np.arange(n_items) # [0, 1, 2, ..., 231636]
# Get the scores for each item (for our user)
scores = model.predict(user_ids=user_row, item_ids=item_columns)
# How to interpret:
# score[0] = recommendation score for internal item id 0
# score[1] = recommendation score for internal item id 1
# ...
# The item with the highest score is most likely to be a good recommendation

In [72]:
print("Enter the number of desired recommendations")
a=int(input())

Enter the number of desired recommendations
20


In [73]:
def get_top_sorted(scores: np.ndarray, top_n):
    
    """
    Get the top indices sorted descendingly from the scores list array.
    Args:
        scores: An array with scores.
        top_n: The number of top scores to be returned.
    Returns:
        ScoringList: The first element of the tuple is the index where the score was
                in the original array, the second element is the score itself.
    """
    best_idxs = np.argpartition(scores, -top_n)[-top_n:]
    return sorted(zip(best_idxs, scores[best_idxs]), key=lambda x: -x[1])

# Example: Use fuction to return top 5 recommendations
sorted_scores_top = get_top_sorted(scores, a)
sorted_scores_top

[(53993, -0.39487305),
 (202722, -0.63245183),
 (63981, -0.9174722),
 (158375, -1.0432777),
 (110776, -1.0805719),
 (208236, -1.1357785),
 (37033, -1.1401274),
 (25445, -1.1539602),
 (65928, -1.1792029),
 (191840, -1.1836978),
 (98002, -1.1842965),
 (217954, -1.1849487),
 (195108, -1.2183448),
 (153100, -1.2285028),
 (144241, -1.2350413),
 (112139, -1.2652851),
 (144977, -1.2684038),
 (70531, -1.2694908),
 (43420, -1.301842),
 (205806, -1.3077459)]

In [76]:
# Add results to a DataFrame
recommendations = pd.DataFrame(sorted_scores_top, columns=["internal_item_id", "score"])
# Add user_id
recommendations["user_id"] = user_id
# Add recipe_id
# Google something like "python apply lambda" to learn more about how this works
recommendations["recipe_id"] = recommendations["internal_item_id"].apply(lambda x: mappings.col2itemid[x])
# Drop internal_item_id and reorder other columns
recommendations = recommendations[["user_id", "recipe_id", "score"]]
recommendations

Unnamed: 0,user_id,recipe_id,score
0,38094,4065,-0.394873
1,38094,10123,-0.632452
2,38094,295797,-0.917472
3,38094,108524,-1.043278
4,38094,10045,-1.080572
5,38094,56322,-1.135779
6,38094,21284,-1.140127
7,38094,105594,-1.15396
8,38094,67654,-1.179203
9,38094,31607,-1.183698


In [77]:
## Get the recipe names for each recipe_id
# Load recipe data
raw_recipes = pd.read_csv("C:/Users/Paul/Desktop/TechLaps/Projekt/data/RAW_recipes.csv", sep=",")

# Define function that returns recipe name when given a recipe id
def get_recipe_name(recipe_id):
    return raw_recipes[raw_recipes["id"] == recipe_id]["name"].item()

# Apply this function to every row of the recommendations dataframe (with apply and lambda)
recommendations["recipe_name"] = recommendations.apply(lambda x: get_recipe_name(x["recipe_id"]), axis=1)
recommendations

Unnamed: 0,user_id,recipe_id,score,recipe_name
0,38094,4065,-0.394873,oatmeal chocolate chip cookies ii
1,38094,10123,-0.632452,soft batch oatmeal raisin cookies
2,38094,295797,-0.917472,brownie mix double chocolate chip cookies
3,38094,108524,-1.043278,almost tortuga rum cake
4,38094,10045,-1.080572,strawberry filling
5,38094,56322,-1.135779,boston market cornbread
6,38094,21284,-1.140127,cinnamon loaf
7,38094,105594,-1.15396,mom s chocolate cupcakes
8,38094,67654,-1.179203,low carb chili
9,38094,31607,-1.183698,bisquickie cinnamon rolls


In [78]:
## Get the names of the recipes our sample user
# Get all interactions from out test user
user_recipes = raw_interactions[raw_interactions["user_id"] == user_id]

# Apply function from above also to this dataframe
user_recipes["recipe_name"] = user_recipes.apply(lambda x: get_recipe_name(x["recipe_id"]), axis=1)
user_recipes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recipes["recipe_name"] = user_recipes.apply(lambda x: get_recipe_name(x["recipe_id"]), axis=1)


Unnamed: 0,user_id,recipe_id,recipe_name
0,38094,40893,white bean green chile pepper soup
6879,38094,16954,black beans with mango sauce
73153,38094,40753,warm spinach salad
108069,38094,34513,roasted asparagus shiitake mushrooms
126804,38094,69545,low carb key lime pie
154564,38094,49064,slow cooker hoppin john
159916,38094,80044,west indian pumpkin soup
176845,38094,30565,oh my d lux chocolate chip cookies
177313,38094,29493,honey roasted pork loin
183558,38094,34509,bananas au gratin


In [79]:
#user_recommendations.set_index('recipe_id').join(raw_recipes.set_index('ingridients'))

In [115]:
raw_recipes = pd.read_csv( "C:/Users/Paul/Desktop/TechLaps/Projekt/data/RAW_recipes.csv", sep=",")


In [116]:
to_adrian = user_recipes.set_index('recipe_id').join(raw_recipes.set_index('id'))



In [117]:
to_adrian.head()

Unnamed: 0_level_0,user_id,recipe_name,name,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
40893,38094,white bean green chile pepper soup,white bean green chile pepper soup,495,1533,2002-09-21,"['weeknight', 'time-to-make', 'course', 'main-...","[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]",4,"['combine beans , onion , chilies , 1 / 2 teas...",easy soup for the crockpot.,"['great northern beans', 'yellow onion', 'dice...",9
16954,38094,black beans with mango sauce,black beans with mango sauce,40,9121,2002-01-08,"['60-minutes-or-less', 'time-to-make', 'course...","[342.2, 7.0, 59.0, 24.0, 32.0, 3.0, 20.0]",9,"['heat the oil in a medium sized skillet', 'ad...",one of my favorite mollie katzen recipes.,"['olive oil', 'red onions', 'garlic', 'jalapen...",12
40753,38094,warm spinach salad,warm spinach salad,25,51546,2002-09-20,"['30-minutes-or-less', 'time-to-make', 'course...","[188.0, 24.0, 19.0, 10.0, 7.0, 10.0, 3.0]",7,"['in hot , nonstick frying pan , brown the alm...",good variation to the typical salad.,"['sliced almonds', 'spinach', 'mushrooms', 'ol...",12
34513,38094,roasted asparagus shiitake mushrooms,roasted asparagus shiitake mushrooms,40,1533,2002-07-17,"['60-minutes-or-less', 'time-to-make', 'course...","[189.3, 12.0, 43.0, 49.0, 20.0, 5.0, 8.0]",8,"['preheat the oven to 450 degrees', 'in a larg...","a great side to roast chicken, these vegetable...","['fresh asparagus', 'shiitake mushroom', 'extr...",6
69545,38094,low carb key lime pie,low carb key lime pie,70,99254,2003-08-25,"['weeknight', 'time-to-make', 'course', 'main-...","[234.7, 30.0, 0.0, 7.0, 20.0, 62.0, 1.0]",6,"['soften cream cheese in mixing bowl , blend i...",this is a great substiute for the 'real' thing...,"['cream cheese', 'sugar-free lime gelatin', 'l...",7


In [118]:
#to_adrian = user_recipes.drop(['name', 'contributor_id', 'submitted'])

In [119]:
to_adrian.drop(['name'], axis = 1, inplace = True)
to_adrian.drop(['contributor_id'], axis = 1, inplace = True)
to_adrian.drop(['submitted'], axis = 1, inplace = True)
to_adrian.drop(['user_id'], axis = 1, inplace = True)

In [120]:
to_adrian.drop(['contributor_id'], axis = 1, inplace = True)

In [121]:
to_adrian.drop(['submitted'], axis = 1, inplace = True)

In [123]:
to_adrian.drop(['user_id'], axis = 1, inplace = True)

In [124]:
to_adrian.head()

Unnamed: 0_level_0,recipe_name,minutes,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
40893,white bean green chile pepper soup,495,"['weeknight', 'time-to-make', 'course', 'main-...","[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]",4,"['combine beans , onion , chilies , 1 / 2 teas...",easy soup for the crockpot.,"['great northern beans', 'yellow onion', 'dice...",9
16954,black beans with mango sauce,40,"['60-minutes-or-less', 'time-to-make', 'course...","[342.2, 7.0, 59.0, 24.0, 32.0, 3.0, 20.0]",9,"['heat the oil in a medium sized skillet', 'ad...",one of my favorite mollie katzen recipes.,"['olive oil', 'red onions', 'garlic', 'jalapen...",12
40753,warm spinach salad,25,"['30-minutes-or-less', 'time-to-make', 'course...","[188.0, 24.0, 19.0, 10.0, 7.0, 10.0, 3.0]",7,"['in hot , nonstick frying pan , brown the alm...",good variation to the typical salad.,"['sliced almonds', 'spinach', 'mushrooms', 'ol...",12
34513,roasted asparagus shiitake mushrooms,40,"['60-minutes-or-less', 'time-to-make', 'course...","[189.3, 12.0, 43.0, 49.0, 20.0, 5.0, 8.0]",8,"['preheat the oven to 450 degrees', 'in a larg...","a great side to roast chicken, these vegetable...","['fresh asparagus', 'shiitake mushroom', 'extr...",6
69545,low carb key lime pie,70,"['weeknight', 'time-to-make', 'course', 'main-...","[234.7, 30.0, 0.0, 7.0, 20.0, 62.0, 1.0]",6,"['soften cream cheese in mixing bowl , blend i...",this is a great substiute for the 'real' thing...,"['cream cheese', 'sugar-free lime gelatin', 'l...",7


In [None]:
#raw_interactions['recipe_id'].value_counts().head(20)
