## Import

In [1]:
import pandas as pd
import numpy as np
import pickle

from lightfm import LightFM
from lightfm.data import Dataset

## Load data

In [2]:
path = "../../mealwheel-data/"
# Read the raw interactions file and keep only the user_id and recipe_id columns;
# these two are all that the rest of the notebook reads from it
raw_interactions = (
    pd.read_csv(path + "RAW_interactions.csv", sep=",")
    .loc[:, ["user_id", "recipe_id"]]
)

## Build model

In [3]:
# LightFM does not work with our IDs directly, but with its own internal indices.
# It needs consecutive non-negative integers (starting at 0), while the input IDs could be anything.
# So we need a mapping between our IDs and those internal indices.
# E.g. user_id = 38094 -> internal_user_id = 0, user_id = 1293707 -> internal_user_id = 1
# Very well explained: https://making.lyst.com/lightfm/docs/examples/dataset.html?highlight=dataset#building-the-id-mappings

# The Dataset class from the LightFM package builds these mappings for us
dataset = Dataset()
dataset.fit(
    users=raw_interactions["user_id"],
    items=raw_interactions["recipe_id"],
)

In [7]:
# To convert easily between input ids and internal ids (in both directions), we define a mappings class
class Mappings:
    """
    Lookup tables between our own ids and the internal indices of a Dataset.

    Terminology:
        userid: user_id
        row: internal user id
        itemid: recipe_id
        column: internal recipe id

    Use like this:
        mappings = Mappings(dataset)
        mappings.userid2row["axfafe24"]
    """

    def __init__(self, dataset: Dataset) -> None:
        """Read the id mappings from `dataset` and build the reverse lookups."""
        # mapping() returns four dictionaries; only the first and third are used here
        user_lookup, _, item_lookup, _ = dataset.mapping()
        self.userid2row = user_lookup
        self.itemid2col = item_lookup
        # Swap keys and values to get the mapping in the other direction
        self.row2userid = dict(zip(self.userid2row.values(), self.userid2row.keys()))
        self.col2itemid = dict(zip(self.itemid2col.values(), self.itemid2col.keys()))

# Build the lookup tables from our dataset:
mappings = Mappings(dataset)
# Example: look up the internal user id (row) that belongs to user_id=38094
sample_user_id = 38094
mappings.userid2row[sample_user_id]

0

In [8]:
# Next comes the interaction matrix: a table with users as rows and recipes as columns,
# and a 1 in the cell if the user rated the recipe.
# build_interactions returns two values; we only keep the first one (the interactions).
interactions, _ = dataset.build_interactions(
    raw_interactions.to_records(index=False)
)

In [None]:
# We could also add item_features (like the ratings)
# https://making.lyst.com/lightfm/docs/examples/dataset.html?highlight=dataset#building-the-interactions-matrix

In [10]:
# Now we can train the model
# This might take a few minutes
model = LightFM(loss="warp", learning_rate=0.05, random_state=42)
model.fit(interactions=interactions, epochs=100)

# Save the trained model to a pickle file.
# The "Load model from pickle file" cell below reads exactly this file, so without
# this step the notebook cannot be run from top to bottom on a fresh checkout.
filename = "recommendation_model.pkl"
with open(filename, "wb") as file:
    pickle.dump(model, file)

# Show the fitted model as the cell output
model

<lightfm.lightfm.LightFM at 0x21b1d94a390>

In [4]:
# Restore the previously trained model from its pickle file
# NOTE: only unpickle files you created yourself - unpickling can execute arbitrary code
filename = "recommendation_model.pkl"
with open(filename, mode="rb") as model_file:
    model = pickle.load(model_file)

model

<lightfm.lightfm.LightFM at 0x1fad7892198>

## Test model
Get recommendations for one sample user and check if the recommendations make sense

In [None]:
# The user we want recommendations for
user_id = 38094
# The model works with internal ids, so translate the user_id into its row
user_row = mappings.userid2row[user_id]
# The second entry of the interactions shape is the number of items in the dataset
_, n_items = dataset.interactions_shape()
# All internal item ids in one array: [0, 1, 2, ..., n_items - 1]
item_columns = np.arange(0, n_items)
# Ask the model for one score per item (for our user)
scores = model.predict(
    user_ids=user_row,
    item_ids=item_columns,
)
# How to interpret:
# scores[0] = recommendation score for internal item id 0
# scores[1] = recommendation score for internal item id 1
# ...
# The item with the highest score is most likely to be a good recommendation

In [None]:
# Define a function that sorts the scores and returns the top_n elements
def get_top_sorted(scores: np.ndarray, top_n: int) -> list:
    """
    Get the top_n highest scores together with their indices, sorted descendingly.

    Args:
        scores: A one-dimensional array (or sequence) with scores.
        top_n: The number of top scores to be returned. If it is larger than
            the number of scores, all scores are returned; 0 returns an empty list.
    Returns:
        A list of (index, score) tuples. The first element of each tuple is the
        index where the score was in the original array, the second element is
        the score itself. The list is ordered from highest to lowest score.
    Raises:
        ValueError: If top_n is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative, got {}".format(top_n))
    # Accept plain lists as well; the fancy indexing below needs an array
    scores = np.asarray(scores)
    # Never ask for more elements than exist - argpartition would raise otherwise
    top_n = min(top_n, scores.size)
    if top_n == 0:
        # Needed because a [-0:] slice selects *all* elements instead of none
        return []
    # argpartition moves the top_n largest entries to the end without sorting everything
    best_idxs = np.argpartition(scores, -top_n)[-top_n:]
    # Only these top_n candidates are then sorted by descending score
    return sorted(zip(best_idxs, scores[best_idxs]), key=lambda x: -x[1])

# Example: use the function to get the top 10 recommendations for our user
sorted_scores_top_10 = get_top_sorted(scores, top_n=10)
sorted_scores_top_10

In [None]:
# Turn the top scores into a DataFrame and make it readable:
#   - user_id: the id of our sample user (the same value in every row)
#   - recipe_id: the internal item id translated back to the original recipe_id
#     (google something like "python apply lambda" to learn more about how this works)
# Finally keep only user_id, recipe_id and score, in that order (this drops internal_item_id)
recommendations = (
    pd.DataFrame(sorted_scores_top_10, columns=["internal_item_id", "score"])
    .assign(
        user_id=user_id,
        recipe_id=lambda frame: frame["internal_item_id"].apply(
            lambda item_col: mappings.col2itemid[item_col]
        ),
    )
    .loc[:, ["user_id", "recipe_id", "score"]]
)
recommendations

In [None]:
# Get the recipe names for each recipe_id
# Your turn :)

In [None]:
# Get the names of the recipes our sample user has already rated
# Your turn :)