In [None]:
# Load IPython's autoreload extension and switch it to mode 2, so that edits
# to imported project modules are picked up without restarting the kernel.
# Note: a magic must be written with no space between `%` and its name.
%load_ext autoreload
%autoreload 2

In [None]:
import warnings
import cornac
import utils
import pickle
import recommenders.tuning.parameter_sweep
from recommendation import healthy_recommender_util
from nutrition_scores import score_util
from nutrition_scores import filtering_util
from nutrition_scores import tfidf_util

warnings.filterwarnings("ignore")

-----------------------------------------------------------
# Preprocess Dataset
-----------------------------------------------------------

In [None]:
# Input directories, given relative to the notebook's working directory.
graph_location = '../../data/food_kg/'
data_location = '../../data/hummus_data/'
# Additional data directory; if it is not present, pass `recipe_tags=False`
# when loading the data.
additional_location = '../../data/hummus_data/'

In [None]:
# Load and clean the raw data; the loader returns eight objects, unpacked
# here in the order it provides them.
(
    recipes_df,
    reviews_df,
    users_df,
    recipes_dict,
    user_dict,
    food_locator_dict,
    food_com_dict,
    data,
) = utils.load_and_clean_data(
    data_location,
    additional_location,
    comment_relations=2.5,
    authorship_relations=6,
    recipe_tags=True,
    k_user=10,
    k_recipe=10,
    debug=True,
)

In [None]:
# Compute a dataframe of the recipes' nutrient values normalized to 100 g.
normalized_ingredients = utils.normalize_ingredients(recipes_df)

In [None]:
# Score the recipes with the three requested schemes ('who', 'fsa', 'nutri'),
# based on the recipes and their normalized nutrient values.
pp_recipes = score_util.calculate_food_scores(
    recipes_df,
    normalized_ingredients,
    score_names=['who', 'fsa', 'nutri'],
)

In [None]:
# Build one lookup per nutrition score: recipe id -> score value.
# NOTE(review): rows are fetched by position (`iloc`), so this presumably
# relies on the ids in `recipes_dict` matching the row positions of
# `pp_recipes` - confirm against `utils.load_and_clean_data`.
who_dict = {}
fsa_dict = {}
nutri_dict = {}

for recipe_id in recipes_dict:
    # Fetch the row once and reuse it for all three scores instead of
    # materialising it separately for every column lookup.
    recipe_scores = pp_recipes.iloc[recipe_id]
    who_dict[recipe_id] = recipe_scores['who_score']
    fsa_dict[recipe_id] = recipe_scores['fsa_score']
    nutri_dict[recipe_id] = recipe_scores['nutri_score']

In [None]:
# Build the interaction table: keep the member id, recipe id and rating
# columns of the reviews and rename the two id columns to `userID` / `itemID`.
pp_interactions = (
    reviews_df[['new_member_id', 'new_recipe_id', 'rating']]
    .rename(columns={'new_member_id': 'userID', 'new_recipe_id': 'itemID'})
)

In [None]:
# Persist the preprocessed tables and the score lookups under `output_path`.
output_path = './data/cornac/'
utils.ensure_dir(output_path)

# Interactions and scored recipes go to CSV files without the index column.
pp_interactions.to_csv(output_path + 'foodData.csv', sep=',', index=False)
pp_recipes.to_csv(output_path + 'foodRecipes.csv', sep=',', index=False)

# Each score dictionary is pickled into its own file.
for pickle_name, score_dict in (
    ('who_dict.pkl', who_dict),
    ('fsa_dict.pkl', fsa_dict),
    ('nutri_dict.pkl', nutri_dict),
):
    with open(output_path + pickle_name, 'wb') as handle:
        pickle.dump(score_dict, handle)

-----------------------------------------------------------
# Split dataset, transform models, set-up parameters
-----------------------------------------------------------

In [None]:
# Read the data and split it. Note that this rebinds `pp_interactions`,
# `pp_recipes` and the three score dictionaries.
(
    pp_interactions,
    pp_recipes,
    train,
    test,
    train_set,
    who_dict,
    fsa_dict,
    nutri_dict,
) = healthy_recommender_util.load_and_split()

# Preview the first interactions.
pp_interactions.head()

In [None]:
# Settings used by the model-definition and evaluation cells of this notebook.

# Number of top-ranked items to recommend.
TOP_K = 10
# 0 means "use all cores".
NUM_THREADS = 0
# Whether logs are shown.
VERBOSE = False

## Models & Params for tuning

In [None]:
# Candidate values for each BPR hyperparameter (three values for each of the
# four parameters).
hyperparams = {
    "k": [50, 100, 200],
    "max_iter": [50, 100, 200],
    "learning_rate": [0.005, 0.01, 0.02],
    "lambda_reg": [0.002, 0.005, 0.01]
}
# Expand the candidates into a grid; each entry is later indexed by parameter
# name. Presumably one entry per combination of values - TODO confirm.
param_grid = recommenders.tuning.parameter_sweep.generate_param_grid(hyperparams)

In [None]:
# Define BPR models: one per entry of `param_grid`, all sharing the global
# thread and verbosity settings.
# NOTE(review): no `seed` is passed, so results may differ between runs -
# confirm whether a fixed seed is wanted.
models = [
    cornac.models.BPR(
        k=grid_point["k"],
        max_iter=grid_point["max_iter"],
        learning_rate=grid_point["learning_rate"],
        lambda_reg=grid_point["lambda_reg"],
        num_threads=NUM_THREADS,
        verbose=VERBOSE,
    )
    for grid_point in param_grid
]

-----------------------------------------------------------
# Training, Prediction, Evaluation
-----------------------------------------------------------

In [None]:
# Training: pass all candidate BPR models together with `train_set` to the
# training helper.
healthy_recommender_util.train_multiple(models, train_set)

In [None]:
# Prediction: collect predictions for every model, without storing them
# (`store_results=False`).
# NOTE(review): `train` (not `test`) is passed here - presumably so that
# already-seen interactions can be excluded; confirm in `predict_multiple`.
model_predictions = healthy_recommender_util.predict_multiple(models, train, store_results=False)

In [None]:
# Evaluation: score every model's predictions against `test` at cutoff TOP_K,
# passing the three nutrition-score lookups along; results are not stored.
evaluation = healthy_recommender_util.calc_scores(
    who_dict,
    fsa_dict,
    nutri_dict,
    test,
    model_predictions,
    TOP_K,
    store_results=False,
    normalize=True,
)

In [None]:
# Sort the evaluation results by the 'map' column, highest first.
top_results = evaluation.sort_values(by=['map'], ascending=False)

In [None]:
# Display the sorted evaluation results.
display(top_results)

--> Best MAP was achieved with k=100, max_iter=100, learning_rate=0.01, lambda_reg=0.005:

MAP: 0.023954677521471082,
NDCG: 0.06299806652128082,
P: 0.04356243949661181,
R: 0.05964687305096144


-----------------------------------------------------------
# Filtering & Post-Evaluation
-----------------------------------------------------------

## Filter by threshold

In [None]:
# Configs: pair every nutrition score (identifier + lookup dict) with every
# threshold value.
configs = {
    "score_config": [
        (score_util.WHO_SCORE, who_dict),
        (score_util.FSA_SCORE, fsa_dict),
        (score_util.NUTRI_SCORE, nutri_dict),
    ],
    "threshold": [0.25, 0.5, 0.75],
}
config_grid = recommenders.tuning.parameter_sweep.generate_param_grid(configs)

In [None]:
# Filtering: run the threshold filter for the configurations in `config_grid`
# over the model predictions, with normalized thresholds enabled.
healthy_model_predictions = filtering_util.multi_filter_recipes_by_threshold(
    config_grid,
    model_predictions,
    normalized_threshold=True,
)

In [None]:
# Post-Evaluation: score the threshold-filtered predictions with the same
# settings as the unfiltered run, then show them sorted by 'map' (descending).
healthy_evaluation = healthy_recommender_util.calc_scores(
    who_dict,
    fsa_dict,
    nutri_dict,
    test,
    healthy_model_predictions,
    TOP_K,
    store_results=False,
    normalize=True,
)
healthy_top_results = healthy_evaluation.sort_values(
    by=['map'],
    ascending=False,
)
display(healthy_top_results)

## Filter by percentage

In [None]:
# Configs: pair every nutrition score (identifier + lookup dict) with every
# percentage value.
configs = {
    "score_config": [
        (score_util.WHO_SCORE, who_dict),
        (score_util.FSA_SCORE, fsa_dict),
        (score_util.NUTRI_SCORE, nutri_dict),
    ],
    "percentage": [0.25, 0.5, 0.75],
}
config_grid = recommenders.tuning.parameter_sweep.generate_param_grid(configs)

In [None]:
# Filtering: run the percentage filter for the configurations in
# `config_grid` over the (unfiltered) model predictions.
healthy_model_predictions = filtering_util.multi_filter_recipes_by_percentage(config_grid, model_predictions)

In [None]:
# Post-Evaluation: score the percentage-filtered predictions with the same
# settings as the unfiltered run, then show them sorted by 'map' (descending).
healthy_evaluation = healthy_recommender_util.calc_scores(
    who_dict,
    fsa_dict,
    nutri_dict,
    test,
    healthy_model_predictions,
    TOP_K,
    store_results=False,
    normalize=True,
)
healthy_top_results = healthy_evaluation.sort_values(
    by=['map'],
    ascending=False,
)
display(healthy_top_results)

## Replace unhealthy recipes

In [None]:
# Configs: pair every nutrition score (identifier + lookup dict) with every
# substitution threshold.
configs = {
    "score_config": [
        (score_util.WHO_SCORE, who_dict),
        (score_util.FSA_SCORE, fsa_dict),
        (score_util.NUTRI_SCORE, nutri_dict),
    ],
    "substitution_threshold": [0.03, 0.05, 0.07],
}
config_grid = recommenders.tuning.parameter_sweep.generate_param_grid(configs)

In [None]:
# Calculate the tf-idf similarities between all recipes in `pp_recipes`.
cosine_similarities = tfidf_util.calc_tfidf_similarities(pp_recipes)

In [None]:
# Get healthy substitutions for each recipe, derived from the configurations
# in `config_grid` and the recipe similarities computed above.
healthy_substitution_pairs = tfidf_util.get_healthy_substitutions(config_grid, cosine_similarities)

In [None]:
# Filtering: exchange recipes in the model predictions using the healthy
# substitution pairs; TOP_K is passed as the cutoff.
healthy_model_predictions = filtering_util.multi_exchange_recipes(model_predictions, healthy_substitution_pairs, TOP_K)

In [None]:
# Post-Evaluation: score the predictions after recipe substitution with the
# same settings as the unfiltered run, then show them sorted by 'map'
# (descending).
healthy_evaluation = healthy_recommender_util.calc_scores(
    who_dict,
    fsa_dict,
    nutri_dict,
    test,
    healthy_model_predictions,
    TOP_K,
    store_results=False,
    normalize=True,
)
healthy_top_results = healthy_evaluation.sort_values(
    by=['map'],
    ascending=False,
)
display(healthy_top_results)