In [1]:
from annoy import AnnoyIndex
from typing import Dict, List
from pathlib import Path
from ingredient2vec.recipe_loader import recipe_loader
import pandas as pd

# Seed handed to DataFrame.sample in generate_test_data so that the same
# recipes are drawn on every run and for every recommendation system.
RANDOM_STATE = 42

In [9]:
class RecommendationSystem:
    """Plain record describing one recommender to be evaluated.

    Attributes:
        name: label used to tag the recommendations this system produces.
        index_path: location of the system's saved nearest-neighbour index.
        vec_size: dimensionality of the vectors stored in that index.
    """

    def __init__(self, name: str, path: Path, vec_size: int):
        # The constructor argument is called `path`, but it is exposed to
        # callers under the more specific attribute name `index_path`.
        self.vec_size = vec_size
        self.index_path = path
        self.name = name

In [2]:
def generate_test_data(rec_systems: List[RecommendationSystem],
                       dataset_path: Path,
                       num_recipes: int,
                       num_recommendations: int,
                       out_path: Path):
    """Write a CSV pairing sampled recipes with each system's recommendations.

    A fixed random sample of recipes is drawn once (seeded with RANDOM_STATE)
    and reused for every system. For each system its Annoy index is loaded,
    the nearest neighbours of every sampled recipe are looked up, and one
    output row is produced per recommendation.

    Args:
        rec_systems: systems to evaluate; each supplies ``name``,
            ``index_path`` and ``vec_size``.
        dataset_path: currently unused -- the dataset is obtained from
            ``recipe_loader()``, which is called without arguments. Kept so
            existing callers keep working.
        num_recipes: how many recipes to sample as query ("origin") recipes.
        num_recommendations: how many neighbours to request per origin recipe.
        out_path: destination of the resulting CSV file.

    Output columns: Rec_Name, Rec_Description, Rec_Ingredients, Rec_System,
    Origin_Name, Origin_Description, Origin_Ingredients. The CSV index holds
    the row labels of the recommended recipes.
    """
    output_columns = [
        "Rec_Name",
        "Rec_Description",
        "Rec_Ingredients",
        "Rec_System",
        "Origin_Name",
        "Origin_Description",
        "Origin_Ingredients",
    ]

    # load dataset
    recipes = recipe_loader()

    # choose a sample of recipes to get recommendations for, fix across all systems
    sample = recipes.sample(n=num_recipes, random_state=RANDOM_STATE)

    # one frame per (system, origin recipe); concatenated once at the end
    # instead of growing a DataFrame inside the loop
    collected = []

    # for each recommender
    for system in rec_systems:

        index = AnnoyIndex(system.vec_size, "angular")
        # hand Annoy a plain string; a pathlib.Path is not guaranteed to be accepted
        index.load(str(system.index_path))

        for recipe_index in sample.index.values:

            # get the IDs of the recommendations
            # NOTE(review): the recipe's index label is used as the Annoy item
            # id, and the returned ids are used as row positions below. That
            # is only consistent if labels, positions and Annoy ids coincide
            # (e.g. a default RangeIndex) -- TODO confirm against index build.
            recommendation_ids = index.get_nns_by_item(recipe_index, num_recommendations)

            # get the details of the originating recipe; recipe_index is a
            # label taken from sample.index, so it must be looked up with .loc
            origin_recipe = sample.loc[recipe_index]

            # get the full records for the recommendations and give the
            # columns their output names (columns=, not the index)
            recommendations = recipes.iloc[recommendation_ids].rename(columns={
                "Name": "Rec_Name",
                "Description": "Rec_Description",
                "Ingredients": "Rec_Ingredients",
            })

            row_count = len(recommendations)

            # Values are repeated explicitly so that a list-like cell (e.g. a
            # list of ingredients) is stored once per row rather than being
            # interpreted as a whole column. assign() returns a new frame, so
            # the slice taken from `recipes` is never written to.
            recommendations = recommendations.assign(
                Rec_System=system.name,
                Origin_Name=[origin_recipe["Name"]] * row_count,
                Origin_Description=[origin_recipe["Description"]] * row_count,
                Origin_Ingredients=[origin_recipe["Ingredients"]] * row_count,
            )

            # drop unneeded columns and fix the column order
            collected.append(recommendations[output_columns])

    if collected:
        evaluation_data = pd.concat(collected)
    else:
        # nothing to evaluate: still emit a CSV carrying the expected header
        evaluation_data = pd.DataFrame({column: [] for column in output_columns})

    evaluation_data.to_csv(out_path)
                