In [1]:
import csv
import numpy as np
from tqdm import tqdm

NUM_RECIPES  = 25

The purpose of this code is to determine the recipes to be used in our cold-start solution. Essentially, we want to choose the X recipes that are the least similar.

# Load EpiCurious Database

In [2]:
with open('./epicurious.csv', 'r') as read_obj:
    epicuriousString = list(csv.reader(read_obj))

epicuriousString.pop(0)
epicurious = []

for row in epicuriousString:
    temp = []
    for cell in row:
        try:
            temp.append(float(cell))
        except:
            temp.append(cell)
    epicurious.append(temp)


# Fine best NUM_RECIPES

This will implement a greedy policy. The first recipe will just be the one with the most tags, after that, it will be the recipe with the most unused tags

In [3]:
recipes = []

#Find the first recipe
bestTags = 0
bestRecipe = 0
i = 0

for row in epicurious:
    
    #Check if there are more tags
    if(sum(row[6:]) > bestTags):
        bestTags = sum(row[6:])
        bestRecipe = i
    i+=1
print('Best Recipe: ', epicurious[bestRecipe][0], ' has ', bestTags, ' tags')


Best Recipe:  Fruit Smoothie   has  37.0  tags


In [4]:
#Find the remaining recipes
TopRecipes = [bestRecipe]
totalTags = bestTags
usedTags = []
i=0
for cell in epicurious[bestRecipe][6:]:
    
    if(cell==1.0):
        usedTags.append(i)
    i+=1

for num in tqdm(range(NUM_RECIPES-1)):
    
    #Loop through existing rows
    i=0
    bestScore = 0
    bestRecipe = 0
    bestTags = []
    
    for recipe in epicurious:
        
        tags = []
        score = 0
        
        #Don't bother to evaluate the recipes we already found
        if(not (i in TopRecipes)):
            
            #Calculate the new tags
            j = 0
            for cell in recipe[6:]:
                if(not (j in usedTags)):
                    score+=cell
                    if(cell==1.0):
                        tags.append(j)
                j+=1
            
            if(score>bestScore):
                bestRecipe = i
                bestScore = score
                bestTags = tags
            
        i+=1
        
    #Save the best recipe
    TopRecipes.append(bestRecipe)
    usedTags+=bestTags


print('Proposed recipes use ', len(usedTags), ' of 674 tags')

100%|██████████| 24/24 [13:46<00:00, 34.45s/it]

Proposed recipes use  285  of 674 tags





# Save the data

In [5]:
writeData = []
for row in TopRecipes:
    writeData.append([epicurious[row][0], row])
    
with open('./coldStartRecipes.csv', 'w') as write_obj:
    writer=csv.writer(write_obj)
    writer.writerows(writeData)