In [0]:
import os, sys

class FileSystem:
  """Resolve project file paths on Google Colab or on a local machine.

  On Colab (detected by `google.colab` being present in `sys.modules`) Google
  Drive is mounted at `/gdrive` and the project root is
  `/gdrive/My Drive/<colab_dir>`. Otherwise the root is `local_dir`.

  Attributes:
    root_dir: project root directory.
    data_dir: data directory, relative to `root_dir`.
    change_directory: True once `cd()` has moved the process into `root_dir`;
      from then on `path()` / `data_path()` return paths relative to it.
  """

  def __init__(self, colab_dir="ntds_2019", local_dir="./", data_dir="data"):
    """Pick the project root; mounts Google Drive when running on Colab.

    Args:
      colab_dir: project folder name inside "My Drive" (Colab only).
      local_dir: project root when not running on Colab.
      data_dir: name of the data folder inside the project root.
    """
    in_colab = 'google.colab' in sys.modules
    if in_colab:
      # Imported lazily: the package only exists inside a Colab runtime.
      from google.colab import drive
      drive.mount('/gdrive')
      self.root_dir = os.path.join("/gdrive/My Drive/", colab_dir)
    else:
      self.root_dir = local_dir
    self.data_dir = data_dir
    self.change_directory = False

  def data_path(self, name):
    """Return the path of data file `name` inside the data directory."""
    if self.change_directory:
      # Already inside root_dir, so the data directory is directly below us.
      return os.path.join(self.data_dir, name)
    return os.path.join(self.root_dir, self.data_dir, name)

  def path(self, name):
    """Return the path of `name` inside the project root."""
    if self.change_directory:
      return os.path.join("./", name)
    return os.path.join("./", self.root_dir, name)

  def cd(self):
    """Change the working directory to the project root and list its content.

    Uses `os.chdir` / `os.listdir` instead of the `%cd` / `%ls` line magics so
    the class is valid Python outside IPython and does not shell out to `ls`.
    Calling it again on the same instance is a no-op; otherwise a relative
    `root_dir` would be resolved a second time from inside itself.

    Raises:
      OSError: if `root_dir` does not exist or cannot be entered.
    """
    if self.change_directory:
      return
    os.chdir(self.root_dir)
    # Mirror what %cd / %ls displayed: the new directory, then its entries.
    print(os.getcwd())
    print("  ".join(sorted(os.listdir("."))))
    self.change_directory = True

# Build the path helper with its defaults (on Colab the constructor mounts
# Google Drive), then move the working directory to the project root.
fs = FileSystem()
fs.cd()

KeyboardInterrupt: ignored

In [0]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from pandas.io.json import json_normalize
import itertools

In [0]:
# Load the precomputed arrays from the project's data directory.
def _load_npy(filename):
  """Load one .npy file located in the data directory resolved by `fs`."""
  return np.load(fs.data_path(filename))

# Ingredient-to-ingredient adjacency matrices (one row/column per ingredient,
# judging by how they are indexed further down).
nutrition_adj_mat = _load_npy('Ingredient_Adjacency_Matrix_USDA_Nutrition_Info_Normalized.npy')
combined_adj_mat = _load_npy('Ingredient_Adjacency_Matrix_Combined.npy')
combined_adj_mat_v2 = _load_npy('Ingredient_Adjacency_Matrix_Combined_Multiplication.npy')

# Ingredient names and the per-ingredient vegan flags.
ingredient_list = _load_npy('Ingredient_List_USDA_Nutrition_Info.npy')
vegan_ingredient_mask = _load_npy('Vegan_Ingredient_Mask_USDA_Nutrition_Info_v2.npy')

In [0]:
def suggestKBestReplacements(nutrition_adj_mat,ingr_number,k):
  """Return the indices of the k largest entries in one row of the matrix.

  Args:
    nutrition_adj_mat: 2-D array indexed as [ingredient, ingredient].
    ingr_number: row index of the ingredient to replace.
    k: maximum number of indices to return.

  Returns:
    An index array ordered by decreasing row value (at most k entries), or an
    empty list when every entry of the row equals zero.
  """
  weights = nutrition_adj_mat[ingr_number, :]
  # Sorting the negated row ascending yields a descending ranking.
  ranked = np.argsort(-weights)[:k]
  if not np.any(weights != 0):
    return []
  return ranked

def suggestKBestVeganReplacements(nutrition_adj_mat,ingr_number,k, nonvegan_mask):
  """Return the indices of the k best replacements, skipping masked ingredients.

  Args:
    nutrition_adj_mat: 2-D array indexed as [ingredient, ingredient]. It is
      not modified.
    ingr_number: row index of the ingredient to replace.
    k: maximum number of indices to return.
    nonvegan_mask: boolean sequence (True = exclude this ingredient) or an
      array of integer indices to exclude.

  Returns:
    An index array ordered by decreasing row value, containing at most k
    entries and never an excluded index; or an empty list when no
    non-excluded entry of the row is non-zero.
  """
  # Copy the row: basic slicing returns a view, and zeroing a view would
  # permanently erase the non-vegan entries from the caller's matrix.
  scores = np.array(nutrition_adj_mat[ingr_number, :])

  excluded = np.zeros(scores.shape, dtype=bool)
  excluded[nonvegan_mask] = True
  scores[excluded] = 0

  if np.all(scores == 0):
    return []

  ranked = (-scores).argsort()
  # Drop excluded indices before truncating; otherwise zeroed non-vegan
  # entries fill the tail of the top-k when few vegan candidates exist.
  return ranked[~excluded[ranked]][:k]

In [0]:
# Query the nutrition adjacency matrix for the k closest ingredients.
ingr_number, k = 25, 5
maxvals = suggestKBestReplacements(nutrition_adj_mat, ingr_number, k)

print("Original Ingredient: {0}".format(ingredient_list[ingr_number]))
if len(maxvals) > 0:
  print("{0} best replacements: {1}".format(k, ingredient_list[maxvals]))
else:
  print("No replacements found!")

Original Ingredient: beef, rib eye steak, boneless, lip off, separable lean and fat, trimmed to 0" fat, all grades, cooked, grilled
5 best replacements: ['fish, catfish, channel, wild, raw' 'fish, tilapia, raw'
 'cheese, mozzarella, low sodium' 'meatballs, meatless' 'cheese, brie']


Make it vegan!

In [0]:
# Invert the vegan flags: True now marks an ingredient that must be excluded.
nonvegan_mask = [not is_vegan for is_vegan in vegan_ingredient_mask]

In [0]:
# Same query as before, restricted to vegan candidates (index 25 is beef).
ingr_number, k = 25, 5
maxvals2 = suggestKBestVeganReplacements(nutrition_adj_mat, ingr_number, k, nonvegan_mask)

print("Original Ingredient: {0}".format(ingredient_list[ingr_number]))
if len(maxvals2) > 0:
  print("{0} best replacements: {1}".format(k, ingredient_list[maxvals2]))
else:
  print("No replacements found!")

Original Ingredient: beef, rib eye steak, boneless, lip off, separable lean and fat, trimmed to 0" fat, all grades, cooked, grilled
5 best replacements: ['edamame, frozen, prepared'
 'tofu, raw, regular, prepared with calcium sulfate'
 'beans, kidney, all types, mature seeds, raw'
 "leavening agents, yeast, baker's, active dry" 'mushrooms, white, raw']


Try with the combined recipe/nutrition matrix

In [0]:
# Repeat the query for ingredient 25 against the combined adjacency matrix.
ingr_number, k = 25, 5
maxvals = suggestKBestReplacements(combined_adj_mat, ingr_number, k)

print("Original Ingredient: {0}".format(ingredient_list[ingr_number]))
if len(maxvals) > 0:
  print("{0} best replacements: {1}".format(k, ingredient_list[maxvals]))
else:
  print("No replacements found!")

Original Ingredient: beef, rib eye steak, boneless, lip off, separable lean and fat, trimmed to 0" fat, all grades, cooked, grilled
5 best replacements: ['ground turkey, raw'
 'chicken, broilers or fryers, wing, meat and skin, raw'
 'beef, flank, steak, separable lean and fat, trimmed to 0" fat, all grades, cooked, broiled'
 'alcoholic beverage, distilled, gin, 90 proof'
 'alcoholic beverage, distilled, whiskey, 86 proof']


Try with the combined matrix using the multiplication method

In [0]:
# Query ingredient 27 against the second combined matrix (combined_adj_mat_v2).
ingr_number, k = 27, 5
maxvals = suggestKBestReplacements(combined_adj_mat_v2, ingr_number, k)

print("Original Ingredient: {0}".format(ingredient_list[ingr_number]))
if len(maxvals) > 0:
  print("{0} best replacements: {1}".format(k, ingredient_list[maxvals]))
else:
  print("No replacements found!")

Original Ingredient: beverages, almond milk, unsweetened, shelf stable
5 best replacements: ['vanilla extract' 'wheat flour, white, all-purpose, unenriched'
 'water, bottled, generic' 'yogurt, greek, plain, nonfat'
 'blueberries, raw']
