## This notebook demonstrates the use of a simple meal planner expert system using Canada's Food and Nutrition database as the Knowledge Base

In [1]:
#import libraries
import numpy as np
import pandas as pd
import random
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from IPython.display import display

In [2]:
food_data_df = pd.read_csv('FoodNutritionData.csv')
display(food_data_df.head(5))

Unnamed: 0,FoodID,FoodDescription,FoodGroup,PROTValue,FATValue,CARBValue,STARValue,TSUGValue,TDFValue,TSATValue,MUFAValue,PUFAValue
0,2,Cheese souffle,Mixed Dishes,9.54,15.7,5.91,0.0,2.66,0.1,5.742,5.82,2.77
1,4,"Chop suey, with meat, canned",Mixed Dishes,4.07,2.8,5.29,0.0,3.4,1.1,0.364,1.54,0.75
2,5,"Chinese dish, chow mein, chicken",Mixed Dishes,6.76,2.8,8.29,3.99,1.74,1.0,0.49,0.613,1.226
3,6,Corn fritter,Baked Products,8.55,21.24,38.62,0.0,2.85,2.0,5.455,8.543,5.564
4,7,"Beef pot roast, with browned potatoes, peas an...",Mixed Dishes,21.29,5.25,10.72,0.0,1.44,1.6,1.872,2.552,0.709


In [3]:
#The below two functions are used to finf user input ingredients in our knowledge base and create a dataframe for them

def get_ingredient_data_row(ingredient_input,food_df):
    """This function finds the user ingredient input from the nutrition database and get nutrition data"""
    ingredient_name = ingredient_input[0] #name of the ingredient
    all_food_upper = food_df.FoodDescription.tolist() #list of all ingredients in dataframe
    all_food = [food.lower().replace(",", " ") for food in all_food_upper] #turn to lowercase and remove comma
    #using fuzzywuzzy, find the highest matching food name
    matched_food = process.extractOne(ingredient_name,all_food,scorer=fuzz.token_set_ratio)
    #get true name of ingredient
    matched_food = all_food_upper[all_food.index(matched_food[0])]
    #get nutrition values
    temp_df = food_df[food_df.FoodDescription == matched_food].copy()
    return {
        "FoodName":matched_food,
        "FoodGroup":temp_df.FoodGroup.tolist()[0],
        "TotalAmount":ingredient_input[1], #amount of ingredient with user
        "PROTValue":temp_df.PROTValue.tolist()[0],
        "FATValue":temp_df.FATValue.tolist()[0],
        "CARBValue":temp_df.CARBValue.tolist()[0],
        "STARValue":temp_df.STARValue.tolist()[0],
        "TSUGValue":temp_df.TSUGValue.tolist()[0],
        "TDFValue":temp_df.TDFValue.tolist()[0],
        "TSATValue":temp_df.TSATValue.tolist()[0],
        "MUFAValue":temp_df.MUFAValue.tolist()[0],
        "PUFAValue":temp_df.PUFAValue.tolist()[0]
        }

def get_ingredient_df(ingredient_inp_list,food_df):
    """Returns a dataframe with all user input ingredients and their nutrion info"""
    ingredient_list = []
    for ing in ingredient_inp_list:
        ingredient_list.append(get_ingredient_data_row(ing,food_df))
    return pd.DataFrame(ingredient_list)

In [4]:
class UserData():
    """This class stores and calculates user specific data"""
    def __init__(self):
        """Initialize User Data Variables"""
        self.age = None
        self.height = None
        self.gender = None
        self.weight = None
        self.prot_req = None
        self.carb_req = None
        self.fat_req = None
        self.ingredient_df = None
        
    def input_user_data(self,input_list):
        """Use input list to initialize user data"""
        self.age = input_list[0]
        self.height = input_list[1]
        self.gender = input_list[2]
        self.weight = input_list[3]                
        
    def calculate_daily_food_requirements(self):
        """This function uses the Harris-Benedict Equation for Basal Energy Expenditure to 
           to calculate the daily requirement of protein, fat and carbohydrates"""
        if self.gender == 1:
            multipliers = [655.1,9.6,1.9,4.7]
        else:
            multipliers = [66.5,13.8,5.0,6.8]
        #HBE to find BEE
        total_calories = multipliers[0] + multipliers[1] * self.weight\
                        + multipliers[2] * self.height + multipliers[3] * self.age
        self.prot_req = self.weight
        prot_per = ((self.weight * 4)/total_calories) * 100
        carb_per = 60
        fat_per = 100 - (prot_per + carb_per)
        self.carb_req = ((total_calories * carb_per)/100)/4
        self.fat_req = ((total_calories * fat_per)/100)/9
        print("You require",self.prot_req,"g of protein",self.carb_req,"g of carbohydrates",\
             self.fat_req,"g of fats per day")

In [5]:
user = UserData()
user.input_user_data([25,178,0,95]) #[Age,Height,Gender(0 for male 1 for female),weight]
user.calculate_daily_food_requirements()

You require 95 g of protein 365.625 g of carbohydrates 66.11111111111111 g of fats per day


In [6]:
#use database to type ingredients space separated, lower case and order dependent
#ie bread whole wheat cannot be written as whole wheat bread
# the second value is amount in grams
# quantity should be greater than that of carbohydrate requirement shown above
ingredient_list = [
    ["bread whole wheat homemade",1000.0],
    ["bread white commercial toasted",2000.0],
    ["beef brain raw",1000.0],
    ["beef ground regular raw", 2000.0],
    ["chicken broiler light meat raw",3000.0],
    ["cereal ready to eat fibre first multibran",500.0],
    ["milk fluid skim",500.0],
    ["cheese cheddar",1000.0],
    ["fish salmon atlantic wild raw",2000.0],
    ["mexican burrito with beans",1500.0],
    ["spinach raw",2000.0],
    ["apple red delicious raw",3000.0],
    ["tuna salad",1000.0],
    ["sweet potato boiled without skin",2500.0],
    ["banana raw",1000.0],
    ["beans kidney dark red",1200.0],
    ["Waffle buttermilk frozen",500],
    ["milk shake fast food chocolate",1000.0],
    ["cereal hot oats large flakes whole-grain", 1500.0],
    ["nuts walnuts english dried",1000.0],
    ["egg quail whole fresh raw",1000.0],
    ["egg chicken white dried powder glucose reduced",2000.0],
    ["fast foods dessert cookies chocolate chip",1500.0],
    ["salad dressing french homemade unspecified oil",1000.0],
    ["fish herring atlantic pickled",4000.0],
    ["apricot raw",1500.0],
    ["blackberry raw", 1000.0],
    ["plantain raw", 1000.0],
    ["lamb new zealand rib lean raw",2000.0],
    ["peanuts spanish raw", 1500.0],
    ["beans pink boiled salted", 2000.0],
    ["soybeans dry boiled salted", 1500.0],
    ["tofu fried", 1500.0],
    ["turkey pot pie frozen", 1000.0],
    ["nuts pecan dried", 1000.0],
    ["turkey meat and skin raw", 2000.0],
    ["snacks trail mix regular unsalted", 1500.0],
    ["soup turkey ready to serve", 1000.0],
    ["dessert frozen yogurt", 1000.0],
    ["beans kidney sprouted raw", 1500.0],
    ["bok choy pak-choi raw",1000.0],
    ["corn sweet canned", 1000.0],
    ["peas and carrots frozen unprepared", 2000.0]
]

In [7]:
#get dataframe from ingredients
user.ingredient_df = get_ingredient_df(ingredient_list,food_data_df)
display(user.ingredient_df.head(15))

Unnamed: 0,FoodName,FoodGroup,TotalAmount,PROTValue,FATValue,CARBValue,STARValue,TSUGValue,TDFValue,TSATValue,MUFAValue,PUFAValue
0,"Bread, whole wheat, homemade (2/3 whole wheat ...",Baked Products,1000.0,8.4,5.4,51.4,0.0,3.84,6.0,0.796,1.158,2.939
1,"Bread, white with raisins, commercial, toasted",Baked Products,2000.0,8.15,4.4,60.84,0.0,19.76,4.8,1.272,0.905,2.027
2,"Beef, brain, raw",Beef Products,1000.0,10.86,10.3,1.05,0.0,0.0,0.0,2.3,1.89,1.586
3,"Beef, ground, regular",Beef Products,2000.0,28.8,21.47,0.0,0.0,0.0,0.0,6.356,7.562,0.456
4,"Chicken, broiler, light meat and skin, raw",Poultry Products,3000.0,20.27,11.07,0.0,0.0,0.0,0.0,3.16,4.52,2.34
5,"Cereal, ready to eat, Fibre First Multibran, B...",Breakfast cereals,500.0,11.2,3.7,78.2,0.0,18.0,43.0,0.7,0.8,2.0
6,"Milk, fluid, skim",Dairy and Egg Products,500.0,3.37,0.08,4.96,0.0,5.09,0.0,0.056,0.022,0.003
7,"Cheese, processed, cheddar, cold pack",Dairy and Egg Products,1000.0,19.66,24.46,8.32,0.0,0.51,0.0,15.355,7.165,0.719
8,"Fish, salmon, atlantic, wild, raw",Finfish and Shellfish Products,2000.0,19.84,6.34,0.0,0.0,0.0,0.0,0.981,2.103,2.539
9,"Fast foods, mexican, burrito with beans",Fast Foods,1500.0,6.48,6.22,32.92,0.0,0.0,4.4,3.174,2.184,0.551


In [29]:
class Meal():
    """Meal class holds the information for one day's meal"""
    
    def __init__(self,user_data = None,random_state=None,no_of_ingredients=None,override_ingredients=None):
        """Creates a Random Meal from User Data OR from specified dataframe"""
        
        self.fitness = 0.0
        
        if override_ingredients is not None: #to allow for initialization using dataframe instead of random
            self.ingredients = override_ingredients
            return
        
        #get random sample of ingredients
        ingredient_df = user_data.ingredient_df.sample(n = no_of_ingredients,random_state = random_state)
        #get ingredient with max protein,carbohydrate,fat
        max_prot_ix = ingredient_df.PROTValue.idxmax()
        max_carb_ix = ingredient_df.CARBValue.idxmax()
        max_fat_ix = ingredient_df.FATValue.idxmax()
        #copy requirements for editing
        prot_req = user_data.prot_req
        carb_req = user_data.carb_req
        fat_req = user_data.fat_req
        ingred_vals = []
        for i,row in ingredient_df.iterrows():
            minimum_amount = 0
            maximum_amount = row.TotalAmount
            #get minimum amount of fat first as it is the least required nutrient
            if i == max_fat_ix and row.FATValue > 0: #get at least 50% fat from source with most fat
                minimum_amount = (0.5 * fat_req)/row.FATValue
                maximum_amount = min(row.TotalAmount,fat_req/row.FATValue)
            if i == max_prot_ix and row.PROTValue > 0: #get at least 50% protein from source with most protein
                minimum_amount = (0.5 * prot_req)/row.PROTValue
                maximum_amount = min(row.TotalAmount,prot_req/row.PROTValue)
            if i == max_fat_ix and row.CARBValue > 0: #get at least 50% carbohydrates from source with most carbohydrates
                minimum_amount = (0.5 * carb_req)/row.CARBValue
                maximum_amount = min(row.TotalAmount,carb_req/row.CARBValue)
            #calculate the amount using the total amount and minimum amount
            values = {
                "FoodName":row.FoodName,
                "FoodGroup":row.FoodGroup,
                "TotalAmount":row.TotalAmount,
                "MealAmount":float(random.random() * (row.TotalAmount - minimum_amount) + minimum_amount),
                "PROTValue":row.PROTValue,
                "FATValue":row.FATValue,
                "CARBValue":row.CARBValue,
                "STARValue":row.STARValue,
                "TSUGValue":row.TSUGValue,
                "TDFValue":row.TDFValue,
                "TSATValue":row.TSATValue,
                "MUFAValue":row.MUFAValue,
                "PUFAValue":row.PUFAValue
            }
            ingred_vals.append(values)
        self.ingredients = pd.DataFrame(ingred_vals)
               
    def update_fitness(self,prot_req,carb_req,fat_req):
        total_vals = {
            "PROTValue":0.0,
            "FATValue":0.0,
            "CARBValue":0.0,
            "STARValue":0.0,
            "TSUGValue":0.0,
            "TDFValue":0.0,
            "TSATValue":0.0,
            "MUFAValue":0.0,
            "PUFAValue":0.0
        }
        for i,row in self.ingredients.iterrows():
            for column in self.ingredients.columns:
                if column in total_vals:
                    total_vals[column] = total_vals[column] + (row.MealAmount * row[column])/100
        #apply rules to calculate fitnes
        
        #add the percentage of protein, carbohydrate and fat value met and subtract if extra
        fitness_multiplier = 1
        if total_vals["PROTValue"] > prot_req:
            fitness_multiplier = 2
        self.fitness = self.fitness + fitness_multiplier * (prot_req - total_vals["PROTValue"])/prot_req
        fitness_multiplier = 1
        if total_vals["CARBValue"] > carb_req:
            fitness_multiplier = 2
        self.fitness = self.fitness + fitness_multiplier * (carb_req - total_vals["CARBValue"])/carb_req
        fitness_multiplier = 1
        if total_vals["FATValue"] > fat_req:
            fitness_multiplier = 2
        self.fitness = self.fitness + fitness_multiplier * (fat_req - total_vals["FATValue"])/fat_req
        
        ing_per = total_vals["TSUGValue"]/total_vals["CARBValue"]
        if ing_per > 0.3: #if sugar is more than 30% of carb value then meal is not good
            self.fitness = self.fitness - ing_per
        else:
            self.fitness = self.fitness + ing_per
        ing_per = total_vals["TSATValue"]/total_vals["FATValue"]
        if ing_per > 0.3: #if saturated fat is more than 30% of fat value then meal is not good
            self.fitness = self.fitness - ing_per
        else:
            self.fitness = self.fitness + ing_per
        
        #add total dietary fibre percentage  
        #self.fitness = self.fitness + (total_vals["TDFValue"]/total_vals["CARBValue"])       
        #add total unsaturated fat percentage
        #self.fitness = self.fitness + ((total_vals["MUFAValue"] + total_vals["PUFAValue"])/total_vals["FATValue"])
        

class DailyMealPlanner():
    """Implementation of Expert System that gives a new meal plan daily"""
    def __init__(self,
                 user_data,
                 combinations = 100,
                 no_of_ingredients = 5,
                 mutation_rate=0.001,
                 elite_percentage=10,
                 random_state=1):
        """Initialize The Meal Plan System"""
        self.combinations = combinations
        self.no_of_ingredients = no_of_ingredients
        self.mutation_rate = mutation_rate
        self.elite_per_unit = elite_percentage/100.0
        self.random_state = random_state
        self.user_data = user_data
    
    def create_population(self):
        """Get Random Samples from ingredient data"""
        random.seed(self.random_state)
        self.current_generation = [] #create random new meals from user ingredients
        for i in range(0,self.combinations):
            self.current_generation.append(Meal(user_data=self.user_data,
                                                random_state=self.random_state,
                                                no_of_ingredients=self.no_of_ingredients))
            
    def update_generation_order(self):
        """Updates the fitness function of the meal and sets the order of the current generation"""
        for meal in self.current_generation:
            meal.update_fitness(self.user_data.prot_req,self.user_data.carb_req,self.user_data.fat_req)
        #rank meals in order of fitness
        self.current_generation = sorted(self.current_generation, key=lambda meal: meal.fitness, reverse=True)
        
    def perform_selection(self):
        """Performs the selection step of genetic algorithm"""
        total_selected = int(self.elite_per_unit * len(self.current_generation))
        selected_generations = []
        for i in range(0,total_selected):
            selected_generations.append(self.current_generation[i])
        # randomly pick candidates from the rest
        selected_generations.extend(random.sample(self.current_generation[int(self.elite_per_unit):], 10))
        self.current_generation = selected_generations
    
    def perform_ordered_cross_over(self):
        """Performs the ordered cross over step of genetic algorithm"""
        #shuffle selected generation
        self.current_generation = random.sample(self.current_generation,len(self.current_generation))
        children = []
        while len(children) < self.combinations:
            x,y = random.sample(range(len(self.current_generation)), 2)
            parent_meal_1 = self.current_generation[x]
            parent_meal_2 = self.current_generation[y]
            child_ingredients = [None] * self.no_of_ingredients #create empty list of ingredients
            p1, p2 = random.sample(range(self.no_of_ingredients), k=2) #get two points in list
            from_index = min(p1, p2) #arrange in min max
            to_index = max(p1, p2)
            parent_1_genes = parent_meal_1.ingredients.to_dict(orient='records')
            child_ingredients[from_index:to_index] = parent_1_genes[from_index:to_index] #copy records from parent 1
            parent_2_genes = parent_meal_2.ingredients.to_dict(orient='records')
            
            for i in range(0,len(child_ingredients)): #fill rest with parent 2 records
                if child_ingredients[i] is None:
                    ingredient_val = None
                    for ingredient in parent_2_genes:
                        if ingredient not in child_ingredients:
                            ingredient_val = ingredient
                            break
                    if ingredient_val is None: #didn't find unique ingredient, use parent 1 ingredient only
                        for ingredient in parent_1_genes:
                            if ingredient not in child_ingredients:
                                ingredient_val = ingredient
                                break
                    child_ingredients[i] = ingredient_val
            #create child from parent ingredients
            child = Meal(override_ingredients = pd.DataFrame(child_ingredients))
            #add to next generation 
            children.append(child)
        self.current_generation = children
        
    def perform_mutation(self):
        """Performs the mutation step of genetic algorithm"""
        mutated = []
        for meal in self.current_generation:
            #get ingredients of current meal
            ingredients = meal.ingredients.to_dict(orient='records')
            for i in range(self.no_of_ingredients):
                if (random.random() < self.mutation_rate):
                    j = int(random.random() * self.no_of_ingredients)
                    #exchange the meal amount of the two randomly selected meals make sure they don't exceed total amount
                    val_i = ingredients[i]['MealAmount']
                    val_j = ingredients[j]['MealAmount']
                    ingredients[i]['MealAmount'] = max(min(val_j,ingredients[i]['TotalAmount']),0)
                    ingredients[j]['MealAmount'] = max(min(val_i,ingredients[j]['TotalAmount']),0)
            meal.ingredients = pd.DataFrame(ingredients) #set changes to ingredients dataframe
        
    def next_generation(self):
        """Runs a single iteration of genetic algorithm"""
        self.update_generation_order()
        self.perform_selection()
        self.perform_ordered_cross_over()
        self.perform_mutation()
            
    def get_healthy_meal(self,generations = 100):
        """Runs the Genetic Algorithm to get a new meal"""
        self.create_population()
        for i in range(0,generations): #run for generations
            self.next_generation()
        self.update_generation_order()
        return self.current_generation[0] #return best generation

In [31]:
daily_meal_planner = DailyMealPlanner(user,50,5,0.002,10,25)
new_meal = daily_meal_planner.get_healthy_meal(50)
print("Meal Fitness:" ,new_meal.fitness)
display(new_meal.ingredients.head(5))

Meal Fitness: 0.4557168516762039


Unnamed: 0,FoodName,FoodGroup,TotalAmount,MealAmount,PROTValue,FATValue,CARBValue,STARValue,TSUGValue,TDFValue,TSATValue,MUFAValue,PUFAValue
0,"Banana, raw",Fruits and fruit juices,1000.0,106.771202,1.09,0.33,22.84,5.38,12.23,1.7,0.112,0.032,0.073
1,"Dessert, frozen yogourt",Sweets,1000.0,172.072068,3.0,3.6,21.6,0.0,19.92,0.0,2.326,0.986,0.1
2,"Nuts, pecans, dried",Nuts and Seeds,1000.0,82.631611,9.17,71.97,13.86,0.46,3.97,9.6,6.18,40.801,21.614
3,"Banana, raw",Fruits and fruit juices,1000.0,563.077808,1.09,0.33,22.84,5.38,12.23,1.7,0.112,0.032,0.073
4,"Banana, raw",Fruits and fruit juices,1000.0,358.677331,1.09,0.33,22.84,5.38,12.23,1.7,0.112,0.032,0.073
