## Genetic Algorithm Example

### Example from towardsdatascience.com: Joos Korstanje, "Genetic Algorithms in Python using the DEAP library"

The goal is to optimize a weekly meal plan.

In [11]:
pip install deap

Collecting deap
[?25l  Downloading https://files.pythonhosted.org/packages/99/d1/803c7a387d8a7e6866160b1541307f88d534da4291572fb32f69d2548afb/deap-1.3.1-cp37-cp37m-manylinux2010_x86_64.whl (157kB)
[K     |██                              | 10kB 22.1MB/s eta 0:00:01[K     |████▏                           | 20kB 14.0MB/s eta 0:00:01[K     |██████▏                         | 30kB 15.0MB/s eta 0:00:01[K     |████████▎                       | 40kB 16.0MB/s eta 0:00:01[K     |██████████▍                     | 51kB 15.3MB/s eta 0:00:01[K     |████████████▍                   | 61kB 17.0MB/s eta 0:00:01[K     |██████████████▌                 | 71kB 12.5MB/s eta 0:00:01[K     |████████████████▋               | 81kB 13.4MB/s eta 0:00:01[K     |██████████████████▋             | 92kB 12.4MB/s eta 0:00:01[K     |████████████████████▊           | 102kB 11.8MB/s eta 0:00:01[K     |██████████████████████▉         | 112kB 11.8MB/s eta 0:00:01[K     |████████████████████████▉      

In [36]:
import pandas as pd
import numpy as np
import random
from deap import base
from deap import creator
from deap import tools

In [37]:
# Weekly calorie budget: 2500 calories for each of the 7 days.
total_cal = 7 * 2500

# Target share of the weekly calories per macronutrient,
# in the order protein, carbohydrate, fat.
percentage_prot, percentage_carb, percentage_fat = 0.3, 0.5, 0.2


In [38]:
# Split the weekly calorie budget into a calorie target per macronutrient.
cal_prot, cal_carb, cal_fat = (
    round(share * total_cal)
    for share in (percentage_prot, percentage_carb, percentage_fat)
)


In [4]:
# Show the weekly calorie target for protein.
cal_prot

5250

In [5]:
# Show the weekly calorie target for carbohydrate.
cal_carb

8750

In [6]:
# Show the weekly calorie target for fat.
cal_fat

3500

In [39]:
# Fixed information: calories contained in one gram of
# protein, carbohydrate and fat respectively.
prot_cal_p_gram, carb_cal_p_gram, fat_cal_p_gram = 4, 4, 9

In [40]:
# Convert each weekly calorie target into a weekly target in grams.
gram_prot, gram_carb, gram_fat = (
    calories / calories_per_gram
    for calories, calories_per_gram in (
        (cal_prot, prot_cal_p_gram),
        (cal_carb, carb_cal_p_gram),
        (cal_fat, fat_cal_p_gram),
    )
)
print(gram_prot, gram_carb, gram_fat)

1312.5 2187.5 388.8888888888889


In [41]:
# Candidate products, one row each:
# name, min per week, max per week, calories per unit,
# grams of protein, grams of fat, grams of carbohydrate.
products_table = pd.DataFrame.from_records(
    [
        # Fruit and sweets
        ['Banana 1u', 0, 4, 89, 1, 0, 23],
        ['Mandarin 1u', 0, 4, 40, 1, 0, 10],
        ['Ananas 100g', 0, 7, 50, 1, 0, 13],
        ['Grapes 100g', 0, 7, 76, 1, 0, 17],
        ['Chocolate 1 bar', 0, 4, 230, 3, 13, 25],
        # Cheese and spreads
        ['Hard Cheese 100g', 0, 8, 350, 28, 26, 2],
        ['Soft Cheese 100g', 0, 8, 374, 18, 33, 1],
        ['Pesto 100g', 0, 8, 303, 3, 30, 4],
        ['Hoummous 100g', 0, 8, 306, 7, 25, 11],
        ['Aubergine Paste 100g', 0, 4, 228, 1, 20, 8],
        # Protein sources
        ['Protein Shake', 0, 5, 160, 30, 3, 5],
        ['Veggie Burger 1', 0, 5, 220, 21, 12, 3],
        ['Veggie Burger 2', 0, 12, 165, 16, 9, 2],
        ['Boiled Egg', 0, 8, 155, 13, 11, 1],
        ['Backed Egg', 0, 16, 196, 14, 15, 1],
        # Bread and pizza
        ['Baguette Bread Half', 0, 3, 274, 10, 0, 52],
        ['Square Bread 1 slice', 0, 3, 97, 3, 1, 17],
        ['Cheese Pizza 1u', 0, 3, 903, 36, 47, 81],
        ['Veggie Pizza 1u', 0, 3, 766, 26, 35, 85],
        # Drinks
        ['Soy Milk 200ml', 0, 1, 115, 8, 4, 11],
        ['Soy Chocolate Milk 250ml', 0, 3, 160, 7, 6, 20],
    ],
    columns=['Name', 'Min', 'Max', 'Calories', 'Gram_Prot', 'Gram_Fat', 'Gram_Carb'],
)

products_table

Unnamed: 0,Name,Min,Max,Calories,Gram_Prot,Gram_Fat,Gram_Carb
0,Banana 1u,0,4,89,1,0,23
1,Mandarin 1u,0,4,40,1,0,10
2,Ananas 100g,0,7,50,1,0,13
3,Grapes 100g,0,7,76,1,0,17
4,Chocolate 1 bar,0,4,230,3,13,25
5,Hard Cheese 100g,0,8,350,28,26,2
6,Soft Cheese 100g,0,8,374,18,33,1
7,Pesto 100g,0,8,303,3,30,4
8,Hoummous 100g,0,8,306,7,25,11
9,Aubergine Paste 100g,0,4,228,1,20,8


In [42]:
# Per-unit macronutrient grams, pulled out of the table as plain lists so the
# fitness function can zip them against an individual's quantities.
cal_data = products_table[['Gram_Prot', 'Gram_Fat', 'Gram_Carb']]
prot_data, fat_data, carb_data = (
    list(cal_data[column]) for column in ('Gram_Prot', 'Gram_Fat', 'Gram_Carb')
)

In [43]:
# DEAP toolbox set up
def n_per_product(n_products=21, max_units=10):
    """Draw a random weekly quantity for every product.

    Args:
        n_products: Number of quantities to draw. The default of 21 matches
            the number of rows in the products table.
        max_units: Exclusive upper bound on each quantity; values are drawn
            uniformly (with replacement) from ``range(0, max_units)``.

    Returns:
        A list of ``n_products`` integers.
    """
    return random.choices(range(0, max_units), k=n_products)

In [44]:
def evaluate(individual):
    """Single-objective fitness: distance of the weekly calories from the goal.

    Args:
        individual: A DEAP individual wrapping one list of per-product
            quantities; only ``individual[0]`` is read.

    Returns:
        A 1-tuple ``(abs(calories - total_cal),)``. The length matches the
        single weight the fitness class for this section is created with; a
        longer tuple is at best silently truncated by DEAP when it is paired
        with the weights.
    """
    quantities = individual[0]
    tot_prot = sum(x * y for x, y in zip(prot_data, quantities))
    tot_fat = sum(x * y for x, y in zip(fat_data, quantities))
    tot_carb = sum(x * y for x, y in zip(carb_data, quantities))
    # Calories are derived from the macronutrient grams, not from the
    # table's Calories column.
    cals = (prot_cal_p_gram * tot_prot
            + carb_cal_p_gram * tot_carb
            + fat_cal_p_gram * tot_fat)
    return (abs(cals - total_cal),)

In [45]:
# Single-objective fitness class: one weight, negative so that DEAP treats a
# smaller value as better.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
# An individual is a plain list that carries a FitnessMin instance.
creator.create("Individual", list, fitness=creator.FitnessMin)



In [46]:
# Fresh toolbox that the operators for the single-objective run are registered on.
toolbox = base.Toolbox()


In [47]:
# Expose the random quantity generator as toolbox.n_per_product.
toolbox.register("n_per_product",n_per_product)

In [48]:
# An individual wraps a single gene list (n=1), which is why the rest of the
# notebook reads and mutates individual[0].
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.n_per_product, n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
# Genes are integer quantities in 0..9 (see n_per_product). mutFlipBit would
# replace a mutated gene with int(not gene), i.e. only ever 0 or 1, so draw a
# fresh quantity from the full range instead (low and up are both inclusive).
toolbox.register("mutate", tools.mutUniformInt, low=0, up=9, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)


In [49]:
# optimization
# Definition of the complete genetic algorithm run; the loop closely follows
# the example from the DEAP library.
def main(n_pop=300, n_gen=5000, cxpb=0.5, mutpb=0.2):
    """Run the genetic algorithm and return the best individual found.

    Uses the module-level ``toolbox`` for population creation, evaluation,
    selection, crossover and mutation.

    Args:
        n_pop: Population size.
        n_gen: Number of generations to evolve.
        cxpb: Probability with which two individuals are crossed.
        mutpb: Probability of mutating an individual.

    Returns:
        The individual of the final population whose first fitness value
        is smallest.
    """
    pop = toolbox.population(n=n_pop)

    # Evaluate the entire initial population.
    for ind, fit in zip(pop, map(toolbox.evaluate, pop)):
        ind.fitness.values = fit

    for _ in range(n_gen):
        # Select the next generation and clone it, so the variation operators
        # below never modify individuals of the current population.
        offspring = [toolbox.clone(ind) for ind in toolbox.select(pop, len(pop))]

        # Crossover on neighbouring pairs; operators work on the wrapped
        # gene list (element 0), and changed individuals lose their fitness.
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < cxpb:
                toolbox.mate(child1[0], child2[0])
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < mutpb:
                toolbox.mutate(mutant[0])
                del mutant.fitness.values

        # Re-evaluate only the individuals whose fitness was invalidated.
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        for ind, fit in zip(invalid_ind, map(toolbox.evaluate, invalid_ind)):
            ind.fitness.values = fit

        pop[:] = offspring

    # Every individual has a valid fitness here, so rank on the stored first
    # fitness value. np.argmin over the raw evaluate() tuples would return an
    # index into the flattened 2-D array rather than a population index
    # whenever evaluate() returns more than one value.
    return min(pop, key=lambda ind: ind.fitness.values[0])

In [50]:
# Run the genetic algorithm defined by main() and keep the individual it returns.
best_solution = main()

In [58]:
# The individual wraps a single gene list, so element 0 holds the chosen
# quantity per product, in the same row order as products_table.
products_table['univariate_choice'] = pd.Series(best_solution[0])
products_table.head()

Unnamed: 0,Name,Min,Max,Calories,Gram_Prot,Gram_Fat,Gram_Carb,multivariate_choice,univariate_choice
0,Banana 1u,0,4,89,1,0,23,1,1
1,Mandarin 1u,0,4,40,1,0,10,3,3
2,Ananas 100g,0,7,50,1,0,13,0,0
3,Grapes 100g,0,7,76,1,0,17,1,1
4,Chocolate 1 bar,0,4,230,3,13,25,7,7


In [51]:
# NOTE(review): at this point best_solution still holds the result of the
# single-objective run, so this column is a copy of 'univariate_choice'. It is
# assigned again after the multi-objective run further down; this cell looks
# like a leftover from out-of-order execution -- confirm and remove.
products_table['multivariate_choice'] = pd.Series(best_solution[0])

In [59]:
# Second version: optimize all four components of the shopping list at once,
# in the order calories, protein, fat, carbohydrate.
# All weights are negative because every component is an error to minimise.
# Each macronutrient weight is divided by that nutrient's share of the calorie
# goal (30% protein, 20% fat, 50% carbohydrate), with the intent of making the
# components equally important.
# NOTE(review): DEAP documents Fitness comparison as lexicographic, so with
# tournament selection the weight magnitudes probably do not trade the
# objectives off against each other -- confirm.
weights = (-1., -1. / 0.3, -1. / 0.2, -1./0.5)

In [60]:
# Re-create the fitness and individual classes with the four weights above.
# NOTE(review): this reuses the names "FitnessMin" and "Individual" from the
# single-objective section, so the earlier classes are replaced; DEAP
# presumably warns about the redefinition -- confirm.
creator.create("FitnessMin", base.Fitness, weights=weights)
creator.create("Individual", list, fitness=creator.FitnessMin)



In [62]:
def evaluate(individual):
    """Multi-objective fitness: distance from each weekly nutrition goal.

    Args:
        individual: A DEAP individual wrapping one list of per-product
            quantities; only ``individual[0]`` is read.

    Returns:
        A 4-tuple of absolute errors, in the order
        (calories, protein grams, fat grams, carbohydrate grams).
    """
    quantities = individual[0]

    def weekly_grams(grams_per_unit):
        # Total grams over the week for one macronutrient.
        return sum(x * y for x, y in zip(grams_per_unit, quantities))

    tot_prot = weekly_grams(prot_data)
    tot_fat = weekly_grams(fat_data)
    tot_carb = weekly_grams(carb_data)
    # Calories are derived from the macronutrient grams, not from the
    # table's Calories column.
    cals = prot_cal_p_gram * tot_prot + carb_cal_p_gram * tot_carb + fat_cal_p_gram * tot_fat

    return (
        abs(cals - total_cal),
        abs(tot_prot - gram_prot),
        abs(tot_fat - gram_fat),
        abs(tot_carb - gram_carb),
    )

In [63]:
# Setup of the DEAP library: register the different functions in a new toolbox.
toolbox = base.Toolbox()

toolbox.register("n_per_product", n_per_product)

# An individual wraps a single gene list (n=1), which is why the rest of the
# notebook reads and mutates individual[0].
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.n_per_product, n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Each operator is registered once; registering the same alias again would
# only overwrite it with an identical entry.
toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
# Genes are integer quantities in 0..9 (see n_per_product). mutFlipBit would
# replace a mutated gene with int(not gene), i.e. only ever 0 or 1, so draw a
# fresh quantity from the full range instead (low and up are both inclusive).
toolbox.register("mutate", tools.mutUniformInt, low=0, up=9, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

In [64]:
def main(n_pop=300, n_gen=5000, cxpb=0.5, mutpb=0.2):
    """Run the multi-objective genetic algorithm and return the best individual.

    Uses the module-level ``toolbox`` for population creation, evaluation,
    selection, crossover and mutation.

    Args:
        n_pop: Population size.
        n_gen: Number of generations to evolve.
        cxpb: Probability with which two individuals are crossed.
        mutpb: Probability of mutating an individual.

    Returns:
        The individual of the final population with the smallest unweighted
        sum of the values returned by ``toolbox.evaluate``.
    """
    pop = toolbox.population(n=n_pop)

    # Evaluate the entire initial population.
    for ind, fit in zip(pop, map(toolbox.evaluate, pop)):
        ind.fitness.values = fit

    for _ in range(n_gen):
        # Select the next generation and clone it, so the variation operators
        # below never modify individuals of the current population.
        offspring = [toolbox.clone(ind) for ind in toolbox.select(pop, len(pop))]

        # Crossover on neighbouring pairs; operators work on the wrapped
        # gene list (element 0), and changed individuals lose their fitness.
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < cxpb:
                toolbox.mate(child1[0], child2[0])
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < mutpb:
                toolbox.mutate(mutant[0])
                del mutant.fitness.values

        # Re-evaluate only the individuals whose fitness was invalidated.
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        for ind, fit in zip(invalid_ind, map(toolbox.evaluate, invalid_ind)):
            ind.fitness.values = fit

        pop[:] = offspring

    # Best = smallest plain sum of the error components; the fitness weights
    # are deliberately not applied here, matching the original selection.
    # min() returns the first minimum, like an argmin over the same sums.
    return min(pop, key=lambda ind: sum(toolbox.evaluate(ind)))

In [65]:
# Run the multi-objective version; this overwrites best_solution from the
# single-objective run.
best_solution = main()

In [66]:
# Store the multi-objective quantities next to the single-objective ones.
products_table['multivariate_choice'] = pd.Series(best_solution[0])

In [67]:
# For both shopping lists, multiply the chosen quantity by each per-unit value
# to get the contribution of every product to protein, fat, carbohydrate and
# calories. Column creation order: all univariate columns, then multivariate.
for variant in ('univariate', 'multivariate'):
    for suffix, per_unit_column in (
        ('gr_prot', 'Gram_Prot'),
        ('gr_fat', 'Gram_Fat'),
        ('gr_carb', 'Gram_Carb'),
        ('cal', 'Calories'),
    ):
        products_table[f'{variant}_{suffix}'] = (
            products_table[f'{variant}_choice'] * products_table[per_unit_column]
        )

In [68]:
# Compare what each shopping list delivers in total with the weekly goals.
summary = pd.DataFrame.from_records(
    [
        [
            products_table[f'univariate_{metric}'].sum(),
            products_table[f'multivariate_{metric}'].sum(),
            goal,
        ]
        for metric, goal in (
            ('gr_prot', gram_prot),
            ('gr_fat', gram_fat),
            ('gr_carb', gram_carb),
            ('cal', sum((cal_prot, cal_carb, cal_fat))),
        )
    ],
    columns=['univariate', 'multivariate', 'goal'],
)
summary.index = ['prot', 'fat', 'carb', 'cal']

# Absolute distance of each approach from the goal.
summary["univ_error"] = (summary["goal"] - summary["univariate"]).abs()
summary["multiv_error"] = (summary["goal"] - summary["multivariate"]).abs()
summary

Unnamed: 0,univariate,multivariate,goal,univ_error,multiv_error
prot,723,900,1312.5,589.5,412.5
fat,1080,947,388.888889,691.111111,558.111111
carb,1222,1344,2187.5,965.5,843.5
cal,17886,17875,17500.0,386.0,375.0


In [69]:
# Total absolute error of each approach, summed over the four summary rows
# (grams and calories are added together as-is).
summary["univ_error"].sum(), summary["multiv_error"].sum()

(2632.1111111111113, 2189.1111111111113)

In [70]:
# Shopping list: chosen quantity of every product under each approach.
products_table[['Name', 'multivariate_choice', 'univariate_choice']]

Unnamed: 0,Name,multivariate_choice,univariate_choice
0,Banana 1u,5,1
1,Mandarin 1u,0,3
2,Ananas 100g,2,0
3,Grapes 100g,4,1
4,Chocolate 1 bar,0,7
5,Hard Cheese 100g,4,7
6,Soft Cheese 100g,3,3
7,Pesto 100g,0,2
8,Hoummous 100g,2,9
9,Aubergine Paste 100g,6,7
