In [1]:
import random
import numpy as np
import pandas as pd
import scipy.stats as stats
from itertools import product
from tqdm.notebook import tqdm


In [2]:
ingridientes = pd.read_csv('nutritional_data_by_ingridient.csv')

In [3]:
ingridientes

Unnamed: 0,index,group,calories,lipids,carbohydrates,protein,name
0,1,3,357.00,89.25,0.00,0.00,ALGODON DE AZUCAR
1,2,3,341.00,85.13,0.00,0.00,"ALMIBAR DE FRUTAS, INDUSTRIALIZADO"
2,3,3,125.10,23.36,1.89,3.43,"ARROZ CON LECHE, PREPARACION ESTANDARIZADA"
3,4,3,0.00,0.00,0.00,0.00,AZUCAR Y ESTEVIA (SPLENDA NATURALS)
4,5,3,380.00,98.09,0.00,0.12,"AZUCAR, MASCABADO"
...,...,...,...,...,...,...,...
1803,1807,22,247.00,27.00,11.00,10.00,"PIZZA, ""LITTLE CAESARS PIZZA 14´´HOT-N-READY P..."
1804,1808,22,244.00,23.00,12.00,11.00,"PIZZA, ""LITTLE CAESARS PIZZA 14´´HOT-N-READY T..."
1805,1809,22,228.00,26.00,9.00,10.00,"PIZZA, ""LITTLE CAESARS PIZZA 14´´HOT-N-READY"" ..."
1806,1810,22,212.00,27.00,7.00,10.00,"PIZZA, ""LITTLE CAESARS PIZZA 14´´HOT-N-READY"" ..."


In [4]:
np.random.seed(0)

In [5]:
random.seed(0)

In [6]:
NUMBER_OF_INGREDIENTS = 1807

In [7]:
def get_nutritional_datum(ingredient_index):
    ingredient = ingridientes.loc[ingredient_index]
    return ingredient[['calories', 'lipids', 'carbohydrates', 'protein']].tolist()

In [8]:
def calculate_nutritional_values(chromosome: np.ndarray) -> np.ndarray:
    nutritional_data = [get_nutritional_datum(ingredient) for ingredient in chromosome]
    return np.sum(nutritional_data, axis=0)

In [9]:
def calculate_euclidean_distance(source: np.ndarray, target: np.ndarray):
    return np.linalg.norm(source.copy() - target.copy())

In [10]:
def initialize_population(
    population_size: int,
    chromosome_size: int,
    target_nutritional_values: np.ndarray,
) -> list[tuple[list, float]]:
    population = []
    for _ in range(population_size):
        chromosome = np.random.randint(
            1, NUMBER_OF_INGREDIENTS + 1, size=chromosome_size
        )
        chromosome_nutritional_data = calculate_nutritional_values(chromosome)
        chromosome_fitness = calculate_euclidean_distance(
            target_nutritional_values, chromosome_nutritional_data
        )
        population.append([chromosome, chromosome_fitness])
    return population

In [11]:
def tournament_selection(population: list, tournament_size: int = 3):
    selected_contestants = []
    for _ in range(len(population)):
        competitors = random.sample(population, tournament_size)
        winner = min(competitors, key=lambda individual: individual[1])
        selected_contestants.append(winner)
    return selected_contestants

In [12]:
def two_point_crossover(parent1, parent2, target_nutritional_values):
    chromosome_size = len(parent1)
    crossover_point1, crossover_point2 = sorted(
        random.sample(range(1, chromosome_size), 2)
    )

    offspring1 = []
    offspring1.extend(parent1[:crossover_point1]) 
    offspring1.extend(parent2[crossover_point1:crossover_point2])
    offspring1.extend(parent1[crossover_point2:])

    offspring2 = []
    offspring2.extend(parent2[:crossover_point1])
    offspring2.extend(parent1[crossover_point1:crossover_point2])
    offspring2.extend(parent2[crossover_point2:])

    fitness_offspring1 = calculate_euclidean_distance(
        target_nutritional_values, calculate_nutritional_values(offspring1)
    )
    fitness_offspring2 = calculate_euclidean_distance(
        target_nutritional_values, calculate_nutritional_values(offspring2)
    )


    return ([offspring1, fitness_offspring1], [offspring2, fitness_offspring2])

In [13]:
def crossover(population, crossover_rate, target_nutritional_values):
    offspring = []
    for i in range(0, len(population) - 1, 2):
        if np.random.rand() < crossover_rate:
            (child1, child2) = two_point_crossover(
                population[i][0], population[i + 1][0], target_nutritional_values
            )

            offspring.extend([child1, child2])
    return offspring

In [14]:
def mutate(offspring: list, mutation_rate: float):
    for child in offspring:
        chromosome = child[0]
        mutation_mask = np.random.rand(len(chromosome)) < mutation_rate
        mutation_values = np.random.randint(
            1, NUMBER_OF_INGREDIENTS + 1, size=len(chromosome)
        )
        chromosome = np.where(mutation_mask, mutation_values, chromosome)
        child[0] = chromosome
    return offspring

In [15]:
def elitism(population, population_size):
    sorted_population = sorted(population, key=lambda x: x[1])
    elite_individuals = sorted_population[:population_size]
    return elite_individuals

In [16]:
DEFAULT_POPULATION_SIZE = 10
DEFAULT_CROSSOVER_RATE = 0.1
DEFAULT_MUTATION_RATE = 0.05
DEFAULT_NUMBER_OF_GENERATIONS = 100


def genetic(
    target_nutritional_values: list[float],
    chromosome_size: int,
    population_size: int = DEFAULT_POPULATION_SIZE,
    crossover_rate: float = DEFAULT_CROSSOVER_RATE,
    mutation_rate: float = DEFAULT_MUTATION_RATE,
    number_of_generations: int = DEFAULT_NUMBER_OF_GENERATIONS,
):
    population = initialize_population(
        population_size, chromosome_size, target_nutritional_values
    )

    for _ in range(number_of_generations):
        selected_population = tournament_selection(population)

        offspring = crossover(
            selected_population, crossover_rate, target_nutritional_values
        )

        mutate(offspring, mutation_rate)

        population = elitism(population + offspring, population_size)

    return population[0][0]

In [17]:
TARGET_NUTRITIONAL_VALUES = [254.28, 42.85, 6.66, 5.71]

In [18]:
meal = genetic(TARGET_NUTRITIONAL_VALUES, 4)

In [19]:
meal

array([1421,  315, 1207, 1511])

In [20]:
ingridientes.loc[meal]

Unnamed: 0,index,group,calories,lipids,carbohydrates,protein,name
1421,1425,20,184.25,36.4,1.39,5.93,"FRIJOLES CON ARROZ, PREPARACION ESTANDARIZADA"
315,319,5,16.48,4.2,0.0,0.0,TE O INFUSION CON AZUCAR U OTRO ENDULZANTE (MI...
1207,1211,17,12.0,2.16,0.16,0.59,PEPINO SIN CASCARA
1511,1515,20,70.95,9.83,0.98,5.35,"POLLO CON PAPAS, PREPARACION ESTANDARIZADA"


In [21]:
calculate_nutritional_values(meal)

array([283.68,  52.59,   2.53,  11.87])

Pruena ANOVA

In [22]:
population_sizes = [10, 20, 30]
crossover_rates = [0.1, 0.5, 0.9]
mutation_rates = [0.01, 0.05, 0.1]

In [23]:
configurations = list(product(population_sizes, crossover_rates, mutation_rates))

In [24]:
results = []
for config in tqdm(configurations):
    population_size, crossover_rate, mutation_rate = config
    config_results = []
    for _ in range(20):
        meal = genetic(
            TARGET_NUTRITIONAL_VALUES,
            4,
            population_size=population_size,
            crossover_rate=crossover_rate,
            mutation_rate=mutation_rate
        )
        nutritional_values = calculate_nutritional_values(meal)
        config_results.append(nutritional_values)
    results.append(config_results)

  0%|          | 0/27 [00:00<?, ?it/s]

In [26]:
results_df = pd.DataFrame(results[0], columns=['calories', 'lipids', 'carbohydrates', 'protein'])
for i, config_result in enumerate(results[1:], start=1):
    config_df = pd.DataFrame(config_result, columns=['calories', 'lipids', 'carbohydrates', 'protein'])
    results_df = pd.concat([results_df, config_df], ignore_index=True)

In [27]:
results_df

Unnamed: 0,calories,lipids,carbohydrates,protein
0,253.18,26.15,3.33,23.93
1,238.67,48.76,3.32,5.26
2,316.69,36.62,9.97,20.67
3,214.70,27.51,3.87,17.04
4,275.12,62.54,2.80,6.80
...,...,...,...,...
535,440.02,58.42,16.39,17.00
536,256.59,40.61,7.14,14.58
537,243.41,39.68,3.86,16.09
538,214.61,34.61,6.81,7.58


In [29]:
configurations_repeated = np.repeat(configurations, 20, axis=0)

In [32]:
configurations_df = pd.DataFrame(configurations_repeated, columns=['population_size', 'crossover_rate', 'mutation_rate'])

In [34]:
results_df = pd.concat([results_df, configurations_df], axis=1)

In [35]:
results_df

Unnamed: 0,calories,lipids,carbohydrates,protein,population_size,crossover_rate,mutation_rate
0,253.18,26.15,3.33,23.93,10.0,0.1,0.01
1,238.67,48.76,3.32,5.26,10.0,0.1,0.01
2,316.69,36.62,9.97,20.67,10.0,0.1,0.01
3,214.70,27.51,3.87,17.04,10.0,0.1,0.01
4,275.12,62.54,2.80,6.80,10.0,0.1,0.01
...,...,...,...,...,...,...,...
535,440.02,58.42,16.39,17.00,30.0,0.9,0.10
536,256.59,40.61,7.14,14.58,30.0,0.9,0.10
537,243.41,39.68,3.86,16.09,30.0,0.9,0.10
538,214.61,34.61,6.81,7.58,30.0,0.9,0.10


In [36]:
grouped_data = results_df.groupby(['population_size', 'crossover_rate', 'mutation_rate'])

In [39]:
calories_groups = [group['calories'].values for name, group in grouped_data]
lipids_groups = [group['lipids'].values for name, group in grouped_data]
carbohydrates_groups = [group['carbohydrates'].values for name, group in grouped_data]
protein_groups = [group['protein'].values for name, group in grouped_data]

In [None]:
calories_anova = stats.f_oneway(*calories_groups)
lipids_anova = stats.f_oneway(*lipids_groups)
carbohydrates_anova = stats.f_oneway(*carbohydrates_groups)
protein_anova = stats.f_oneway(*protein_groups)