In [1]:
import random
import numpy as np
import pandas as pd

In [31]:
# Load the knapsack item table (one row per candidate item) and preview it.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# row index that was written into the CSV -- consider index_col=0; confirm.
df = pd.read_csv('data/knapsack.csv')
df.head()

Unnamed: 0,Item,Weight,Value
0,Laptop,3.0,1500
1,Headphones,1.0,300
2,Book,2.0,120
3,Jacket,2.0,250
4,Camera,1.0,800


In [27]:
# Knapsack capacity: compute_fitness scores any selection whose total Weight
# exceeds this as 0. Presumably kilograms (the report cell prints weights
# with a "kg" suffix) -- confirm against the data source.
MAX_WEIGHT = 15

In [5]:
def create_individual(length):
    """Build a random bit string.

    length: number of genes to generate

    Return a list of ``length`` genes, each drawn independently as 0 or 1.
    """
    genes = []
    for _ in range(length):
        # randint's upper bound is exclusive, so this yields 0 or 1
        genes.append(np.random.randint(0, 2))
    return genes

In [6]:
def compute_fitness(individual):
    """Compute the knapsack fitness of an individual.

    Gene ``i`` set to 1 means the item in row ``i`` of ``df`` is packed.
    The fitness is the summed ``Value`` of the packed items, or 0 when their
    summed ``Weight`` exceeds ``MAX_WEIGHT``.

    individual: sequence of 0/1 genes, matched to the rows of ``df`` by position

    Return the total value of the selection, or 0 if it is overweight.
    """
    # Extract each column once as a positional array: ``df['Weight'][i]`` is a
    # label lookup (only correct while the index is 0..n-1) and repeating the
    # column lookup for every gene is needlessly slow.
    weights = df['Weight'].to_numpy()
    values = df['Value'].to_numpy()

    total_weight = sum(bit * weights[i] for i, bit in enumerate(individual))
    if total_weight > MAX_WEIGHT:
        # Overweight selections are infeasible and get the worst score
        return 0
    return sum(bit * values[i] for i, bit in enumerate(individual))

In [17]:
def select_parents(population, fitnesses):
    """Draw two parents from the population, with replacement.

    Each individual is drawn with probability proportional to its fitness.
    When every fitness is zero the draw is uniform instead.

    population: list of individuals
    fitnesses: fitness of each individual, in the same order as ``population``

    Return a list holding the two selected parents (they may be the same
    individual, since the draw is with replacement).
    """
    fitness_sum = sum(fitnesses)
    # weights=None makes random.choices pick uniformly
    weights = None if fitness_sum == 0 else [score / fitness_sum for score in fitnesses]
    return random.choices(population, weights=weights, k=2)

In [82]:
def crossover(parent1, parent2):
    """Perform uniform crossover and return two children.

    Every gene of a child is copied from ``parent1`` or ``parent2`` with equal
    probability. The two children are built independently of each other, so
    they are not complements.

    Return a tuple ``(child1, child2)`` of new lists, each ``len(parent1)`` long.
    """
    def _blend():
        offspring = []
        for position in range(len(parent1)):
            # One coin flip per gene decides which parent donates it
            if np.random.random() < 0.5:
                offspring.append(parent1[position])
            else:
                offspring.append(parent2[position])
        return offspring

    first_child = _blend()
    second_child = _blend()
    return first_child, second_child

In [83]:
def mutate(individual):
    """Flip one randomly chosen bit of ``individual`` in place.

    individual: non-empty list of 0/1 genes; it is modified directly

    Return the same list object after the flip.
    """
    position = np.random.randint(0, len(individual))
    flipped_gene = 1 - individual[position]
    individual[position] = flipped_gene
    return individual

In [84]:
def run_genetic_algorithm(seq_length, population_size, generations, mutation_prob, keep_best):
    """
    Run the genetic algorithm.

    Each generation is bred from the previous one: parents are picked with
    select_parents, recombined with crossover, and each child is mutated with
    probability ``mutation_prob``. The best fitness of every generation is
    printed.

    seq_length: length of the bit string
    population_size: number of individuals in the population; the size is
        kept constant across generations, including odd sizes
    generations: number of generations to run
    mutation_prob: probability that a child is mutated
    keep_best: whether to carry the fittest individuals (up to two) of the
        previous generation over unchanged

    Return the final population.
    """

    # Initialize population
    population = [create_individual(seq_length) for _ in range(population_size)]
    fitnesses = [compute_fitness(individual) for individual in population]

    for generation in range(generations):
        # Create new generation through selection, crossover, and mutation

        # Initialize new population
        new_population = []
        if keep_best:
            # Keep the two fittest, best first. Capping at the population size
            # avoids an IndexError when there is only one individual.
            elite_count = min(2, len(population))
            ranked_idxs = np.argsort(fitnesses, axis=0)[::-1]
            for idx in ranked_idxs[:elite_count]:
                # Copy so generations never share the same list object
                new_population.append(list(population[idx]))

        # Breed in pairs until the generation is full. Looping on the length
        # (instead of a floor-divided pair count) keeps odd population sizes
        # from silently shrinking by one.
        while len(new_population) < population_size:
            # Select two parents
            parent1, parent2 = select_parents(population, fitnesses)

            # Crossover parents
            child1, child2 = crossover(parent1, parent2)

            # Mutate children
            if np.random.random() < mutation_prob:
                child1 = mutate(child1)
            if np.random.random() < mutation_prob:
                child2 = mutate(child2)

            # Add children to new population
            new_population.extend([child1, child2])

        # The last pair may overshoot by one child when the remaining slots are odd
        del new_population[population_size:]

        # Update population and compute new fitnesses
        population = new_population
        fitnesses = [compute_fitness(individual) for individual in population]

        # Print out the best fitness in this generation
        best_fitness = max(fitnesses)
        print(f"Generation {generation}, Best Fitness: {best_fitness}")

    return population

In [86]:
# Run the GA
# Seed both random sources first: select_parents draws from `random`, the other
# helpers draw from `np.random`, so an unseeded run cannot be reproduced.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

final_population = run_genetic_algorithm(
    seq_length=len(df),
    population_size=1000,
    generations=100,
    mutation_prob=0.2,
    keep_best=True,
)

Generation 0, Best Fitness: 6090
Generation 1, Best Fitness: 6140
Generation 2, Best Fitness: 6140
Generation 3, Best Fitness: 6140
Generation 4, Best Fitness: 6190
Generation 5, Best Fitness: 6190
Generation 6, Best Fitness: 6190
Generation 7, Best Fitness: 6330
Generation 8, Best Fitness: 6330
Generation 9, Best Fitness: 6330
Generation 10, Best Fitness: 6350
Generation 11, Best Fitness: 6350
Generation 12, Best Fitness: 6350
Generation 13, Best Fitness: 6410
Generation 14, Best Fitness: 6410
Generation 15, Best Fitness: 6410
Generation 16, Best Fitness: 6410
Generation 17, Best Fitness: 6410
Generation 18, Best Fitness: 6420
Generation 19, Best Fitness: 6420
Generation 20, Best Fitness: 6420
Generation 21, Best Fitness: 6420
Generation 22, Best Fitness: 6420
Generation 23, Best Fitness: 6420
Generation 24, Best Fitness: 6420
Generation 25, Best Fitness: 6420
Generation 26, Best Fitness: 6430
Generation 27, Best Fitness: 6430
Generation 28, Best Fitness: 6430
Generation 29, Best Fitn

In [87]:
# Re-score the final population and pick out its fittest member
fitnesses = list(map(compute_fitness, final_population))
best_idx = np.argmax(fitnesses)
best_individual = final_population[best_idx]

print(f"Best individual: {best_individual}")
print(f"Fitness: {fitnesses[best_idx]}€")
# Total weight of the packed items; genes set to 0 contribute nothing
best_weight = sum(bit * df['Weight'][i] for i, bit in enumerate(best_individual))
print(f"Weight: {best_weight}kg")
print()

# List every item whose gene is switched on
print("Items in knapsack:")
for i, bit in enumerate(best_individual):
    if bit != 1:
        continue
    print(f"- {df['Item'][i]} ({df['Value'][i]}€)")

Best individual: [1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
Fitness: 6600€
Weight: 15.0kg

Items in knapsack:
- Laptop (1500€)
- Headphones (300€)
- Camera (800€)
- Sunglasses (200€)
- Smartphone (1000€)
- Tablet (700€)
- Tent (800€)
- GPS Device (250€)
- Binoculars (300€)
- Solar Panel (550€)
- Swiss Army Knife (200€)
