In [1]:
import numpy as np
import random
import time

def solve_knapsack_ga(weights, values, capacity,
                               population_size=200,
                               num_generations=500,
                               base_mutation_rate=0.05,
                               tournament_size=5,
                               elitism=True,
                               penalty_coef=1.0):
    """Approximate a 0/1 knapsack solution with a genetic algorithm.

    The population is seeded with one greedy (value/weight) individual plus
    random bit-vectors.  Each generation applies tournament selection,
    uniform crossover, bit-flip mutation with a linearly decaying rate, and a
    repair step that drops low-density items until the child fits.

    Args:
        weights: Item weights (any sequence; converted to a NumPy array).
        values: Item values (any sequence; converted to a NumPy array).
        capacity: Maximum total weight allowed.
        population_size: Number of individuals per generation.
        num_generations: Number of generations to run.
        base_mutation_rate: Per-gene flip probability at generation 0; it
            decays linearly towards 0 over the run.
        tournament_size: Individuals sampled per tournament (clamped to the
            population size).
        elitism: If true, the best feasible individual seen so far is copied
            unchanged into every new generation.
        penalty_coef: Fitness penalty per unit of excess weight.

    Returns:
        A tuple ``(best_val, best_ind, history)``.  ``best_ind`` is the best
        *feasible* 0/1 selection vector seen and ``best_val`` its total value
        (``-1`` and ``None`` if no feasible individual was ever evaluated,
        e.g. ``num_generations == 0``).  ``history`` holds one
        ``(max_fitness, mean_fitness)`` pair per generation, computed over
        the whole population including penalized overweight individuals.
    """
    # Accept plain lists as well as arrays; the arithmetic below is array-based.
    weights = np.asarray(weights)
    values = np.asarray(values)
    n = len(weights)

    # The value-density ordering is needed by both the greedy seed and the
    # repair step, so compute it once instead of on every repair call.
    # Zero-weight items get an infinite ratio: picked first, dropped last.
    with np.errstate(divide="ignore", invalid="ignore"):
        ratios = np.where(weights > 0, values / weights, np.inf)
    by_ratio_desc = np.argsort(-ratios)
    by_ratio_asc = np.argsort(ratios)

    # --- FITNESS WITH SOFT PENALTY ---
    def evaluate(ind):
        """Return (fitness, is_feasible) for one individual."""
        w = (weights * ind).sum()
        v = (values * ind).sum()
        if w <= capacity:
            return v, True
        # Penalize overweight
        return v - penalty_coef * (w - capacity), False

    # --- GREEDY INITIAL SOLUTION ---
    def greedy_init():
        ind = np.zeros(n, dtype=int)
        remaining = capacity
        # pick by highest value/weight ratio
        for i in by_ratio_desc:
            if weights[i] <= remaining:
                ind[i] = 1
                remaining -= weights[i]
        return ind

    # --- RANDOM INDIVIDUAL ---
    def create_random():
        return np.random.randint(0, 2, size=n)

    # --- TOURNAMENT SELECTION ---
    def tournament(pop, fits):
        # random.sample raises if asked for more items than exist, so clamp.
        k = max(1, min(tournament_size, len(pop)))
        aspirants = random.sample(range(len(pop)), k)
        best = max(aspirants, key=lambda i: fits[i])
        return pop[best].copy()

    # --- UNIFORM CROSSOVER ---
    def crossover(p1, p2):
        mask = np.random.rand(n) < 0.5
        c1 = np.where(mask, p1, p2)
        c2 = np.where(mask, p2, p1)
        return c1, c2

    # --- REPAIR HEURISTIC ---
    def repair(ind):
        overweight = (weights * ind).sum() - capacity
        if overweight <= 0:
            return ind
        # drop items of lowest v/w until fit
        for i in by_ratio_asc:
            if ind[i] == 1:
                ind[i] = 0
                overweight -= weights[i]
                if overweight <= 0:
                    break
        return ind

    # --- MUTATION (adaptive rate) ---
    def mutate(ind, curr_rate):
        # Vectorized bit-flip: each gene flips independently with curr_rate.
        flips = np.random.rand(n) < curr_rate
        ind[flips] ^= 1
        return ind

    # --- INITIAL POPULATION (1 greedy + rest random) ---
    pop = [greedy_init()] + [create_random() for _ in range(population_size-1)]

    # --- GA MAIN LOOP ---
    best_val = -1
    best_ind = None
    history = []

    for gen in range(num_generations):
        scored = [evaluate(ind) for ind in pop]
        fits = [fit for fit, _ in scored]
        mean_fit = np.mean(fits)
        max_fit  = np.max(fits)
        history.append((max_fit, mean_fit))

        # Update the global best from FEASIBLE individuals only.  The random
        # initial individuals are not repaired, so an overweight one can have
        # the highest penalized fitness; it must never become the answer.
        feasible = [i for i, (_, ok) in enumerate(scored) if ok]
        if feasible:
            top = max(feasible, key=lambda i: fits[i])
            if fits[top] > best_val:
                best_val = fits[top]
                best_ind = pop[top].copy()

        # build next gen
        new_pop = []
        if elitism and best_ind is not None:
            new_pop.append(best_ind.copy())

        # linearly decay mutation rate
        curr_mut_rate = base_mutation_rate * (1 - gen / num_generations)

        while len(new_pop) < population_size:
            p1 = tournament(pop, fits)
            p2 = tournament(pop, fits)
            c1, c2 = crossover(p1, p2)
            c1 = repair(mutate(c1, curr_mut_rate))
            c2 = repair(mutate(c2, curr_mut_rate))
            new_pop.extend([c1, c2])

        # Children are appended in pairs, so trim a possible extra one.
        pop = new_pop[:population_size]

    return best_val, best_ind, history

def generate_knapsack_instance(n, weight_range=(1, 100), value_range=(1, 100), capacity_factor=0.5):
    """Draw a random knapsack instance with ``n`` items.

    Weights and values are integers sampled uniformly from the given ranges,
    both endpoints included.  The capacity is ``capacity_factor`` times the
    total weight, truncated to an int.

    Returns:
        A tuple ``(weights, values, capacity)``.
    """
    # randint excludes its upper bound, hence the +1 to make ranges inclusive.
    w_low, w_high = weight_range[0], weight_range[1] + 1
    v_low, v_high = value_range[0], value_range[1] + 1

    item_weights = np.random.randint(w_low, w_high, size=n)
    item_values = np.random.randint(v_low, v_high, size=n)

    total_weight = item_weights.sum()
    return item_weights, item_values, int(total_weight * capacity_factor)

def run_dp(weights, values, capacity):
    """Run ``solve_knapsack_dp`` once and time it.

    Returns:
        A tuple ``(value, selected, elapsed_seconds)`` where the first two
        items are whatever ``solve_knapsack_dp`` returned and the third is
        the wall-clock duration measured with ``time.perf_counter``.
    """
    started = time.perf_counter()
    value, selected = solve_knapsack_dp(weights, values, capacity)
    elapsed = time.perf_counter() - started
    return value, selected, elapsed

def run_ga(weights, values, capacity, params):
    """Run ``solve_knapsack_ga`` once with the given settings and time it.

    Args:
        weights: Item weights, forwarded to ``solve_knapsack_ga``.
        values: Item values, forwarded to ``solve_knapsack_ga``.
        capacity: Knapsack capacity, forwarded to ``solve_knapsack_ga``.
        params: Mapping with the keys ``population_size``,
            ``num_generations``, ``mutation_rate``, ``tournament_size`` and
            ``elitism``.  An optional ``penalty_coef`` entry is forwarded
            when present and not ``None``.

    Returns:
        A tuple ``(value, selected, elapsed_seconds)``.
    """
    # Forward penalty_coef only when configured, so the solver's own default
    # applies otherwise.
    optional = {}
    if params.get('penalty_coef') is not None:
        optional['penalty_coef'] = params['penalty_coef']

    t0 = time.perf_counter()
    # The solver's keyword is ``base_mutation_rate`` (not ``mutation_rate``),
    # and it returns (best_val, best_ind, history); the trailing history is
    # not needed here, so it is discarded by the starred target.
    value, selected, *_ = solve_knapsack_ga(
        weights, values, capacity,
        population_size=params['population_size'],
        num_generations=params['num_generations'],
        base_mutation_rate=params['mutation_rate'],
        tournament_size=params['tournament_size'],
        elitism=params['elitism'],
        **optional,
    )
    return value, selected, time.perf_counter() - t0

# Benchmark function with quality measure
def benchmark_with_quality(ns, repeats=5, ga_params=None):
    """Compare the DP and GA solvers on random instances of several sizes.

    For every ``n`` in ``ns`` this generates ``repeats`` random instances via
    ``generate_knapsack_instance(n)``, solves each with ``run_dp`` and
    ``run_ga``, and averages the run times and the values found.

    Args:
        ns: Iterable of item counts to benchmark.
        repeats: Number of random instances per item count.
        ga_params: Settings mapping passed to ``run_ga``; a default
            configuration is used when ``None``.

    Returns:
        A pandas ``DataFrame`` with one row per ``n`` and the columns
        ``n_items``, ``dp_time_mean``, ``ga_time_mean``, ``dp_value_mean``,
        ``ga_value_mean`` and ``quality_ratio`` (mean GA value divided by
        mean DP value; NaN when the mean DP value is zero).
    """
    # Imported here because the file's top-level imports do not bring in
    # pandas, so ``pd`` would otherwise be undefined.
    import pandas as pd

    if ga_params is None:
        ga_params = {
            'population_size': 200,
            'num_generations': 1000,
            'mutation_rate': 0.01,
            'tournament_size': 5,
            'elitism': True,
        }
    results = []
    for n in ns:
        dp_times, ga_times = [], []
        dp_vals, ga_vals = [], []
        for _ in range(repeats):
            # Both solvers see the same instance so the values are comparable.
            w, v, C = generate_knapsack_instance(n)
            val_dp, _, t_dp = run_dp(w, v, C)
            val_ga, _, t_ga = run_ga(w, v, C, ga_params)
            dp_times.append(t_dp)
            ga_times.append(t_ga)
            dp_vals.append(val_dp)
            ga_vals.append(val_ga)
        mean_dp_val = np.mean(dp_vals)
        mean_ga_val = np.mean(ga_vals)
        # Avoid a division-by-zero warning when the DP mean is zero.
        quality = mean_ga_val / mean_dp_val if mean_dp_val else float('nan')
        results.append({
            'n_items': n,
            'dp_time_mean': np.mean(dp_times),
            'ga_time_mean': np.mean(ga_times),
            'dp_value_mean': mean_dp_val,
            'ga_value_mean': mean_ga_val,
            'quality_ratio': quality,
        })
    return pd.DataFrame(results)

# Problem sizes (number of items) covered by the benchmark.
ns = [
    10, 20, 30, 50,
    100, 200, 300, 400, 500, 600, 700, 800,
]

# GA settings shared by every benchmark run.
ga_params = dict(
    population_size=200,
    num_generations=1000,
    mutation_rate=0.01,
    tournament_size=5,
    elitism=True,
)

df_quality = benchmark_with_quality(ns, ga_params=ga_params, repeats=5)

# Show the results table.
# NOTE(review): display is not imported in this file -- presumably the
# IPython/Jupyter built-in; confirm before running as a plain script.
display(df_quality)


GA best value = 1857 in 1.864s
Selected items: [ 0  3  4  5  8  9 10 13 16 17 18 21 23 24 25 30 31 32 33 35 38 39 40 41
 43 47 49]
