# In [1]:
"""
Knapsack Problem Algorithms Module

This module implements various algorithms for solving the 0/1 Knapsack Problem:
1. Dynamic Programming (DP) approach
2. Genetic Algorithm (GA) approach 
3. Hybrid approach combining both techniques
"""

import numpy as np
import random
import time


def generate_knapsack_instance(n, weight_range=(1, 100), value_range=(1, 100), capacity_factor=0.5):
    """
    Build a random 0/1 knapsack instance.

    Weights and values are drawn independently and uniformly from their
    inclusive integer ranges; the capacity is a fixed fraction of the total
    weight, truncated to an integer.

    Parameters:
    -----------
    n : int
        Number of items
    weight_range : tuple
        (min_weight, max_weight), both bounds inclusive
    value_range : tuple
        (min_value, max_value), both bounds inclusive
    capacity_factor : float
        Fraction of the summed weights used as the capacity

    Returns:
    --------
    dict
        Keys 'weights' and 'values' (integer arrays of length n) and
        'capacity' (int)
    """
    def draw(bounds):
        # randint excludes its upper bound, hence the +1 to make it inclusive.
        return np.random.randint(bounds[0], bounds[1] + 1, size=n)

    # Weights are drawn before values so the random stream is consumed in a fixed order.
    item_weights = draw(weight_range)
    item_values = draw(value_range)
    total_weight = np.sum(item_weights)

    return dict(
        weights=item_weights,
        values=item_values,
        capacity=int(total_weight * capacity_factor),
    )


def solve_knapsack_dp(weights, values, capacity):
    """
    Solve the 0/1 knapsack problem exactly using dynamic programming.

    ``dp[i, w]`` holds the best total value achievable with the first ``i``
    items and a weight budget of ``w``. Each row is derived from the previous
    one with whole-array NumPy operations rather than a per-capacity Python
    loop, which produces the same table much faster.

    Parameters:
    -----------
    weights : list or array
        Non-negative integer item weights
    values : list or array
        Item values (the table is an integer array, so fractional parts of
        values are dropped)
    capacity : int
        Knapsack capacity (non-negative)

    Returns:
    --------
    tuple
        (optimal_value, selected_items), where selected_items is a boolean
        array of length n marking the chosen items

    Raises:
    -------
    ValueError
        If the capacity or any weight is negative
    """
    weights = np.asarray(weights)
    values = np.asarray(values)
    n = len(weights)
    capacity = int(capacity)

    if capacity < 0:
        raise ValueError("capacity must be non-negative")
    if n and np.any(weights < 0):
        # A negative weight would index the table from the wrong end.
        raise ValueError("weights must be non-negative")

    # Row 0 (no items) and column 0 (no budget) stay at zero.
    dp = np.zeros((n + 1, capacity + 1), dtype=int)

    for i in range(1, n + 1):
        wt = int(weights[i - 1])
        prev = dp[i - 1]
        if wt > capacity:
            # The item can never fit: the row is unchanged.
            dp[i] = prev
            continue
        # Budgets below the item's weight cannot take it.
        dp[i, :wt] = prev[:wt]
        # For every budget w >= wt: skip the item, or take it on top of the
        # best solution for budget w - wt.
        dp[i, wt:] = np.maximum(prev[wt:], prev[:capacity + 1 - wt] + values[i - 1])

    # Walk back from the final cell: a change between consecutive rows means
    # item i was taken at that budget.
    selected = np.zeros(n, dtype=bool)
    remaining = capacity
    for i in range(n, 0, -1):
        if dp[i, remaining] != dp[i - 1, remaining]:
            selected[i - 1] = True
            remaining -= int(weights[i - 1])

    return dp[n, capacity], selected


def solve_knapsack_ga(weights, values, capacity, population_size=100, num_generations=200,
                      mutation_rate=0.01, tournament_size=3, elitism=True):
    """
    Solve the knapsack problem approximately using a genetic algorithm.

    Individuals are 0/1 arrays of length n. Selections that exceed the
    capacity score 0. The returned solution is always feasible: the search
    starts from the empty knapsack and only replaces it with an individual
    of strictly higher fitness, which by construction respects the capacity.
    (If the capacity is negative, the empty selection is still returned.)

    Parameters:
    -----------
    weights : list or array
        List of item weights
    values : list or array
        List of item values
    capacity : int
        Knapsack capacity
    population_size : int
        Size of the population (at least 1)
    num_generations : int
        Number of generations to evolve
    mutation_rate : float
        Probability of flipping each gene
    tournament_size : int
        Number of individuals drawn per tournament (clamped to the range
        1..population_size)
    elitism : bool
        Whether to carry the best individual of each generation over unchanged

    Returns:
    --------
    tuple
        (best_value, selected_items), where selected_items is a 0/1 integer
        array of length n

    Raises:
    -------
    ValueError
        If population_size is smaller than 1
    """
    if population_size < 1:
        raise ValueError("population_size must be at least 1")

    weights = np.asarray(weights)
    values = np.asarray(values)
    n = len(weights)
    # random.sample cannot draw more distinct contenders than the population holds.
    contenders_per_round = max(1, min(tournament_size, population_size))

    def fitness(individual):
        """Total value of the selection, or 0 when it exceeds the capacity"""
        total_weight = np.sum(weights * individual)
        if total_weight > capacity:
            return 0  # Penalize solutions that exceed capacity
        return np.sum(values * individual)

    def tournament_selection(population, fitnesses):
        """Return a copy of the fittest of a few randomly drawn individuals"""
        contenders = random.sample(range(len(population)), contenders_per_round)
        winner = max(contenders, key=lambda idx: fitnesses[idx])
        return population[winner].copy()

    def crossover(parent1, parent2):
        """One-point crossover; parents are copied when there is no interior cut point"""
        if n < 2:
            return parent1.copy(), parent2.copy()
        crossover_point = random.randint(1, n - 1)
        child1 = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
        child2 = np.concatenate((parent2[:crossover_point], parent1[crossover_point:]))
        return child1, child2

    def mutate(individual):
        """Flip each gene independently with probability mutation_rate"""
        for i in range(n):
            if random.random() < mutation_rate:
                individual[i] = 1 - individual[i]  # Flip bit
        return individual

    # Start from the empty knapsack so the result is never an over-capacity selection.
    best_individual = np.zeros(n, dtype=int)
    best_fitness = 0

    def evaluate(population):
        """Score a population, update the best-so-far, return (fitnesses, best index)"""
        nonlocal best_individual, best_fitness
        fitnesses = np.array([fitness(ind) for ind in population])
        top = int(np.argmax(fitnesses))
        # A fitness above 0 can only come from a selection within capacity.
        if fitnesses[top] > best_fitness:
            best_fitness = fitnesses[top]
            best_individual = population[top].copy()
        return fitnesses, top

    # Initialize population
    population = np.random.randint(0, 2, size=(population_size, n))

    # Evolve population
    for _ in range(num_generations):
        fitnesses, current_best_idx = evaluate(population)

        new_population = np.zeros((population_size, n), dtype=int)

        # Elitism: keep the best individual
        if elitism:
            new_population[0] = population[current_best_idx].copy()
            start_idx = 1
        else:
            start_idx = 0

        # Fill the remaining slots two at a time; a single leftover slot
        # receives a mutated copy of one parent.
        for i in range(start_idx, population_size, 2):
            parent1 = tournament_selection(population, fitnesses)
            parent2 = tournament_selection(population, fitnesses)

            if i + 1 < population_size:
                child1, child2 = crossover(parent1, parent2)
                new_population[i] = mutate(child1)
                new_population[i + 1] = mutate(child2)
            else:
                new_population[i] = mutate(parent1)

        population = new_population

    # Score the last offspring too; otherwise the final generation is wasted.
    evaluate(population)

    return best_fitness, best_individual


def solve_knapsack_hybrid(weights, values, capacity, threshold=50, population_size=100,
                          num_generations=200, mutation_rate=0.01, tournament_size=3):
    """
    Hybrid approach: Use DP for small subproblems and GA for the complete problem.

    Instances with at most ``threshold`` items are solved exactly with
    ``solve_knapsack_dp``. Larger instances are handled in three steps:
    1. Solve a subproblem with DP on the ``threshold`` items with the best
       value/weight ratio.
    2. Use that DP solution as one individual of the GA population.
    3. Run an elitist GA on the complete problem from this seeded population.

    Parameters:
    -----------
    weights : list or array
        List of item weights
    values : list or array
        List of item values
    capacity : int
        Knapsack capacity
    threshold : int
        Maximum number of items to consider for DP-based initialization
    population_size : int
        Size of the GA population (at least 1)
    num_generations : int
        Number of GA generations to evolve
    mutation_rate : float
        Probability of flipping each gene
    tournament_size : int
        Number of individuals drawn per tournament (clamped to the range
        1..population_size)

    Returns:
    --------
    tuple
        (best_value, selected_items), where selected_items is a boolean
        array of length n

    Raises:
    -------
    ValueError
        If population_size is smaller than 1 on the GA path
    """
    # Accept plain lists as documented; the code below relies on array
    # arithmetic and fancy indexing.
    weights = np.asarray(weights)
    values = np.asarray(values)
    n = len(weights)

    if n <= threshold:
        # For small problems, just use DP
        return solve_knapsack_dp(weights, values, capacity)

    if population_size < 1:
        raise ValueError("population_size must be at least 1")

    # Value-to-weight ratios; zero-weight items get an infinite ratio instead
    # of triggering a division by zero, so they rank first.
    ratios = np.divide(values, weights, out=np.full(n, np.inf), where=weights != 0)
    sorted_indices = np.argsort(-ratios)  # Sort in descending order

    # Take the top 'threshold' items for DP
    dp_indices = sorted_indices[:threshold]
    dp_weights = weights[dp_indices]
    dp_values = values[dp_indices]

    # The subproblem budget never exceeds the real capacity, so the seed is
    # always a feasible solution of the full problem.
    sub_capacity = min(capacity, int(np.sum(dp_weights) * 0.7))

    # Solve the subproblem with DP
    _, dp_selected = solve_knapsack_dp(dp_weights, dp_values, sub_capacity)

    # Expand the DP result to a full-length 0/1 individual
    seed = np.zeros(n, dtype=int)
    seed[dp_indices] = dp_selected

    # One individual from DP, the rest random
    population = np.random.randint(0, 2, size=(population_size, n))
    population[0] = seed

    # random.sample cannot draw more distinct contenders than the population holds.
    contenders_per_round = max(1, min(tournament_size, population_size))

    def fitness(individual):
        """Total value of the selection, or 0 when it exceeds the capacity"""
        total_weight = np.sum(weights * individual)
        if total_weight > capacity:
            return 0
        return np.sum(values * individual)

    def tournament_selection(pop, fitnesses):
        """Return a copy of the fittest of a few randomly drawn individuals"""
        contenders = random.sample(range(len(pop)), contenders_per_round)
        winner = max(contenders, key=lambda idx: fitnesses[idx])
        return pop[winner].copy()

    def crossover(parent1, parent2):
        """One-point crossover; parents are copied when there is no interior cut point"""
        if n < 2:
            return parent1.copy(), parent2.copy()
        crossover_point = random.randint(1, n - 1)
        child1 = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
        child2 = np.concatenate((parent2[:crossover_point], parent1[crossover_point:]))
        return child1, child2

    def mutate(individual):
        """Flip each gene independently with probability mutation_rate"""
        for i in range(n):
            if random.random() < mutation_rate:
                individual[i] = 1 - individual[i]
        return individual

    best_individual = seed.copy()
    best_fitness_val = fitness(seed)

    def evaluate(pop):
        """Score a population, update the best-so-far, return (fitnesses, best index)"""
        nonlocal best_individual, best_fitness_val
        fitnesses = np.array([fitness(ind) for ind in pop])
        top = int(np.argmax(fitnesses))
        if fitnesses[top] > best_fitness_val:
            best_fitness_val = fitnesses[top]
            best_individual = pop[top].copy()
        return fitnesses, top

    # Evolve population
    for _ in range(num_generations):
        fitnesses, current_best_idx = evaluate(population)

        new_population = np.zeros((population_size, n), dtype=int)
        new_population[0] = population[current_best_idx].copy()  # Elitism

        # Fill the remaining slots two at a time; a single leftover slot
        # receives a mutated copy of one parent.
        for i in range(1, population_size, 2):
            parent1 = tournament_selection(population, fitnesses)
            parent2 = tournament_selection(population, fitnesses)

            if i + 1 < population_size:
                child1, child2 = crossover(parent1, parent2)
                new_population[i] = mutate(child1)
                new_population[i + 1] = mutate(child2)
            else:
                new_population[i] = mutate(parent1)

        population = new_population

    # Score the last offspring too; otherwise the final generation is wasted.
    evaluate(population)

    # Return a boolean mask on every path, matching the DP branch above.
    return best_fitness_val, best_individual.astype(bool)


# Additional utility functions for the knapsack problem

def calculate_solution_metrics(weights, values, selected):
    """
    Calculate metrics for a given knapsack solution.

    Parameters:
    -----------
    weights : list or array
        List of item weights
    values : list or array
        List of item values
    selected : list or array
        Selection array (0/1 integers or booleans), one entry per item

    Returns:
    --------
    dict
        Dictionary with total_weight and total_value of the selected items
    """
    # Convert first: multiplying two plain Python lists raises TypeError.
    weights = np.asarray(weights)
    values = np.asarray(values)
    selected = np.asarray(selected)

    return {
        'total_weight': np.sum(weights * selected),
        'total_value': np.sum(values * selected)
    }


def knapsack_greedy(weights, values, capacity):
    """
    Solve the knapsack problem using a greedy approach (by value/weight ratio).
    This is not optimal but can be used as a baseline.

    Items are visited in order of decreasing value/weight ratio, and each one
    is taken if it still fits in the remaining capacity.

    Parameters:
    -----------
    weights : list or array
        List of item weights
    values : list or array
        List of item values
    capacity : int
        Knapsack capacity

    Returns:
    --------
    tuple
        (total_value, selected_items), where selected_items is a boolean
        array of length n
    """
    # Convert first: dividing two plain Python lists raises TypeError.
    weights = np.asarray(weights)
    values = np.asarray(values)
    n = len(weights)

    # Zero-weight items get an infinite ratio instead of triggering a
    # division by zero, so they are considered first.
    ratios = np.divide(values, weights, out=np.full(n, np.inf), where=weights != 0)

    # Sort items by ratio (descending)
    indices = np.argsort(-ratios)

    # Greedy selection
    selected = np.zeros(n, dtype=bool)
    total_weight = 0
    total_value = 0

    for i in indices:
        if total_weight + weights[i] <= capacity:
            selected[i] = True
            total_weight += weights[i]
            total_value += values[i]

    return total_value, selected


def visualize_solution(weights, values, capacity, selected, title="Knapsack Solution"):
    """
    Visualize a knapsack solution.

    Draws two side-by-side scatter plots: value against weight with the
    selected items highlighted, and the value/weight ratio per item index
    coloured by selection status.

    Parameters:
    -----------
    weights : list or array
        List of item weights
    values : list or array
        List of item values
    capacity : int
        Knapsack capacity
    selected : list or array
        Selection array (0/1 integers or booleans), one entry per item
    title : str
        Plot title

    Returns:
    --------
    matplotlib.figure.Figure
        The figure holding both plots
    """
    import matplotlib.pyplot as plt

    weights = np.asarray(weights)
    values = np.asarray(values)
    # Force a boolean mask: on a 0/1 integer array `~` is a bitwise NOT
    # (giving -1/-2), which would index the wrong items instead of masking.
    chosen = np.asarray(selected).astype(bool)

    # Calculate metrics
    metrics = calculate_solution_metrics(weights, values, chosen)

    # Create visualization
    plt.figure(figsize=(12, 6))

    # Left plot: Selected vs. Not Selected
    plt.subplot(1, 2, 1)
    plt.scatter(weights[~chosen], values[~chosen], color='lightgray', alpha=0.7, label='Not Selected')
    plt.scatter(weights[chosen], values[chosen], color='blue', alpha=0.7, label='Selected')
    plt.xlabel('Weight')
    plt.ylabel('Value')
    plt.title('Item Selection')
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend()

    # Right plot: Value-to-Weight Ratio
    plt.subplot(1, 2, 2)
    ratios = values / weights
    plt.scatter(np.arange(len(ratios)), ratios, c=chosen.astype(int), cmap='coolwarm')
    plt.xlabel('Item Index')
    plt.ylabel('Value/Weight Ratio')
    plt.title('Value-to-Weight Ratio')
    plt.colorbar(label='Selected', ticks=[0, 1])
    plt.grid(True, linestyle='--', alpha=0.7)

    plt.suptitle(f"{title}\nTotal Value: {metrics['total_value']}, Total Weight: {metrics['total_weight']}/{capacity}")
    plt.tight_layout()
    # Leave room at the top for the two-line suptitle.
    plt.subplots_adjust(top=0.85)

    return plt.gcf()

# In [1]:
"""
Knapsack Problem Algorithms Module

This module implements various algorithms for solving the 0/1 Knapsack Problem:
1. Dynamic Programming (DP) approach
2. Genetic Algorithm (GA) approach 
3. Hybrid approach combining both techniques
"""

import numpy as np
import random
import time


def generate_knapsack_instance(n, weight_range=(1, 100), value_range=(1, 100), capacity_factor=0.5):
    """
    Build a random 0/1 knapsack instance.

    Weights and values are drawn independently and uniformly from their
    inclusive integer ranges; the capacity is a fixed fraction of the total
    weight, truncated to an integer.

    Parameters:
    -----------
    n : int
        Number of items
    weight_range : tuple
        (min_weight, max_weight), both bounds inclusive
    value_range : tuple
        (min_value, max_value), both bounds inclusive
    capacity_factor : float
        Fraction of the summed weights used as the capacity

    Returns:
    --------
    dict
        Keys 'weights' and 'values' (integer arrays of length n) and
        'capacity' (int)
    """
    def draw(bounds):
        # randint excludes its upper bound, hence the +1 to make it inclusive.
        return np.random.randint(bounds[0], bounds[1] + 1, size=n)

    # Weights are drawn before values so the random stream is consumed in a fixed order.
    item_weights = draw(weight_range)
    item_values = draw(value_range)
    total_weight = np.sum(item_weights)

    return dict(
        weights=item_weights,
        values=item_values,
        capacity=int(total_weight * capacity_factor),
    )


def solve_knapsack_dp(weights, values, capacity):
    """
    Solve the 0/1 knapsack problem exactly using dynamic programming.

    ``dp[i, w]`` holds the best total value achievable with the first ``i``
    items and a weight budget of ``w``. Each row is derived from the previous
    one with whole-array NumPy operations rather than a per-capacity Python
    loop, which produces the same table much faster.

    Parameters:
    -----------
    weights : list or array
        Non-negative integer item weights
    values : list or array
        Item values (the table is an integer array, so fractional parts of
        values are dropped)
    capacity : int
        Knapsack capacity (non-negative)

    Returns:
    --------
    tuple
        (optimal_value, selected_items), where selected_items is a boolean
        array of length n marking the chosen items

    Raises:
    -------
    ValueError
        If the capacity or any weight is negative
    """
    weights = np.asarray(weights)
    values = np.asarray(values)
    n = len(weights)
    capacity = int(capacity)

    if capacity < 0:
        raise ValueError("capacity must be non-negative")
    if n and np.any(weights < 0):
        # A negative weight would index the table from the wrong end.
        raise ValueError("weights must be non-negative")

    # Row 0 (no items) and column 0 (no budget) stay at zero.
    dp = np.zeros((n + 1, capacity + 1), dtype=int)

    for i in range(1, n + 1):
        wt = int(weights[i - 1])
        prev = dp[i - 1]
        if wt > capacity:
            # The item can never fit: the row is unchanged.
            dp[i] = prev
            continue
        # Budgets below the item's weight cannot take it.
        dp[i, :wt] = prev[:wt]
        # For every budget w >= wt: skip the item, or take it on top of the
        # best solution for budget w - wt.
        dp[i, wt:] = np.maximum(prev[wt:], prev[:capacity + 1 - wt] + values[i - 1])

    # Walk back from the final cell: a change between consecutive rows means
    # item i was taken at that budget.
    selected = np.zeros(n, dtype=bool)
    remaining = capacity
    for i in range(n, 0, -1):
        if dp[i, remaining] != dp[i - 1, remaining]:
            selected[i - 1] = True
            remaining -= int(weights[i - 1])

    return dp[n, capacity], selected


def solve_knapsack_ga(weights, values, capacity, population_size=100, num_generations=200,
                      mutation_rate=0.01, tournament_size=3, elitism=True):
    """
    Solve the knapsack problem approximately using a genetic algorithm.

    Individuals are 0/1 arrays of length n. Selections that exceed the
    capacity score 0. The returned solution is always feasible: the search
    starts from the empty knapsack and only replaces it with an individual
    of strictly higher fitness, which by construction respects the capacity.
    (If the capacity is negative, the empty selection is still returned.)

    Parameters:
    -----------
    weights : list or array
        List of item weights
    values : list or array
        List of item values
    capacity : int
        Knapsack capacity
    population_size : int
        Size of the population (at least 1)
    num_generations : int
        Number of generations to evolve
    mutation_rate : float
        Probability of flipping each gene
    tournament_size : int
        Number of individuals drawn per tournament (clamped to the range
        1..population_size)
    elitism : bool
        Whether to carry the best individual of each generation over unchanged

    Returns:
    --------
    tuple
        (best_value, selected_items), where selected_items is a 0/1 integer
        array of length n

    Raises:
    -------
    ValueError
        If population_size is smaller than 1
    """
    if population_size < 1:
        raise ValueError("population_size must be at least 1")

    weights = np.asarray(weights)
    values = np.asarray(values)
    n = len(weights)
    # random.sample cannot draw more distinct contenders than the population holds.
    contenders_per_round = max(1, min(tournament_size, population_size))

    def fitness(individual):
        """Total value of the selection, or 0 when it exceeds the capacity"""
        total_weight = np.sum(weights * individual)
        if total_weight > capacity:
            return 0  # Penalize solutions that exceed capacity
        return np.sum(values * individual)

    def tournament_selection(population, fitnesses):
        """Return a copy of the fittest of a few randomly drawn individuals"""
        contenders = random.sample(range(len(population)), contenders_per_round)
        winner = max(contenders, key=lambda idx: fitnesses[idx])
        return population[winner].copy()

    def crossover(parent1, parent2):
        """One-point crossover; parents are copied when there is no interior cut point"""
        if n < 2:
            return parent1.copy(), parent2.copy()
        crossover_point = random.randint(1, n - 1)
        child1 = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
        child2 = np.concatenate((parent2[:crossover_point], parent1[crossover_point:]))
        return child1, child2

    def mutate(individual):
        """Flip each gene independently with probability mutation_rate"""
        for i in range(n):
            if random.random() < mutation_rate:
                individual[i] = 1 - individual[i]  # Flip bit
        return individual

    # Start from the empty knapsack so the result is never an over-capacity selection.
    best_individual = np.zeros(n, dtype=int)
    best_fitness = 0

    def evaluate(population):
        """Score a population, update the best-so-far, return (fitnesses, best index)"""
        nonlocal best_individual, best_fitness
        fitnesses = np.array([fitness(ind) for ind in population])
        top = int(np.argmax(fitnesses))
        # A fitness above 0 can only come from a selection within capacity.
        if fitnesses[top] > best_fitness:
            best_fitness = fitnesses[top]
            best_individual = population[top].copy()
        return fitnesses, top

    # Initialize population
    population = np.random.randint(0, 2, size=(population_size, n))

    # Evolve population
    for _ in range(num_generations):
        fitnesses, current_best_idx = evaluate(population)

        new_population = np.zeros((population_size, n), dtype=int)

        # Elitism: keep the best individual
        if elitism:
            new_population[0] = population[current_best_idx].copy()
            start_idx = 1
        else:
            start_idx = 0

        # Fill the remaining slots two at a time; a single leftover slot
        # receives a mutated copy of one parent.
        for i in range(start_idx, population_size, 2):
            parent1 = tournament_selection(population, fitnesses)
            parent2 = tournament_selection(population, fitnesses)

            if i + 1 < population_size:
                child1, child2 = crossover(parent1, parent2)
                new_population[i] = mutate(child1)
                new_population[i + 1] = mutate(child2)
            else:
                new_population[i] = mutate(parent1)

        population = new_population

    # Score the last offspring too; otherwise the final generation is wasted.
    evaluate(population)

    return best_fitness, best_individual


def solve_knapsack_hybrid(weights, values, capacity, threshold=50, population_size=100,
                          num_generations=200, mutation_rate=0.01, tournament_size=3):
    """
    Hybrid approach: Use DP for small subproblems and GA for the complete problem.

    Instances with at most ``threshold`` items are solved exactly with
    ``solve_knapsack_dp``. Larger instances are handled in three steps:
    1. Solve a subproblem with DP on the ``threshold`` items with the best
       value/weight ratio.
    2. Use that DP solution as one individual of the GA population.
    3. Run an elitist GA on the complete problem from this seeded population.

    Parameters:
    -----------
    weights : list or array
        List of item weights
    values : list or array
        List of item values
    capacity : int
        Knapsack capacity
    threshold : int
        Maximum number of items to consider for DP-based initialization
    population_size : int
        Size of the GA population (at least 1)
    num_generations : int
        Number of GA generations to evolve
    mutation_rate : float
        Probability of flipping each gene
    tournament_size : int
        Number of individuals drawn per tournament (clamped to the range
        1..population_size)

    Returns:
    --------
    tuple
        (best_value, selected_items), where selected_items is a boolean
        array of length n

    Raises:
    -------
    ValueError
        If population_size is smaller than 1 on the GA path
    """
    # Accept plain lists as documented; the code below relies on array
    # arithmetic and fancy indexing.
    weights = np.asarray(weights)
    values = np.asarray(values)
    n = len(weights)

    if n <= threshold:
        # For small problems, just use DP
        return solve_knapsack_dp(weights, values, capacity)

    if population_size < 1:
        raise ValueError("population_size must be at least 1")

    # Value-to-weight ratios; zero-weight items get an infinite ratio instead
    # of triggering a division by zero, so they rank first.
    ratios = np.divide(values, weights, out=np.full(n, np.inf), where=weights != 0)
    sorted_indices = np.argsort(-ratios)  # Sort in descending order

    # Take the top 'threshold' items for DP
    dp_indices = sorted_indices[:threshold]
    dp_weights = weights[dp_indices]
    dp_values = values[dp_indices]

    # The subproblem budget never exceeds the real capacity, so the seed is
    # always a feasible solution of the full problem.
    sub_capacity = min(capacity, int(np.sum(dp_weights) * 0.7))

    # Solve the subproblem with DP
    _, dp_selected = solve_knapsack_dp(dp_weights, dp_values, sub_capacity)

    # Expand the DP result to a full-length 0/1 individual
    seed = np.zeros(n, dtype=int)
    seed[dp_indices] = dp_selected

    # One individual from DP, the rest random
    population = np.random.randint(0, 2, size=(population_size, n))
    population[0] = seed

    # random.sample cannot draw more distinct contenders than the population holds.
    contenders_per_round = max(1, min(tournament_size, population_size))

    def fitness(individual):
        """Total value of the selection, or 0 when it exceeds the capacity"""
        total_weight = np.sum(weights * individual)
        if total_weight > capacity:
            return 0
        return np.sum(values * individual)

    def tournament_selection(pop, fitnesses):
        """Return a copy of the fittest of a few randomly drawn individuals"""
        contenders = random.sample(range(len(pop)), contenders_per_round)
        winner = max(contenders, key=lambda idx: fitnesses[idx])
        return pop[winner].copy()

    def crossover(parent1, parent2):
        """One-point crossover; parents are copied when there is no interior cut point"""
        if n < 2:
            return parent1.copy(), parent2.copy()
        crossover_point = random.randint(1, n - 1)
        child1 = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
        child2 = np.concatenate((parent2[:crossover_point], parent1[crossover_point:]))
        return child1, child2

    def mutate(individual):
        """Flip each gene independently with probability mutation_rate"""
        for i in range(n):
            if random.random() < mutation_rate:
                individual[i] = 1 - individual[i]
        return individual

    best_individual = seed.copy()
    best_fitness_val = fitness(seed)

    def evaluate(pop):
        """Score a population, update the best-so-far, return (fitnesses, best index)"""
        nonlocal best_individual, best_fitness_val
        fitnesses = np.array([fitness(ind) for ind in pop])
        top = int(np.argmax(fitnesses))
        if fitnesses[top] > best_fitness_val:
            best_fitness_val = fitnesses[top]
            best_individual = pop[top].copy()
        return fitnesses, top

    # Evolve population
    for _ in range(num_generations):
        fitnesses, current_best_idx = evaluate(population)

        new_population = np.zeros((population_size, n), dtype=int)
        new_population[0] = population[current_best_idx].copy()  # Elitism

        # Fill the remaining slots two at a time; a single leftover slot
        # receives a mutated copy of one parent.
        for i in range(1, population_size, 2):
            parent1 = tournament_selection(population, fitnesses)
            parent2 = tournament_selection(population, fitnesses)

            if i + 1 < population_size:
                child1, child2 = crossover(parent1, parent2)
                new_population[i] = mutate(child1)
                new_population[i + 1] = mutate(child2)
            else:
                new_population[i] = mutate(parent1)

        population = new_population

    # Score the last offspring too; otherwise the final generation is wasted.
    evaluate(population)

    # Return a boolean mask on every path, matching the DP branch above.
    return best_fitness_val, best_individual.astype(bool)


# Additional utility functions for the knapsack problem

def calculate_solution_metrics(weights, values, selected):
    """
    Calculate metrics for a given knapsack solution.

    Parameters:
    -----------
    weights : list or array
        List of item weights
    values : list or array
        List of item values
    selected : list or array
        Selection array (0/1 integers or booleans), one entry per item

    Returns:
    --------
    dict
        Dictionary with total_weight and total_value of the selected items
    """
    # Convert first: multiplying two plain Python lists raises TypeError.
    weights = np.asarray(weights)
    values = np.asarray(values)
    selected = np.asarray(selected)

    return {
        'total_weight': np.sum(weights * selected),
        'total_value': np.sum(values * selected)
    }


def knapsack_greedy(weights, values, capacity):
    """
    Solve the knapsack problem using a greedy approach (by value/weight ratio).
    This is not optimal but can be used as a baseline.

    Items are visited in order of decreasing value/weight ratio, and each one
    is taken if it still fits in the remaining capacity.

    Parameters:
    -----------
    weights : list or array
        List of item weights
    values : list or array
        List of item values
    capacity : int
        Knapsack capacity

    Returns:
    --------
    tuple
        (total_value, selected_items), where selected_items is a boolean
        array of length n
    """
    # Convert first: dividing two plain Python lists raises TypeError.
    weights = np.asarray(weights)
    values = np.asarray(values)
    n = len(weights)

    # Zero-weight items get an infinite ratio instead of triggering a
    # division by zero, so they are considered first.
    ratios = np.divide(values, weights, out=np.full(n, np.inf), where=weights != 0)

    # Sort items by ratio (descending)
    indices = np.argsort(-ratios)

    # Greedy selection
    selected = np.zeros(n, dtype=bool)
    total_weight = 0
    total_value = 0

    for i in indices:
        if total_weight + weights[i] <= capacity:
            selected[i] = True
            total_weight += weights[i]
            total_value += values[i]

    return total_value, selected


def visualize_solution(weights, values, capacity, selected, title="Knapsack Solution"):
    """
    Visualize a knapsack solution.

    Draws two side-by-side scatter plots: value against weight with the
    selected items highlighted, and the value/weight ratio per item index
    coloured by selection status.

    Parameters:
    -----------
    weights : list or array
        List of item weights
    values : list or array
        List of item values
    capacity : int
        Knapsack capacity
    selected : list or array
        Selection array (0/1 integers or booleans), one entry per item
    title : str
        Plot title

    Returns:
    --------
    matplotlib.figure.Figure
        The figure holding both plots
    """
    import matplotlib.pyplot as plt

    weights = np.asarray(weights)
    values = np.asarray(values)
    # Force a boolean mask: on a 0/1 integer array `~` is a bitwise NOT
    # (giving -1/-2), which would index the wrong items instead of masking.
    chosen = np.asarray(selected).astype(bool)

    # Calculate metrics
    metrics = calculate_solution_metrics(weights, values, chosen)

    # Create visualization
    plt.figure(figsize=(12, 6))

    # Left plot: Selected vs. Not Selected
    plt.subplot(1, 2, 1)
    plt.scatter(weights[~chosen], values[~chosen], color='lightgray', alpha=0.7, label='Not Selected')
    plt.scatter(weights[chosen], values[chosen], color='blue', alpha=0.7, label='Selected')
    plt.xlabel('Weight')
    plt.ylabel('Value')
    plt.title('Item Selection')
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend()

    # Right plot: Value-to-Weight Ratio
    plt.subplot(1, 2, 2)
    ratios = values / weights
    plt.scatter(np.arange(len(ratios)), ratios, c=chosen.astype(int), cmap='coolwarm')
    plt.xlabel('Item Index')
    plt.ylabel('Value/Weight Ratio')
    plt.title('Value-to-Weight Ratio')
    plt.colorbar(label='Selected', ticks=[0, 1])
    plt.grid(True, linestyle='--', alpha=0.7)

    plt.suptitle(f"{title}\nTotal Value: {metrics['total_value']}, Total Weight: {metrics['total_weight']}/{capacity}")
    plt.tight_layout()
    # Leave room at the top for the two-line suptitle.
    plt.subplots_adjust(top=0.85)

    return plt.gcf()