<a href="https://colab.research.google.com/github/biruk50/Medium_articles/blob/main/Prefix_Sum_Heuristic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from collections import deque,namedtuple
from google.colab import files  # For file upload in Google Colab
from typing import List, Dict
import math
import time
import heapq

In [2]:
class Node:
    """A partial solution in the branch-and-bound decision tree.

    Attributes:
        level: index of the last item decided on the path to this node.
        value: total value collected so far.
        weight: total weight collected so far.
        bound: upper bound on the value reachable from this node.
        taken: list of take/skip decisions made so far.
    """

    def __init__(self, level, value, weight, bound, taken):
        # Position in the tree plus the running totals of the partial solution
        self.level, self.value, self.weight = level, value, weight
        # Upper bound of the value; also the ordering key (see __lt__)
        self.bound = bound
        # Items taken so far
        self.taken = taken

    def __lt__(self, other):
        # heapq is a min-heap, so the comparison is reversed to make the
        # node with the largest bound come out first.
        return other.bound < self.bound


def calculate_bound(node, capacity, items):
    """Return the fractional-knapsack upper bound for the subtree below *node*.

    The bound is the node's current value plus the value obtained by filling
    the remaining capacity with the not-yet-decided items, taking them whole
    while they fit and then a fraction of the first one that does not.

    Args:
        node: object with ``level`` (index of the last decided item, ``-1``
            for the root), ``value`` and ``weight`` attributes.
        capacity: total knapsack capacity.
        items: sequence of objects with ``value`` and ``weight`` attributes.
            Assumed to be sorted by value/weight ratio, highest first, as the
            caller in this file does; the bound is only valid in that order.

    Returns:
        0 if the node already exceeds the capacity, otherwise the upper bound
        (may be a float because of the fractional part).
    """
    if node.weight > capacity:
        return 0  # Infeasible node: nothing below it can be a solution

    bound = node.value
    total_weight = node.weight
    # items[node.level] has already been decided on the way to this node, so
    # only the items after it may contribute. Starting at node.level itself
    # would count that item a second time (and read items[-1] for the root).
    level = node.level + 1

    # Take whole items while they still fit
    while level < len(items) and total_weight + items[level].weight <= capacity:
        total_weight += items[level].weight
        bound += items[level].value
        level += 1

    # Fill whatever capacity is left with a fraction of the next item
    if level < len(items):
        bound += (capacity - total_weight) * (items[level].value / items[level].weight)

    return bound


def branch_and_bound_knapsack(items, capacity):
    """Solve the 0/1 knapsack problem with best-first branch and bound.

    Items are explored in decreasing value/weight order. Each tree node fixes
    the take/skip decision for one more item, and nodes are expanded in order
    of their upper bound (largest first).

    Args:
        items: iterable of objects with ``value`` and ``weight`` attributes.
        capacity: knapsack capacity.

    Returns:
        ``(max_value, taken)`` where ``taken[i]`` is 1 if the i-th item of the
        *input* is selected and 0 otherwise. The list always has one entry
        per input item, the same convention ``greedy_knapsack`` uses.
    """
    # Remember every item's input position so the answer can be reported in
    # the caller's order rather than in the internal sorted order.
    ranked = sorted(enumerate(items), key=lambda pair: pair[1].value / pair[1].weight, reverse=True)
    if not ranked:
        return 0, []
    order = [original_idx for original_idx, _ in ranked]
    items = [item for _, item in ranked]
    count = len(items)

    def make_node(level, value, weight, taken):
        # calculate_bound only reads level/value/weight, so the node can be
        # built with a placeholder bound and completed afterwards.
        node = Node(level, value, weight, 0, taken)
        node.bound = calculate_bound(node, capacity, items)
        return node

    pq = [make_node(-1, 0, 0, [])]  # Priority queue (max-heap via Node.__lt__)
    max_value = 0
    best_taken = []

    while pq:
        current = heapq.heappop(pq)

        # Skip nodes that cannot beat the incumbent or have no item left
        if current.bound <= max_value or current.level >= count - 1:
            continue

        next_level = current.level + 1
        item = items[next_level]

        # Branch where we take the item
        if current.weight + item.weight <= capacity:
            node_with = make_node(
                next_level,
                current.value + item.value,
                current.weight + item.weight,
                current.taken + [1],
            )
            if node_with.value > max_value:
                max_value = node_with.value
                best_taken = node_with.taken
            # max_value never decreases, so a node that fails this test now
            # would also be discarded when popped; don't queue it at all.
            if node_with.bound > max_value:
                heapq.heappush(pq, node_with)

        # Branch where we don't take the item
        node_without = make_node(next_level, current.value, current.weight, current.taken + [0])
        if node_without.bound > max_value:
            heapq.heappush(pq, node_without)

    # best_taken holds decisions in sorted order and only up to the depth of
    # the best node; translate it to a full-length list in input order.
    taken = [0] * count
    for sorted_pos, flag in enumerate(best_taken):
        if flag:
            taken[order[sorted_pos]] = 1

    return max_value, taken


In [62]:
def heuristic_knapsack(items, capacity):
    """Pick items for a 0/1 knapsack using a prefix-sum look-ahead heuristic.

    Items are visited in decreasing value/weight order. For every item that
    still fits, two estimates are compared: greedily filling the remaining
    capacity starting *with* this item, and starting with the next one. The
    item is taken only if starting with it is at least as good on both the
    whole-item value and the fractional bound. Prefix sums over the sorted
    items plus a binary search make each estimate O(log n).

    Args:
        items: iterable of objects with ``value`` and ``weight`` attributes.
        capacity: knapsack capacity.

    Returns:
        ``(total_value, taken)`` where ``taken[i]`` is 1 if the i-th item of
        the *input* is selected and 0 otherwise, the same convention
        ``greedy_knapsack`` uses.
    """
    from bisect import bisect_right

    # Keep each item's input position so the result can be reported in the
    # caller's order rather than in the internal sorted order.
    ranked = sorted(enumerate(items), key=lambda pair: pair[1].value / pair[1].weight, reverse=True)
    length = len(ranked)

    # Precompute prefix sums over the sorted items
    prefix_weights = [0] * (length + 1)
    prefix_values = [0] * (length + 1)
    for pos, (_, item) in enumerate(ranked):
        prefix_weights[pos + 1] = prefix_weights[pos] + item.weight
        prefix_values[pos + 1] = prefix_values[pos] + item.value

    def prefix_bound(remaining_cap, start_idx):
        """Return (fractional bound, whole-item value) for items from start_idx on."""
        base_weight = prefix_weights[start_idx]
        # Largest index whose cumulative weight (measured from start_idx)
        # still fits; prefix_weights is non-decreasing so bisect applies.
        low = bisect_right(prefix_weights, base_weight + remaining_cap, lo=start_idx) - 1

        int_part = prefix_values[low] - prefix_values[start_idx]
        bound = int_part

        if low < length:
            # Top up with a fraction of the first item that did not fit
            next_item = ranked[low][1]
            remaining = remaining_cap - (prefix_weights[low] - base_weight)
            bound += remaining * (next_item.value / next_item.weight)
        return bound, int_part

    remaining_cap = capacity
    total_value = 0
    taken = [0] * length

    for pos, (original_idx, item) in enumerate(ranked):
        if item.weight > remaining_cap:
            continue

        bound_with, int_with = prefix_bound(remaining_cap, pos)
        # For the last item this evaluates an empty suffix and yields (0, 0)
        bound_without, int_without = prefix_bound(remaining_cap, pos + 1)

        if int_with >= int_without and bound_with >= bound_without:
            taken[original_idx] = 1
            total_value += item.value
            remaining_cap -= item.weight

    return total_value, taken

In [5]:
def greedy_knapsack(items, capacity):
    """Fill the knapsack greedily by decreasing value/weight ratio.

    Every item is considered once, best ratio first, and taken whenever it
    still fits in the remaining capacity.

    Returns:
        ``(total_value, taken)`` where ``taken[i]`` is 1 if the i-th item of
        the input was selected and 0 otherwise.
    """
    def ratio(pair):
        _, candidate = pair
        return candidate.value / candidate.weight

    # Pair each item with its input position before sorting so the result
    # can be reported in the caller's order.
    ranked = sorted(enumerate(items), key=ratio, reverse=True)

    taken = [0] * len(ranked)
    space_left = capacity
    total_value = 0

    for original_idx, candidate in ranked:
        if candidate.weight > space_left:
            continue
        taken[original_idx] = 1
        space_left -= candidate.weight
        total_value += candidate.value

    return total_value, taken


In [64]:
from collections import namedtuple
from google.colab import files

# Define the Item named tuple
Item = namedtuple("Item", ['index', 'value', 'weight', 'cluster_id'])

# Upload and read file
uploaded = files.upload()
file_name = list(uploaded.keys())[0]

with open(file_name, 'r') as file:
    input_data = file.read().strip()

# Remove blank lines
lines = [line.strip() for line in input_data.split('\n') if line.strip()]

# Expected layout after blank lines are dropped:
#   line 0        -> number of items
#   lines 1..n    -> "<id> <profit> <weight>" for each item
#   last line     -> knapsack capacity
try:
    item_count = int(lines[0])
    if len(lines) != item_count + 2:
        raise ValueError(f"Expected {item_count + 2} lines, found {len(lines)}")

    items = []
    for i in range(1, item_count + 1):
        id_str, profit_str, weight_str = lines[i].split()
        items.append(Item(index=int(id_str), value=int(profit_str), weight=int(weight_str), cluster_id=-1))

    capacity = int(lines[-1])
    print(f"Parsed {item_count} items with knapsack capacity {capacity}")
except (ValueError, IndexError) as e:
    print("Error parsing file:", str(e))
    # Stop here: continuing would run the solvers on undefined data, or on
    # `items`/`capacity` left over from an earlier run of this cell.
    raise


# perf_counter is a monotonic, high-resolution clock meant for timing
start_time = time.perf_counter()
# prefix heuristic
max2, taken2 = heuristic_knapsack(items, capacity)
end_time = time.perf_counter()
elapsed_time = end_time - start_time

print("prefix heuristic Solution:")
print("Max Value:", max2)
print("Items Taken:", taken2)
print(f"Algorithm took: {elapsed_time} seconds")

# Branch and bound is left disabled here; uncomment to compare against it.
# start_time = time.perf_counter()
# # Branch and Bound
# max_value, taken = branch_and_bound_knapsack(items, capacity)
# end_time = time.perf_counter()
# elapsed_time = end_time - start_time
#
# print("Branch and Bound Solution:")
# print("Max Value:", max_value)
# print("Items Taken:", taken)
# print(f"Algorithm took: {elapsed_time} seconds")

start_time = time.perf_counter()
# simple heuristic
max3, taken3 = greedy_knapsack(items, capacity)
end_time = time.perf_counter()
elapsed_time = end_time - start_time

print("greedy heuristic Solution:")
print("Max Value:", max3)
print("Items Taken:", taken3)
print(f"Algorithm took: {elapsed_time} seconds")

Saving eg 1.txt to eg 1 (7).txt
Parsed 1000 items with knapsack capacity 10000000000
prefix heuristic Solution:
Max Value: 9999571962
Items Taken: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

1013775