# Lab 2: Benchmarking Binary Heaps

> Date: 09/27/2024
> Author: Leigh Goetsch
> Class: CSC3310
>
> ### Learning Outcomes
>
> - Implement a binary heap in Python
> - Benchmark the performance of a binary heap
> - Analyze the performance of a binary heap

## Introduction

This lab focuses on benchmarking the performance of a binary heap. The goal is to compare the performance of operations from the heapq library to gain a better understanding of the efficiency of a binary heap.


## Design Benchmarks

Plan out how you intend to benchmark the following operations:


### heapify

Create a heap from a collection. Note that this is not the same procedure as min-heapify but rather the build min heap procedure.


In [None]:
def min_heapify(input_list, i, n):
    """
    Sift the value at index i down until its subtree obeys the min-heap
    property. The list is modified in place and nothing is returned.

    The children of index k are read from 2*k and 2*k + 1, and n is the
    largest index that is treated as part of the heap (inclusive).
    """
    while True:
        left_child = 2 * i
        right_child = left_child + 1

        # pick the smallest of the node and whichever children are in range
        target = i
        if left_child <= n and input_list[left_child] < input_list[target]:
            target = left_child
        if right_child <= n and input_list[right_child] < input_list[target]:
            target = right_child

        if target == i:
            # node is no larger than its children: done
            return

        input_list[target], input_list[i] = input_list[i], input_list[target]
        i = target


def heapify(input_list):
    """
    Rearrange input_list in place into a min-heap and return the same list.

    The list uses the layout min_heapify works on: index 0 is a placeholder
    slot that is never read or moved, and the heap occupies indices
    1 .. len(input_list) - 1. A list with fewer than two heap entries is
    returned unchanged.
    """
    # min_heapify expects the last valid index (inclusive), not the length;
    # passing len(input_list) makes it read one slot past the end.
    last = len(input_list) - 1

    # Indices above last // 2 have no children, so only the internal nodes
    # need sifting, from the deepest one back up to the root at index 1.
    for i in range(last // 2, 0, -1):
        min_heapify(input_list, i, last)
    return input_list

### heappush

Push an element to the heap and restore the heap property if necessary.


In [None]:
def heappush(heap, item):
    """
    Append item to a min-heap, sift it up to its place, and return the heap.

    heap is modified in place and the same list object is returned. It is
    expected to use a 1-based layout: index 0 is a placeholder that is never
    compared, the root is at index 1, and the parent of index k is k // 2.
    """
    # add the item to the end of the heap
    heap.append(item)

    # restore the heap property: move the new item up while it is smaller
    # than its parent (the reverse test would build a max-heap instead)
    i = len(heap) - 1
    while i > 1 and heap[i] < heap[i // 2]:
        parent = i // 2
        heap[i], heap[parent] = heap[parent], heap[i]
        i = parent

    return heap

### create_heap_one_by_one

Implement a function that creates a heap by adding each element one by one, i.e. invoke heappush n times for n elements.


In [None]:
def create_heap_one_by_one(input_list):
    """
    Build a heap by pushing every element of input_list with heappush.

    Returns a new list; input_list is not modified. heappush never compares
    or moves the entry at index 0, so the heap is seeded with a None
    placeholder there. Starting from an empty list would park the first
    pushed item in that slot and leave it outside the heap.
    """
    heap = [None]
    for item in input_list:
        # heappush mutates heap in place, so no reassignment is needed
        heappush(heap, item)
    return heap

### heappop

Pop and return the smallest item from the heap, maintaining the heap invariant.


In [None]:
def heappop(heap):
    """
    Remove and return the smallest element of a min-heap.

    heap is modified in place and shrinks by one entry. It is expected to
    use a 1-based layout: index 0 is a placeholder and the root (smallest
    element) is at index 1.

    Raises ValueError('heap underflow') when there is no entry at index 1.
    """
    last = len(heap) - 1
    if last < 1:
        raise ValueError('heap underflow')

    smallest = heap[1]
    # Physically remove the final slot; without this the list never shrinks
    # and a stale copy of the last element stays behind after every pop.
    tail = heap.pop()
    if last > 1:
        # Move the old last element to the root and sift it down over the
        # remaining entries (indices 1 .. last - 1).
        heap[1] = tail
        min_heapify(heap, 1, last - 1)
    return smallest

### heapsort

Sort a list of numbers using the heapsort algorithm.


In [None]:
def heap_sort(input_list):
    """
    Return a new list with the elements of input_list in ascending order.

    input_list itself is not modified. The elements are copied behind a
    placeholder at index 0 (the layout min_heapify works on), arranged into
    a min-heap, and then the root is extracted repeatedly.
    """
    heap = [None]
    heap.extend(input_list)
    last = len(heap) - 1  # largest index that is still part of the heap

    # build the min-heap bottom-up; indices above last // 2 are leaves
    for i in range(last // 2, 0, -1):
        min_heapify(heap, i, last)

    ordered = []
    while last >= 1:
        # the root is the smallest remaining element
        ordered.append(heap[1])
        # move the final element to the root and shrink the heap region
        heap[1] = heap[last]
        last -= 1
        if last > 1:
            min_heapify(heap, 1, last)
    return ordered

### Benchmark Functions


In [None]:
import time


def benchmark_heapify(input_list):
    """
    Time a single heapify call and return the elapsed seconds.

    heapify runs on a copy, so the caller's list is left untouched; the copy
    is made before the clock starts.
    """
    scratch = input_list.copy()
    began = time.perf_counter()
    heapify(scratch)
    return time.perf_counter() - began


def benchmark_heappush(heap, item):
    """
    Time a single heappush of item and return the elapsed seconds.

    The push is applied to a copy of heap, so the caller's list is left
    untouched; the copy is made before the clock starts.
    """
    scratch = heap.copy()
    began = time.perf_counter()
    heappush(scratch, item)
    return time.perf_counter() - began


def benchmark_create_heap(input_list):
    """
    Time a single create_heap_one_by_one call and return the elapsed seconds.

    The call receives a copy of input_list; the copy is made before the
    clock starts.
    """
    scratch = input_list.copy()
    began = time.perf_counter()
    create_heap_one_by_one(scratch)
    return time.perf_counter() - began


def benchmark_heappop(heap):
    """
    Time a single heappop call and return the elapsed seconds.

    The pop is applied to a copy of heap, so the caller's list is left
    untouched; the copy is made before the clock starts.
    """
    scratch = heap.copy()
    began = time.perf_counter()
    heappop(scratch)
    return time.perf_counter() - began


def benchmark_heap_sort(input_list):
    """
    Time a single heap_sort call and return the elapsed seconds.

    The call receives a copy of input_list; the copy is made before the
    clock starts.
    """
    scratch = input_list.copy()
    began = time.perf_counter()
    heap_sort(scratch)
    return time.perf_counter() - began

## Perform Benchmarks

Implement and run the benchmarks


In [None]:
# set up benchmarking lists
import random

heap_sizes = [100, 500, 1000, 5000, 10000, 25000]

# One input list per heap size for each ordering: ascending, descending,
# and a random permutation.
benchmark_lists = {
    "best": [list(range(n)) for n in heap_sizes],
    "worst": [list(range(n, 0, -1)) for n in heap_sizes],
    "average": [random.sample(range(n), n) for n in heap_sizes],
}


def benchmark_heappush_new_min(input_list):
    """
    Adapter so heappush can be timed with a single list argument.

    Pushes -1, which is smaller than every value in benchmark_lists (all of
    them are >= 0).
    """
    return benchmark_heappush(input_list, -1)


# Timing table: benchmark function -> {case name: [seconds per heap size]}.
# It has to exist before the loop below fills it in.
benchmark_results = {
    benchmark_heapify: {},
    benchmark_heappush_new_min: {},
    benchmark_create_heap: {},
    benchmark_heappop: {},
    benchmark_heap_sort: {},
}


# run benchmarks
for case, case_lists in benchmark_lists.items():
    for algo, timings in benchmark_results.items():
        print(f"Running {algo.__name__} on {case} case")
        # one timing per heap size, in the same order as heap_sizes
        timings[case] = [algo(lst) for lst in case_lists]

## Validating Formal Run Times

Use the functions from Lab 1 to estimate the Big-O complexity of each of the heapq operations.


In [None]:
import numpy as np
from scipy.stats import linregress

# fit a linear regression model to the log of the list
# sizes (s) and run times (r) to estimate the slope (m)
# log r = m log s + b


def estimate_slope(list_sizes, run_times):
    '''
    Regress log(run_times) on log(list_sizes) and return the slope of the
    fitted line, i.e. m in  log r = m log s + b.
    '''
    log_sizes = np.log(list_sizes)
    log_times = np.log(run_times)
    fit = linregress(log_sizes, log_times)
    return fit.slope


def get_complexity(m, tol=0.05):
    '''
    Map the slope of a log-log regression to a complexity label.

    m is the fitted slope. tol is the half-width of the band around 0, 1, 2
    and 3 that is still reported as exactly constant, linear, quadratic or
    cubic. A fitted slope is a float and almost never lands exactly on an
    integer, so exact equality would leave those labels unreachable in
    practice. Pass tol=0 to require exact matches.

    Slopes below 1 - tol that are not within tol of 0 (including negative
    ones) are reported as sub-linear; slopes above 3 + tol, and NaN, are
    reported as "Out of Scope".
    '''
    if abs(m) <= tol:
        return "Constant"
    elif m < 1 - tol:
        return "Sub-linear (e.g., log n)"
    elif m <= 1 + tol:
        return "Linear"
    elif m < 2 - tol:
        return "Between linear and quadratic (e.g., n log n)"
    elif m <= 2 + tol:
        return "Quadratic (e.g., n^2)"
    elif m < 3 - tol:
        return "Between quadratic and cubic (e.g., n^2 log n)"
    elif m <= 3 + tol:
        return "Cubic (e.g., n^3)"
    else:
        return "Out of Scope"

In [None]:
# Validate the formal run-time complexity of each benchmarked operation.
# benchmark_results maps each benchmarked callable to {case: [run times]}.
for operation, results in benchmark_results.items():
    print(f"Algorithm: {operation.__name__}")
    for case, run_times in results.items():
        # heap_sizes holds the input sizes the benchmark lists were built
        # from, one per entry in run_times
        m = estimate_slope(heap_sizes, run_times)
        complexity = get_complexity(m)
        print(f"Case: {case}")
        print(f"Estimated Slope: {m}")
        print(f"Complexity: {complexity}")
        print()

## Reflection Questions
