## Priority Queues and Disjoint Sets

### Binary Maximum Heap

In [10]:
class binary_max_heap():
    """Fixed-capacity binary max-heap stored in a flat list.

    The live heap occupies ``self.max_heap[:self.size]``; every slot beyond
    that, up to ``max_size``, holds the placeholder ``'-'``.  Operations that
    cannot be carried out (full heap, empty heap, index out of range) return
    the string ``'ERROR'`` instead of raising.
    """

    def __init__(self, max_size, given_heap):
        """Wrap ``given_heap`` and pad the storage up to ``max_size`` slots.

        ``given_heap`` is not reordered here, so it must already satisfy the
        max-heap property.  The caller's list is not modified.
        """
        self.size = len(given_heap)
        self.max_size = max_size
        self.max_heap = given_heap + ['-'] * (self.max_size - self.size)

    def parent(self, i):
        """Return the index of the parent of node ``i``."""
        return (i - 1) // 2

    def left_child(self, i):
        """Return the index of the left child of node ``i``."""
        return 2 * i + 1

    def right_child(self, i):
        """Return the index of the right child of node ``i``."""
        return 2 * (i + 1)

    def sift_up(self, i):
        """Move node ``i`` towards the root while it is larger than its parent."""
        while i > 0:
            up = self.parent(i)
            if not self.max_heap[i] > self.max_heap[up]:
                break
            self.max_heap[i], self.max_heap[up] = self.max_heap[up], self.max_heap[i]
            i = up

    def sift_down(self, i):
        """Move node ``i`` towards the leaves while a child is larger than it."""
        while True:
            max_node = i
            left = self.left_child(i)
            if left < self.size and self.max_heap[left] > self.max_heap[max_node]:
                max_node = left
            right = self.right_child(i)
            if right < self.size and self.max_heap[right] > self.max_heap[max_node]:
                max_node = right
            if max_node == i:
                return
            self.max_heap[i], self.max_heap[max_node] = self.max_heap[max_node], self.max_heap[i]
            i = max_node

    def get_max(self):
        """Return the largest element without removing it, or ``'ERROR'`` if empty."""
        if self.size <= 0:
            return 'ERROR'
        return self.max_heap[0]

    def insert(self, p):
        """Add ``p`` to the heap; return ``'ERROR'`` when the heap is full."""
        # ``>=`` also covers a heap that was created with more items than max_size.
        if self.size >= self.max_size:
            return 'ERROR'
        self.max_heap[self.size] = p
        self.size += 1
        self.sift_up(self.size - 1)

    def extract_max(self):
        """Remove and return the largest element, or ``'ERROR'`` if empty."""
        if self.size <= 0:
            return 'ERROR'
        result = self.max_heap[0]
        last = self.size - 1
        # Move the last live element to the root and restore the placeholder
        # in the freed slot.  The storage keeps its full length, so later
        # inserts still have room (list.pop() would drop a padding slot).
        self.max_heap[0] = self.max_heap[last]
        self.max_heap[last] = '-'
        self.size = last
        self.sift_down(0)
        return result

    def remove(self, i):
        """Delete the element at index ``i``; return ``'ERROR'`` if out of range."""
        if i < 0 or i >= self.size:
            return 'ERROR'
        # Bubble the element all the way to the root unconditionally (the same
        # path an "infinite" priority would take), then extract it.  This
        # avoids writing float('inf') into the heap, so it also works for
        # non-numeric items.
        while i > 0:
            up = self.parent(i)
            self.max_heap[i], self.max_heap[up] = self.max_heap[up], self.max_heap[i]
            i = up
        self.extract_max()

    def change_priority(self, i, p):
        """Replace the element at index ``i`` with ``p`` and restore heap order.

        Returns ``'ERROR'`` if ``i`` is not the index of a live element.
        """
        if i < 0 or i >= self.size:
            return 'ERROR'
        old_priority = self.max_heap[i]
        self.max_heap[i] = p
        if p > old_priority:
            self.sift_up(i)
        else:
            self.sift_down(i)

    def print_heap(self):
        """Return a copy of the live part of the heap (padding excluded)."""
        return self.max_heap[:self.size]

    
# Demo: walk a small, already valid max-heap through every operation of
# binary_max_heap and print the live part of the heap after each step.
if __name__ == '__main__':
    max_size = 13
    given_heap = [42, 29, 18, 14, 7, 18, 12, 11]
    assert len(given_heap) <= max_size
    heap = binary_max_heap(max_size, given_heap)
    print('Given heap: ', heap.print_heap())
    heap.insert(30)
    print('30 is inserted into heap: ', heap.print_heap())
    # Two consecutive extractions of the current maximum.
    print('Maximum value extracted: ', heap.extract_max())
    print('Maximum value is extracted from heap: ', heap.print_heap())
    print('Maximum value extracted: ', heap.extract_max())
    print('Maximum value is extracted from heap: ', heap.print_heap())
    # Index 4 is the fifth element (indices are 0-based).
    heap.remove(4)
    print('5th element is removed from heap: ', heap.print_heap())
    # Index 2 is the third element; its priority is raised to 50.
    heap.change_priority(2, 50)
    print('3nd element is changed to 50 inside heap: ', heap.print_heap())
    print('Maximum value not extracted: ', heap.get_max())

Given heap:  [42, 29, 18, 14, 7, 18, 12, 11]
30 is inserted into heap:  [42, 30, 18, 29, 7, 18, 12, 11, 14]
Maximum value extracted:  42
Maximum value is extracted from heap:  [30, 29, 18, 14, 7, 18, 12, 11]
Maximum value extracted:  30
Maximum value is extracted from heap:  [29, 14, 18, 11, 7, 18, 12]
5th element is removed from heap:  [29, 14, 18, 11, 12, 18]
3nd element is changed to 50 inside heap:  [50, 14, 29, 11, 12, 18]
Maximum value not extracted:  50


### Binary Minimum Heap

In [21]:
class binary_min_heap():
    """Fixed-capacity binary min-heap stored in a flat list.

    The live heap occupies ``self.min_heap[:self.size]``; every slot beyond
    that, up to ``max_size``, holds the placeholder ``'-'``.  Operations that
    cannot be carried out (full heap, empty heap, index out of range) return
    the string ``'ERROR'`` instead of raising.
    """

    def __init__(self, max_size, given_heap):
        """Wrap ``given_heap`` and pad the storage up to ``max_size`` slots.

        ``given_heap`` is not reordered here, so it must already satisfy the
        min-heap property.  The caller's list is not modified.
        """
        self.size = len(given_heap)
        self.max_size = max_size
        self.min_heap = given_heap + ['-'] * (self.max_size - self.size)

    def parent(self, i):
        """Return the index of the parent of node ``i``."""
        return (i - 1) // 2

    def left_child(self, i):
        """Return the index of the left child of node ``i``."""
        return 2 * i + 1

    def right_child(self, i):
        """Return the index of the right child of node ``i``."""
        return 2 * (i + 1)

    def sift_up(self, i):
        """Move node ``i`` towards the root while it is smaller than its parent."""
        while i > 0:
            up = self.parent(i)
            if not self.min_heap[i] < self.min_heap[up]:
                break
            self.min_heap[i], self.min_heap[up] = self.min_heap[up], self.min_heap[i]
            i = up

    def sift_down(self, i):
        """Move node ``i`` towards the leaves while a child is smaller than it."""
        while True:
            min_node = i
            # The right child is examined first, so on a tie between the two
            # children the right one is chosen.
            right = self.right_child(i)
            if right < self.size and self.min_heap[right] < self.min_heap[min_node]:
                min_node = right
            left = self.left_child(i)
            if left < self.size and self.min_heap[left] < self.min_heap[min_node]:
                min_node = left
            if min_node == i:
                return
            self.min_heap[i], self.min_heap[min_node] = self.min_heap[min_node], self.min_heap[i]
            i = min_node

    def get_min(self):
        """Return the smallest element without removing it, or ``'ERROR'`` if empty."""
        if self.size <= 0:
            return 'ERROR'
        return self.min_heap[0]

    def insert(self, p):
        """Add ``p`` to the heap; return ``'ERROR'`` when the heap is full."""
        # ``>=`` also covers a heap that was created with more items than max_size.
        if self.size >= self.max_size:
            return 'ERROR'
        self.min_heap[self.size] = p
        self.size += 1
        self.sift_up(self.size - 1)

    def extract_min(self):
        """Remove and return the smallest element, or ``'ERROR'`` if empty."""
        if self.size <= 0:
            return 'ERROR'
        result = self.min_heap[0]
        last = self.size - 1
        # Move the last live element to the root and restore the placeholder
        # in the freed slot.  The storage keeps its full length, so later
        # inserts still have room (list.pop() would drop a padding slot).
        self.min_heap[0] = self.min_heap[last]
        self.min_heap[last] = '-'
        self.size = last
        self.sift_down(0)
        return result

    def remove(self, i):
        """Delete the element at index ``i``; return ``'ERROR'`` if out of range."""
        if i < 0 or i >= self.size:
            return 'ERROR'
        # Bubble the element all the way to the root unconditionally (the same
        # path a "minus infinity" priority would take), then extract it.  This
        # avoids writing -float('inf') into the heap, so it also works for
        # non-numeric items.
        while i > 0:
            up = self.parent(i)
            self.min_heap[i], self.min_heap[up] = self.min_heap[up], self.min_heap[i]
            i = up
        self.extract_min()

    def change_priority(self, i, p):
        """Replace the element at index ``i`` with ``p`` and restore heap order.

        Returns ``'ERROR'`` if ``i`` is not the index of a live element.
        """
        if i < 0 or i >= self.size:
            return 'ERROR'
        old_priority = self.min_heap[i]
        self.min_heap[i] = p
        if p < old_priority:
            self.sift_up(i)
        else:
            self.sift_down(i)

    def print_heap(self):
        """Return a copy of the live part of the heap (padding excluded)."""
        return self.min_heap[:self.size]

    
# Demo: walk a small, already valid min-heap through every operation of
# binary_min_heap and print the live part of the heap after each step.
if __name__ == '__main__':
    max_size = 13
    given_heap = [7, 9, 13, 16, 15, 18, 14, 40]
    assert len(given_heap) <= max_size
    heap = binary_min_heap(max_size, given_heap)
    print('Given heap: ', heap.print_heap())
    heap.insert(8)
    print('8 is inserted into heap: ', heap.print_heap())
    # Two consecutive extractions of the current minimum.
    print('Minimum value extracted: ', heap.extract_min())
    print('Minimum value is extracted from heap: ', heap.print_heap())
    print('Minimum value extracted: ', heap.extract_min())
    print('Minimum value is extracted from heap: ', heap.print_heap())
    # Index 4 is the fifth element (indices are 0-based).
    heap.remove(4)
    print('5th element is removed from heap: ', heap.print_heap())
    # Index 2 is the third element; its priority is lowered to 1.
    # NOTE(review): the label printed below says "changed to 50", but the call
    # above sets the priority to 1 - the message text looks copied from the
    # max-heap demo.
    heap.change_priority(2, 1)
    print('3nd element is changed to 50 inside heap: ', heap.print_heap())
    print('Minimum value not extracted: ', heap.get_min())

Given heap:  [7, 9, 13, 16, 15, 18, 14, 40]
8 is inserted into heap:  [7, 8, 13, 9, 15, 18, 14, 40, 16]
Minimum value extracted:  7
Minimum value is extracted from heap:  [8, 9, 13, 16, 15, 18, 14, 40]
Minimum value extracted:  8
Minimum value is extracted from heap:  [9, 15, 13, 16, 40, 18, 14]
5th element is removed from heap:  [9, 14, 13, 16, 15, 18]
3nd element is changed to 50 inside heap:  [1, 14, 9, 16, 15, 18]
Minimum value not extracted:  1


    For a heap implementation, one of the best options is to use the "heapq" module:

    import heapq
    heapq.heapify(my_heap)
    heapq.heappush(my_heap, value)
    heapq.heappop(my_heap)
    heapq.heappushpop(my_heap, value)
    heapq.heapreplace(my_heap, value)
    heapq.nlargest(1, my_heap)[0]
    
    The "heapq" module itself implements a binary minimum heap.

### Binary Maximum Heap (Implementation with Heapq)

In [42]:
import heapq

class MaxHeap():
    """Max-heap built on ``heapq`` by storing every value multiplied by -1.

    ``heapq`` only provides a min-heap, so the smallest stored (negated)
    value corresponds to the largest original value.  The constructor does
    not order the data; call ``heapify_max`` before using the heap.
    """

    def __init__(self, heap):
        """Store the negated items of ``heap`` in a new list."""
        self.heap = [-1 * item for item in heap]

    def heapify_max(self):
        """Arrange the stored values into heap order in place."""
        heapq.heapify(self.heap)

    def get_max(self):
        """Return the largest original value without removing it."""
        negated_top = self.heap[0]
        return -1 * negated_top

    def heappush_max(self, value):
        """Insert ``value`` into the heap."""
        heapq.heappush(self.heap, value * -1)

    def heappop_max(self):
        """Remove and return the largest original value."""
        return -1 * heapq.heappop(self.heap)

    def print_heap(self):
        """Print the heap with the original (un-negated) values."""
        print([-1 * item for item in self.heap])

# Demo: build a MaxHeap from an unordered list, pop the maximum, push a new
# value, and peek at the maximum, printing the heap after each step.
if __name__ == "__main__":  
    my_list = [8, 9, 13, 2, 5, 5, 3, 7]
    heap = MaxHeap(my_list)
    # The constructor only negates the values; heapify_max() orders them.
    heap.heapify_max()
    # end=' ' keeps the label and the heap (printed by print_heap) on one line.
    print('Given list is converted to heap: ', end = ' ')
    heap.print_heap()
    print('Maximum value extracted:', heap.heappop_max())
    print('Maximum value is extracted from heap: ', end = ' ')
    heap.print_heap()
    heap.heappush_max(10)
    print('10 is inserted into heap: ', end = ' ')
    heap.print_heap()
    print('Maximum value not extracted:', heap.get_max())

Given list is converted to heap:  [13, 9, 8, 7, 5, 5, 3, 2]
Maximum value extracted: 13
Maximum value is extracted from heap:  [9, 7, 8, 2, 5, 5, 3]
10 is inserted into heap:  [10, 9, 8, 7, 5, 5, 3, 2]
Maximum value not extracted: 10


### Heap Sort

In [3]:
def left_child(i):
    """Return the index of the left child of node ``i`` in a 0-based array heap."""
    return i * 2 + 1

def right_child(i):
    """Return the index of the right child of node ``i`` in a 0-based array heap."""
    return i * 2 + 2

def sift_down(A, i, n):
    """Sink ``A[i]`` until neither child within ``A[0..n]`` is larger than it.

    ``n`` is the index of the last element that still belongs to the heap
    (inclusive).  ``A`` is modified in place; nothing is returned.
    """
    while True:
        largest = i
        # Left child is examined before the right one; a child replaces the
        # current candidate only when it is strictly larger.
        for child in (left_child(i), right_child(i)):
            if child <= n and A[child] > A[largest]:
                largest = child
        if largest == i:
            return
        A[i], A[largest] = A[largest], A[i]
        i = largest

def build_heap(A):
    """Rearrange ``A`` in place into a max-heap and return the same list."""
    last = len(A) - 1
    # Visit nodes from index last // 2 down to the root, sinking each one.
    for node in reversed(range(last // 2 + 1)):
        sift_down(A, node, last)
    return A

def heap_sort(A):
    """Sort ``A`` in place in ascending order with heap sort; return ``A``."""
    build_heap(A)
    # ``end`` is the last index of the still-unsorted prefix.
    for end in range(len(A) - 1, 0, -1):
        # Park the current maximum at the end of the prefix ...
        if A[0] > A[end]:
            A[0], A[end] = A[end], A[0]
        # ... and restore heap order on the prefix shortened by one.
        sift_down(A, 0, end - 1)
    return A

In [4]:
heap_sort([2, 2, 19, 1, 5, 11, 5, 14, 7, 16, 2, 13, 7, 15, 6, 1, 4, 9, 6, 11])

[1, 1, 2, 2, 2, 4, 5, 5, 6, 6, 7, 7, 9, 11, 11, 13, 14, 15, 16, 19]

### Convert Array Into Heap

**Task:** The first step of the HeapSort algorithm is to create a heap from the array you want to sort. By the
way, did you know that algorithms based on Heaps are widely used for external sort, when you need
to sort huge files that don’t fit into memory of a computer?

Your task is to implement this first step and convert a given array of integers into a heap. You will
do that by applying a certain number of swaps to the array. Swap is an operation which exchanges
elements $a_i$ and $a_j$ of the array $a$ for some $i$ and $j$. You will need to convert the array into a heap using
only $O(n)$ swaps, as was described in the lectures. Note that you will need to use a min-heap instead
of a max-heap in this problem.

**Input Format:** The first line of the input contains single integer $n$. The next line contains $n$ space-separated
integers $a_i$.

**Constraints:** $1 \leq n \leq 100000; 0 \leq i, j \leq n - 1; 0 \leq a_0, a_1, \dotsc, a_{n-1} \leq 10^9$. All $a_i$ are distinct.

**Output Format:** The first line of the output should contain single integer $m$ — the total number of swaps.
$m$ must satisfy conditions $0 \leq m \leq 4n$. The next $m$ lines should contain the swap operations used
to convert the array $a$ into a heap. Each swap is described by a pair of integers $i, j$ — the 0-based
indices of the elements to be swapped. After applying all the swaps in the specified order the array
must become a heap, that is, for each $i$ where $0 \leq i \leq n − 1$ the following conditions must be true:

1) If $2i + 1 \leq n − 1$, then $a_i < a_{2i+1}$

2) If $2i + 2 \leq n − 1$, then $a_i < a_{2i+2}$.

Note that all the elements of the input array are distinct. Note that any sequence of swaps that has
length at most $4n$ and after which your initial array becomes a correct heap will be graded as correct.

In [4]:
def build_heap_naive(data):
    """Order ``data`` in place with pairwise swaps and return the swaps made.

    Every pair ``(i, j)`` with ``i < j`` is compared, and the two elements are
    exchanged when the earlier one is larger.  Each exchange is recorded as an
    ``(i, j)`` tuple, in the order performed.
    """
    recorded = []
    size = len(data)
    for lo in range(size):
        for hi in range(lo + 1, size):
            if data[lo] > data[hi]:
                data[lo], data[hi] = data[hi], data[lo]
                recorded.append((lo, hi))
    return recorded

def left_child(i):
    """Return the index of the left child of node ``i`` in a 0-based array heap."""
    return i * 2 + 1

def right_child(i):
    """Return the index of the right child of node ``i`` in a 0-based array heap."""
    return i * 2 + 2

def sift_down(A, i, n, count, swaps):
    """Sink ``A[i]`` in the min-heap ``A[0..n]`` and log each swap in ``swaps``.

    ``n`` is the last valid index (inclusive).  Every exchange is appended to
    ``swaps`` as a ``(parent_index, child_index)`` tuple.  ``count`` is only
    incremented locally; when it is an int the caller never sees the change.
    """
    while True:
        smallest = i
        # Right child is examined before the left one; a child replaces the
        # current candidate only when it is strictly smaller.
        for child in (right_child(i), left_child(i)):
            if child <= n and A[child] < A[smallest]:
                smallest = child
        if smallest == i:
            return
        count += 1
        swaps.append((i, smallest))
        A[i], A[smallest] = A[smallest], A[i]
        i = smallest

def build_heap(A):
    """Turn ``A`` into a min-heap in place and return the list of swaps used.

    Each swap is an ``(i, j)`` tuple of 0-based indices, in the order applied.
    """
    last = len(A) - 1
    performed = []
    # Visit nodes from index last // 2 down to the root, sinking each one.
    for node in reversed(range(last // 2 + 1)):
        sift_down(A, node, last, 0, performed)
    return performed

def main():
    """Read ``n`` and the array from stdin, then print the swap count and swaps."""
    n = int(input())
    data = [int(token) for token in input().split()]
    assert len(data) == n
    swaps = build_heap(data)
    print(len(swaps))
    for first, second in swaps:
        print(first, second)

# Run the solver only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

10
23 12 15 2 5 32 1 7 3 10
5
2 6
1 3
3 8
0 2
2 6


### Parallel Processing

**Task:** You have a program which is parallelized and uses $n$ independent threads to process the given list of $m$
jobs. Threads take jobs in the order they are given in the input. If there is a free thread, it immediately
takes the next job from the list. If a thread has started processing a job, it doesn’t interrupt or stop
until it finishes processing the job. If several threads try to take jobs from the list simultaneously, the
thread with smaller index takes the job. For each job you know exactly how long it will take any thread
to process this job, and this time is the same for all the threads. You need to determine for each job
which thread will process it and when it will start processing.

**Input Format:** The first line of the input contains integers $n$ and $m$.
The second line contains $m$ integers $t_i$ — the times in seconds it takes any thread to process $i$-th job.
The times are given in the same order as they are in the list from which threads take jobs.
Threads are indexed starting from $0$.

**Constraints:** $n \leq 10^5; 1 \leq m \leq 10^5; 0 \leq t_i \leq 10^9$.

**Output Format:** Output exactly $m$ lines. $i$-th line (0-based index is used) should contain two space-separated
integers — the 0-based index of the thread which will process the $i$-th job and the time
in seconds when it will start processing that job.

In [1]:
from collections import namedtuple
import heapq

# Result record for one job: index of the thread that runs it and the time
# at which that thread starts it.
Process = namedtuple("Process", ["thread", "started_at"])

def parallel_process_naive(thread, process_times):
    """Assign jobs to threads by scanning all threads for every job.

    ``thread`` is the number of threads and ``process_times`` the job
    durations in input order.  Returns one ``Process(thread, started_at)``
    per job.  Among threads that become free at the same time, the one with
    the smallest index is chosen (``min`` returns the first minimum).
    """
    free_at = [0] * thread
    schedule = []
    for duration in process_times:
        chosen = min(range(thread), key=free_at.__getitem__)
        schedule.append(Process(chosen, free_at[chosen]))
        free_at[chosen] += duration
    return schedule

def parallel_process(thread, process_times):
    """Assign jobs to threads using a priority queue of ``(free_at, index)``.

    ``thread`` is the number of threads and ``process_times`` the job
    durations in input order.  Returns one ``Process(thread, started_at)``
    per job, in job order.

    Every job goes to the thread that becomes free earliest; ties are broken
    by the smaller thread index because tuples compare element by element.
    This also covers zero-length jobs: a thread that finishes instantly is
    free again at the same moment and, having the smaller index, takes the
    next job itself instead of the job being handed to an untouched thread.

    Raises ``IndexError`` if there are jobs but ``thread`` is less than 1.
    """
    # A thread with index k can receive its first job only after k earlier
    # jobs have occupied threads 0..k-1, so at most len(process_times)
    # threads are ever needed.
    workers = min(thread, len(process_times))
    # A list sorted in ascending order already satisfies the heap invariant.
    free_heap = [(0, index) for index in range(workers)]
    result = []
    for duration in process_times:
        free_at, index = free_heap[0]
        result.append(Process(index, free_at))
        # Pop the chosen thread and push it back with its new finish time.
        heapq.heapreplace(free_heap, (free_at + duration, index))
    return result

def main():
    """Read the thread count and job durations from stdin; print the schedule."""
    thread, amount = (int(token) for token in input().split())
    process_times = [int(token) for token in input().split()]
    assert amount == len(process_times)

    schedule = parallel_process(thread, process_times)

    # One line per job: thread index and start time.
    for job in schedule:
        print(job.thread, job.started_at)

# Run the solver only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

4 10
5 3 1 2 6 4 7 1 2 9
0 0
1 0
2 0
3 0
2 1
3 2
1 3
0 5
0 6
3 6


### Merging Tables

**Task:** There are $n$ tables stored in some database. The tables are numbered from $1$ to $n$. All tables share
the same set of columns. Each table contains either several rows with real data or a symbolic link to
another table. Initially, all tables contain data, and $i$-th table has $r_i$ rows. You need to perform $m$ of
the following operations:

1 - Consider table number $destination_i$. Traverse the path of symbolic links to get to the data. That is, while $destination_i$ contains a symbolic link instead of real data, do $destination_i \leftarrow symlink(destination_i)$.

2 - Consider the table number $source_i$ and traverse the path of symbolic links from it in the same manner as for $destination_i$.

3 - Now, $destination_i$ and $source_i$ are the numbers of two tables with real data. If $destination_i \neq source_i$, copy all the rows from table $source_i$ to table $destination_i$, then clear the table $source_i$ and instead of real data put a symbolic link to $destination_i$ into it.

4 - Print the maximum size among all $n$ tables (recall that size is the number of rows in the table).

If the table contains only a symbolic link, its size is considered to be $0$.

**Input Format:** The first line of the input contains two integers $n$ and $m$ — the number of tables in the
database and the number of merge queries to perform, respectively. The second line of the input contains $n$ integers $r_i$ — the number of rows in the $i$-th table. Then follow $m$ lines describing merge queries. Each of them contains two integers $destination_i$ and $source_i$ — the numbers of the tables to merge.

**Constraints:** $1 \leq n, m \leq 100000; 0 \leq r_i \leq 10000; 1 \leq destination_i, source_i \leq n$.

**Output Format:** For each query print a line containing a single integer — the maximum of the sizes of all
tables (in terms of the number of rows) after the corresponding operation.

In [4]:
class Database:
    """Disjoint-set (union-find) model of tables that can be merged.

    Each set of merged tables has one representative table.  All rows of the
    set are accounted to the representative in ``row_counts``; every other
    member has a row count of 0.  ``max_row_count`` always holds the largest
    row count over all tables.
    """

    def __init__(self, row_counts):
        """Create one single-table set per entry of ``row_counts``.

        The list is copied, so later merges do not modify the caller's data.
        """
        self.row_counts = list(row_counts)
        # default=0 keeps an empty database constructible.
        self.max_row_count = max(self.row_counts, default=0)
        n_tables = len(self.row_counts)
        self.ranks = [1] * n_tables
        self.parents = list(range(n_tables))

    def merge(self, src, dst):
        """Merge the sets containing tables ``src`` and ``dst`` (0-based).

        Returns ``False`` if both tables already belong to the same set,
        otherwise performs the merge, updates ``max_row_count`` and returns
        ``True``.  Which of the two sets ends up as the representative is
        decided by rank, not by argument order; the merged row total, and
        therefore ``max_row_count``, is the same either way.
        """
        src_parent = self.get_parent(src)
        dst_parent = self.get_parent(dst)

        if src_parent == dst_parent:
            return False

        # Union by rank: hang the shallower tree under the deeper one so that
        # tree height stays logarithmic.
        if self.ranks[src_parent] > self.ranks[dst_parent]:
            src_parent, dst_parent = dst_parent, src_parent
        self.parents[src_parent] = dst_parent
        if self.ranks[src_parent] == self.ranks[dst_parent]:
            self.ranks[dst_parent] += 1

        # Move all rows to the new representative; the absorbed table is empty.
        self.row_counts[dst_parent] += self.row_counts[src_parent]
        self.row_counts[src_parent] = 0
        self.max_row_count = max(self.max_row_count, self.row_counts[dst_parent])
        return True

    def get_parent(self, table):
        """Return the representative of ``table``'s set, compressing the path."""
        # First pass: walk up to the root (iteratively, so long chains cannot
        # hit the recursion limit).
        root = table
        while self.parents[root] != root:
            root = self.parents[root]
        # Second pass: point every node on the walked path directly at the root.
        while self.parents[table] != root:
            next_table = self.parents[table]
            self.parents[table] = root
            table = next_table
        return root


def main():
    """Read the tables and merge queries from stdin; print the max size after each."""
    n_tables, n_queries = (int(token) for token in input().split())
    counts = [int(token) for token in input().split()]
    assert len(counts) == n_tables
    db = Database(counts)
    for _ in range(n_queries):
        # Table numbers in the input are 1-based; the database is 0-based.
        dst, src = (int(token) - 1 for token in input().split())
        db.merge(dst, src)
        print(db.max_row_count)

# Run the solver only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()