# Heapsort

A heapsort is an in-place sorting algorithm that treats an array like a binary tree and moves the largest values to the end of the heap until the full array is sorted.  

The main steps in a heapsort are:
1. Convert the array into a maxheap (a complete binary tree in which every parent is greater than or equal to its children) 
2. Swap the top element with the last element in the array (putting it in its correct final position)
3. Repeat with `arr[:len(arr)-1]` (all but the sorted elements)

## Visualization of a heapsort
![animation of a heap sort](https://upload.wikimedia.org/wikipedia/commons/4/4d/Heapsort-example.gif)

["Heapsort example"](https://commons.wikimedia.org/wiki/File:Heapsort-example.gif) by Swfung8. Used under [CC BY-SA 3.0](https://creativecommons.org/licenses/by-sa/3.0/deed.en).

## Problem statement

In the cell below, see if you can code a `heapsort` function that takes an array (or Python list) and performs a heapsort on it. You will have to complete the `heapify` function as part of your solution.

In [121]:
#my heap
class MaxHeap:
    """Array-backed binary max-heap (a complete binary tree stored in a list).

    The tree lives in ``self.cbt``: the children of index ``i`` are at
    ``2*i + 1`` and ``2*i + 2`` and its parent is at ``(i - 1) // 2``.
    Only the first ``self.nextidx`` slots belong to the heap.

    ``remove`` does not erase the value it pops: it parks it in the slot
    just past the shrunken heap. Removing every element therefore leaves
    the values in ascending order at the front of ``self.cbt``, which is
    exactly an in-place heapsort.

    Attributes:
        cbt: backing list; unused slots hold ``None`` until written.
        cbtsize: current capacity of ``cbt``.
        nextidx: number of live elements, i.e. the index the next insert
            will write to.
    """

    def __init__(self, cbtsize=5):
        self.cbt = [None for _ in range(cbtsize)]
        self.cbtsize = cbtsize
        self.nextidx = 0

    def _grow(self):
        """Double the capacity (at least 1), keeping the current contents."""
        # max(1, ...) so that a heap created with capacity 0 can still grow.
        new_size = max(1, 2 * self.cbtsize)
        self.cbt = self.cbt + [None] * (new_size - self.cbtsize)
        self.cbtsize = new_size

    def remove(self):
        """Pop and return the largest value, or ``None`` if the heap is empty.

        The popped value is left in ``cbt`` at the slot that has just been
        excluded from the heap instead of being overwritten with ``None``.
        """
        if self.nextidx == 0:
            return None

        # Step 1: swap the root with the last live element and shrink the
        # heap by one, so the old root sits just outside the live region.
        last_idx = self.nextidx - 1
        remove_val = self.cbt[0]
        self.cbt[0] = self.cbt[last_idx]
        self.cbt[last_idx] = remove_val
        self.nextidx = last_idx

        # Step 2: sift the new root down until neither live child is larger.
        cur_idx = 0
        while True:
            lchild_idx = 2 * cur_idx + 1
            rchild_idx = lchild_idx + 1

            # Find the largest of parent / left / right. Strict comparisons
            # mean the parent wins ties, and the left child wins a tie
            # between the two children.
            largest_idx = cur_idx
            if (lchild_idx < self.nextidx
                    and self.cbt[lchild_idx] > self.cbt[largest_idx]):
                largest_idx = lchild_idx
            if (rchild_idx < self.nextidx
                    and self.cbt[rchild_idx] > self.cbt[largest_idx]):
                largest_idx = rchild_idx

            if largest_idx == cur_idx:
                break

            self.cbt[cur_idx], self.cbt[largest_idx] = (
                self.cbt[largest_idx], self.cbt[cur_idx])
            cur_idx = largest_idx

        return remove_val

    def insert(self, data):
        """Add ``data`` to the heap, growing the backing list when it fills up."""
        # Guard for a buffer with no free slot (e.g. initial capacity 0).
        if self.nextidx >= self.cbtsize:
            self._grow()

        # Step 1: a new value always goes in the first free slot.
        cur_idx = self.nextidx
        self.cbt[cur_idx] = data

        # Step 2: sift up; in a max-heap every parent must be >= its child.
        while cur_idx > 0:
            parent_idx = (cur_idx - 1) // 2
            if not self.cbt[parent_idx] < self.cbt[cur_idx]:
                # Everything above already satisfies the heap property.
                break
            self.cbt[parent_idx], self.cbt[cur_idx] = (
                self.cbt[cur_idx], self.cbt[parent_idx])
            cur_idx = parent_idx

        self.nextidx += 1

        # Grow as soon as the buffer is full. This also runs for the very
        # first element, so a capacity-1 heap can accept a second insert.
        if self.nextidx == self.cbtsize:
            self._grow()
                
            
        
        
        
        
        
        
# Manual smoke test: fill a heap, pop the five largest values, then insert
# once more, printing the backing list after every operation.
maxheap = MaxHeap()
arr = [3, 7, 4, 6, 1, 0, 9, 8, 9, 4, 3, 5]
print('insert test')

for value in arr:
    maxheap.insert(value)
    print(maxheap.cbt)


print('remove test')
for _ in range(5):
    # Each removal prints the popped value followed by the buffer state.
    print(maxheap.remove())
    print(maxheap.cbt)
print('insert test')
maxheap.insert(2)
print(maxheap.cbt)

insert test
[3, None, None, None, None]
[7, 3, None, None, None]
[7, 3, 4, None, None]
[7, 6, 4, 3, None]
[7, 6, 4, 3, 1, None, None, None, None, None]
[7, 6, 4, 3, 1, 0, None, None, None, None]
[9, 6, 7, 3, 1, 0, 4, None, None, None]
[9, 8, 7, 6, 1, 0, 4, 3, None, None]
[9, 9, 7, 8, 1, 0, 4, 3, 6, None]
[9, 9, 7, 8, 4, 0, 4, 3, 6, 1, None, None, None, None, None, None, None, None, None, None]
[9, 9, 7, 8, 4, 0, 4, 3, 6, 1, 3, None, None, None, None, None, None, None, None, None]
[9, 9, 7, 8, 4, 5, 4, 3, 6, 1, 3, 0, None, None, None, None, None, None, None, None]
remove test
9
[9, 8, 7, 6, 4, 5, 4, 3, 0, 1, 3, 9, None, None, None, None, None, None, None, None]
9
[8, 6, 7, 3, 4, 5, 4, 3, 0, 1, 9, 9, None, None, None, None, None, None, None, None]
8
[7, 6, 5, 3, 4, 1, 4, 3, 0, 8, 9, 9, None, None, None, None, None, None, None, None]
7
[6, 4, 5, 3, 0, 1, 4, 3, 7, 8, 9, 9, None, None, None, None, None, None, None, None]
6
[5, 4, 4, 3, 0, 1, 3, 6, 7, 8, 9, 9, None, None, None, None, None, N

In [122]:
'''
The main steps in a heapsort are:

1.Convert the array into a maxheap (a complete binary tree with decreasing values)
2.Swap the top element with the last element in the array (putting it in it's correct final position)
3.Repeat with arr[:len(arr)-1] (all but the sorted elements)

'''


def heapsort(arr):
    """Sort ``arr`` into ascending order using a ``MaxHeap`` and return it.

    Steps:
      1. Insert every element of ``arr`` into a fresh ``MaxHeap``.
      2. Remove every element again. ``MaxHeap.remove`` leaves each popped
         maximum in the slot just past the shrinking heap, so once the heap
         is empty its backing list starts with the values in ascending order.
      3. Copy those values back into ``arr``.

    :param arr: mutable sequence of mutually comparable values
    :return: the same ``arr`` object, now sorted
    """
    maxheap = MaxHeap()

    # 1: build the heap
    heapify(arr, maxheap)

    # 2: pop every element; the pops come out in descending order and are
    # parked from the back of the live region towards the front.
    for _ in range(maxheap.nextidx):
        maxheap.remove()

    # 3: copy back exactly len(arr) values. Bounding the copy by the input
    # length avoids scanning for a None sentinel, which would depend on the
    # heap's buffer always having unused padding after the data.
    for idx, item in enumerate(maxheap.cbt[:len(arr)]):
        arr[idx] = item

    return arr
    
    
def heapify(arr,maxheap):
    """Insert every element of ``arr`` into ``maxheap``, in order.

    After each insertion the heap's backing list (``maxheap.cbt``) is
    printed so the build-up of the heap can be followed step by step.

    :param arr: sequence of values to load into the heap (not modified)
    :param maxheap: heap object providing ``insert`` and a ``cbt`` attribute
    :return: None
    """
    for value in arr:
        maxheap.insert(value)
        print(maxheap.cbt)
    
    
    
# Demo run: sort a sample list that contains duplicate values.
arr = [3, 7, 4, 6, 1, 0, 9, 8, 9, 4, 3, 5]
heapsort(arr)

[3, None, None, None, None]
[7, 3, None, None, None]
[7, 3, 4, None, None]
[7, 6, 4, 3, None]
[7, 6, 4, 3, 1, None, None, None, None, None]
[7, 6, 4, 3, 1, 0, None, None, None, None]
[9, 6, 7, 3, 1, 0, 4, None, None, None]
[9, 8, 7, 6, 1, 0, 4, 3, None, None]
[9, 9, 7, 8, 1, 0, 4, 3, 6, None]
[9, 9, 7, 8, 4, 0, 4, 3, 6, 1, None, None, None, None, None, None, None, None, None, None]
[9, 9, 7, 8, 4, 0, 4, 3, 6, 1, 3, None, None, None, None, None, None, None, None, None]
[9, 9, 7, 8, 4, 5, 4, 3, 6, 1, 3, 0, None, None, None, None, None, None, None, None]


[0, 1, 3, 3, 4, 4, 5, 6, 7, 8, 9, 9]

<span class="graffiti-highlight graffiti-id_1h50lwk-id_kuae7he"><i></i><button>Hide Solution</button></span>

In [118]:
# Solution
# down-heapify 在 remove 有用
# 基本上就是 拿一個已經有值的array, 持續用 down-heapify

# 這個 function : 給我一個要調整的 index i, 還有要參考的arrary 大小 n, 我幫你把這個 idx i 調到對的 maxheap 位置
# 假設 arry size = 10, 我想把第 5 個 element 做 maxheapfiy 也就是放到他該放的地方, 而這整個array 他都可以rewrite
# 那就是 heapify(arr,10,5), 如果他在裡面第一次下降後, 還沒到對的位置, 最後的 recursive call 會讓他持續下降, 
# 直到位置對

def heapify(arr, n, i):
    """Sift the value at index ``i`` down until its subtree is a max-heap.

    ``arr`` is viewed as a binary tree in which the children of index ``k``
    are ``2*k + 1`` and ``2*k + 2``. Only indices below ``n`` are considered
    part of the heap, so anything stored at ``n`` or beyond is left alone.

    :param arr: list being rearranged in place
    :param n: number of leading elements of ``arr`` that belong to the heap
    :param i: index of the node to push down to its correct position
    :return: None
    """
    node = i
    while True:
        left_child = 2 * node + 1
        right_child = left_child + 1

        # Start by assuming the current node holds the largest value, then
        # let each in-range child challenge that assumption.
        biggest = node
        if left_child < n and arr[biggest] < arr[left_child]:
            biggest = left_child
        if right_child < n and arr[biggest] < arr[right_child]:
            biggest = right_child

        if biggest == node:
            # Neither child is larger: the value has reached its place.
            return

        # A child was larger: swap it up, then keep following the displaced
        # value downwards, since it may still be smaller than its new children.
        arr[node], arr[biggest] = arr[biggest], arr[node]
        node = biggest
        
def heapsort(arr):
    """Sort ``arr`` in place into ascending order using heapsort.

    Phase 1 turns the whole list into a max-heap by calling ``heapify`` on
    every parent node, from the last parent back to the root. Phase 2
    repeatedly swaps the root (the current maximum) into the last unsorted
    slot and restores the heap over the remaining, shorter prefix.

    :param arr: list of mutually comparable values; modified in place
    :return: None
    """
    n = len(arr)

    # Phase 1: build the max-heap. Indices after n // 2 - 1 have no children
    # inside the list, so heapify would do nothing for them; start at the
    # last parent and work back to the root.
    for i in range(n // 2 - 1, -1, -1):
        heapify(arr, n, i)

    # Phase 2: this is the heap "remove" operation done in place. The root is
    # the largest remaining value, so swap it to position i (its final place).
    # The value now at the root is arbitrary, so sift it down again, passing i
    # as the heap size: heapify only touches indices < i, which keeps the
    # already-sorted tail out of the heap.
    for i in range(n - 1, 0, -1):
        arr[i], arr[0] = arr[0], arr[i]
        heapify(arr, i, 0)

In [123]:
def test_function(test_case):
    """Run ``heapsort`` on a test case and print whether the result matches.

    :param test_case: indexable pair ``[input_list, expected_sorted_list]``;
        the input list is handed to ``heapsort`` and then compared with the
        expected list
    :return: None; prints "Pass" on a match and "False" otherwise
    """
    candidate = test_case[0]
    heapsort(candidate)
    verdict = "Pass" if candidate == test_case[1] else "False"
    print(verdict)

In [124]:
# Test: unsorted input containing repeated values (two 9s, two 4s, two 3s).
arr = [3, 7, 4, 6, 1, 0, 9, 8, 9, 4, 3, 5]
solution = [0, 1, 3, 3, 4, 4, 5, 6, 7, 8, 9, 9]

test_case = [arr, solution]

test_function(test_case)


[3, None, None, None, None]
[7, 3, None, None, None]
[7, 3, 4, None, None]
[7, 6, 4, 3, None]
[7, 6, 4, 3, 1, None, None, None, None, None]
[7, 6, 4, 3, 1, 0, None, None, None, None]
[9, 6, 7, 3, 1, 0, 4, None, None, None]
[9, 8, 7, 6, 1, 0, 4, 3, None, None]
[9, 9, 7, 8, 1, 0, 4, 3, 6, None]
[9, 9, 7, 8, 4, 0, 4, 3, 6, 1, None, None, None, None, None, None, None, None, None, None]
[9, 9, 7, 8, 4, 0, 4, 3, 6, 1, 3, None, None, None, None, None, None, None, None, None]
[9, 9, 7, 8, 4, 5, 4, 3, 6, 1, 3, 0, None, None, None, None, None, None, None, None]
Pass


In [125]:
# Test: input made up only of runs of duplicate values.
arr = [5, 5, 5, 3, 3, 3, 4, 4, 4, 4]
solution = [3, 3, 3, 4, 4, 4, 4, 5, 5, 5]
test_case = [arr, solution]
test_function(test_case)


[5, None, None, None, None]
[5, 5, None, None, None]
[5, 5, 5, None, None]
[5, 5, 5, 3, None]
[5, 5, 5, 3, 3, None, None, None, None, None]
[5, 5, 5, 3, 3, 3, None, None, None, None]
[5, 5, 5, 3, 3, 3, 4, None, None, None]
[5, 5, 5, 4, 3, 3, 4, 3, None, None]
[5, 5, 5, 4, 3, 3, 4, 3, 4, None]
[5, 5, 5, 4, 4, 3, 4, 3, 4, 3, None, None, None, None, None, None, None, None, None, None]
Pass


In [126]:
# Test: single-element input.
arr = [99]
solution = [99]
test_case = [arr, solution]
test_function(test_case)


[99, None, None, None, None]
Pass


In [127]:
# Test: ascending input with one out-of-place 0 at the end.
arr = [0, 1, 2, 5, 12, 21, 0]
solution = [0, 0, 1, 2, 5, 12, 21]
test_case = [arr, solution]
test_function(test_case)


[0, None, None, None, None]
[1, 0, None, None, None]
[2, 0, 1, None, None]
[5, 2, 1, 0, None]
[12, 5, 1, 0, 2, None, None, None, None, None]
[21, 5, 12, 0, 2, 1, None, None, None, None]
[21, 5, 12, 0, 2, 1, 0, None, None, None]
Pass
