In [1]:
import time
import numpy as np
import math
from copy import deepcopy

The file contains all of the integers between 1 and 10,000 (inclusive, with no repeats) in unsorted order. The integer in the ith row of the file gives you the ith entry of an input array.

Your task is to compute the total number of comparisons used to sort the given input file by QuickSort. As you know, the number of comparisons depends on which elements are chosen as pivots, so we'll ask you to explore three different pivoting rules.

You should not count comparisons one-by-one. Rather, when there is a recursive call on a subarray of length m, you should simply add m−1 to your running total of comparisons. (This is because the pivot element is compared to each of the other m−1 elements in the subarray in this recursive call.)

WARNING: The Partition subroutine can be implemented in several different ways, and different implementations can give you differing numbers of comparisons. For this problem, you should implement the Partition subroutine exactly as it is described in the video lectures (otherwise you might get the wrong answer).

DIRECTIONS FOR THIS PROBLEM:

1. For the first part of the programming assignment, you should always use the first element of the array as the pivot element.

2. Compute the number of comparisons (as in Problem 1), always using the final element of the given array as the pivot element. Again, be sure to implement the Partition subroutine exactly as it is described in the video lectures.
Recall from the lectures that, just before the main Partition subroutine, you should exchange the pivot element (i.e., the last element) with the first element.

3. Compute the number of comparisons (as in Problem 1), using the "median-of-three" pivot rule. [The primary motivation behind this rule is to do a little bit of extra work to get much better performance on input arrays that are nearly sorted or reverse sorted.] In more detail, you should choose the pivot as follows. Consider the first, middle, and final elements of the given array. (If the array has odd length it should be clear what the "middle" element is; for an array with even length 2k, use the kth element as the "middle" element. So for the array 4 5 6 7, the "middle" element is the second one ---- 5 and not 6!) Identify which of these three elements is the median (i.e., the one whose value is in between the other two), and use this as your pivot. As discussed in the first and second parts of this programming assignment, be sure to implement Partition exactly as described in the video lectures (including exchanging the pivot element with the first element just before the main Partition subroutine).
EXAMPLE: For the input array 8 2 4 5 7 1 you would consider the first (8), middle (4), and last (1) elements; since 4 is the median of the set {1,4,8}, you would use 4 as your pivot element.
SUBTLE POINT: A careful analysis would keep track of the comparisons made in identifying the median of the three candidate elements. You should NOT do this. That is, as in the previous two problems, you should simply add m−1 to your running total of comparisons every time you recurse on a subarray with length m.

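Be careful: `partition` rearranges the list it is given in place, so after the call the caller's original array has been modified. (`quick_sort` works on a copy for this reason.)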

In [62]:
def ChoosePivot(array,num):
    '''Return the index of the pivot element of array under pivot rule num.

    Rules:
        1 -- always the first element (index 0).
        2 -- always the last element (index len(array)-1).
        3 -- "median of three": the index of whichever of the first, middle
             and last elements has the middle value. For even length 2k the
             middle element is the k-th one (index k-1).

    Raises ValueError if num is not 1, 2 or 3. Rule 3 raises IndexError on an
    empty array because it reads array elements.
    '''
    n = len(array)
    if num == 1:
        return 0
    if num == 2:
        return n-1
    if num == 3:
        if n == 2:
            # Two-element special case kept from the original implementation:
            # pick the smaller of the two elements.
            return 0 if array[0] < array[1] else 1
        # Same index as math.ceil(n/2)-1 for n >= 1, without float arithmetic.
        middle = (n-1)//2
        # Order the three candidate positions by value. sorted() is stable, so
        # ties fall back to first/middle/last order instead of matching none
        # of a set of strict "a < b < c" chains and returning None.
        by_value = sorted((0, middle, n-1), key=lambda i: array[i])
        return by_value[1]
    raise ValueError("num must be 1, 2 or 3, got %r" % (num,))

In [63]:
def partition(array, index_p):
    '''Partition array in place around the element at index_p.

    The pivot is first swapped to the front. A single left-to-right scan then
    moves every element smaller than the pivot into the region just after it,
    and finally the pivot is swapped to the end of that region.

    Returns a tuple (array, pivot_index): the same list object that was passed
    in (now rearranged) and the index at which the pivot ended up.
    '''
    # Bring the pivot to position 0.
    array[0], array[index_p] = array[index_p], array[0]
    pivot = array[0]

    # last_small is the index of the right-most element known to be < pivot;
    # 0 means no such element has been found yet.
    last_small = 0
    for scan in range(1, len(array)):
        if array[scan] < pivot:
            # Grow the "smaller than pivot" region by one and move the
            # current element into the newly opened slot.
            last_small += 1
            array[last_small], array[scan] = array[scan], array[last_small]

    # Drop the pivot between the smaller and the not-smaller regions.
    array[0], array[last_small] = array[last_small], array[0]
    return array, last_small
    

In [64]:
def quick_sort(orig_array,num):
    '''Return a sorted copy of orig_array, tallying comparisons as it goes.

    orig_array is copied with a slice before any work is done, so the caller's
    sequence is left untouched even though partition rearranges its argument
    in place. num is the pivot rule passed straight through to ChoosePivot.

    Every call on a sub-array of length m >= 2 adds m-1 to the function
    attribute quick_sort.counter. Callers reset quick_sort.counter to 0 before
    a run; if it has never been set, it is started from 0 here.
    '''
    array = orig_array[:]
    n = len(array)
    # Empty and one-element inputs are already sorted and cost no comparisons.
    # Returning here also keeps an empty input from subtracting 1 from the
    # tally and then failing inside partition.
    if n <= 1:
        return array
    # getattr keeps a forgotten reset from raising AttributeError.
    quick_sort.counter = getattr(quick_sort, "counter", 0) + (n - 1)

    #Choose pivot p
    index_p = ChoosePivot(array,num)
    #Partition array around P; index_p becomes the pivot's final position
    array,index_p = partition(array,index_p)

    #Recursively sort the part before the pivot and the part after it.
    #An empty side is handled by the base case above, so no branching is needed.
    first_half = quick_sort(array[:index_p],num)
    second_half = quick_sort(array[index_p+1:],num)
    return first_half + [array[index_p]] + second_half

In [65]:
# Sanity check of pivot rule 1 (first element) on a small fixed input.
# perf_counter is used because it is a monotonic, high-resolution clock meant
# for measuring short intervals; time.time() is a wall-clock reading.
start_time = time.perf_counter()
quick_sort.counter=0
print(quick_sort([1,3,5,2,4,6],1))
print(quick_sort.counter) #11
print("--- %s seconds ---" % (time.perf_counter() - start_time))
# Turn the expected count noted above into a check that fails loudly.
assert quick_sort.counter == 11

[1, 2, 3, 4, 5, 6]
11
--- 0.0007691383361816406 seconds ---


In [66]:
# Sanity check of pivot rule 3 (median of three) on a small fixed input.
# perf_counter is used because it is a monotonic, high-resolution clock meant
# for measuring short intervals; time.time() is a wall-clock reading.
start_time = time.perf_counter()
quick_sort.counter = 0
print(quick_sort([1,5,3,2,4],3))
print(quick_sort.counter) #6
print("--- %s seconds ---" % (time.perf_counter() - start_time))
# Turn the expected count noted above into a check that fails loudly.
assert quick_sort.counter == 6

[1, 2, 3, 4, 5]
6
--- 0.0009107589721679688 seconds ---


In [67]:
# Read the assignment input: one integer per line. Each line is stripped of
# surrounding whitespace once; lines that are not purely digits after
# stripping (for example blank lines) are skipped.
with open("week3_file/week3.txt", "r") as f:
    ques_arr = [int(text) for text in map(str.strip, f) if text.isdigit()]
print(len(ques_arr))

10000


In [68]:
# First 10 entries of the input, sorted once per pivot rule.
# The original cell annotated the expected comparison counts as
# 25 (rule 1), 31 (rule 2) and 21 (rule 3).
test = ques_arr[:10]
for pivot_rule in (1, 2, 3):
    # perf_counter is a monotonic clock intended for timing short intervals.
    start_time = time.perf_counter()
    quick_sort.counter = 0
    sorted_array = quick_sort(test,pivot_rule)
    print('Number of comparison: ',quick_sort.counter)
    print("--- %s seconds ---" % (time.perf_counter() - start_time))

Number of comparison:  25
--- 0.0010399818420410156 seconds ---
Number of comparison:  31
--- 0.0008959770202636719 seconds ---
Number of comparison:  21
--- 0.00036978721618652344 seconds ---


In [69]:
# First 100 entries of the input, sorted once per pivot rule.
# The original cell annotated the expected comparison counts as
# 620 (rule 1), 573 (rule 2) and 502 (rule 3).
test = ques_arr[:100]
for pivot_rule in (1, 2, 3):
    # perf_counter is a monotonic clock intended for timing short intervals.
    start_time = time.perf_counter()
    quick_sort.counter = 0
    sorted_array = quick_sort(test,pivot_rule)
    print('Number of comparison: ',quick_sort.counter)
    print("--- %s seconds ---" % (time.perf_counter() - start_time))

Number of comparison:  620
--- 0.0010530948638916016 seconds ---
Number of comparison:  573
--- 0.0014231204986572266 seconds ---
Number of comparison:  502
--- 0.0012078285217285156 seconds ---


In [70]:
# First 1000 entries of the input, sorted once per pivot rule.
# The original cell annotated the expected comparison counts as
# 11175 (rule 1), 10957 (rule 2) and 9735 (rule 3).
test = ques_arr[:1000]
for pivot_rule in (1, 2, 3):
    # perf_counter is a monotonic clock intended for timing short intervals.
    start_time = time.perf_counter()
    quick_sort.counter = 0
    sorted_array = quick_sort(test,pivot_rule)
    print('Number of comparison: ',quick_sort.counter)
    print("--- %s seconds ---" % (time.perf_counter() - start_time))

Number of comparison:  11175
--- 0.00954890251159668 seconds ---
Number of comparison:  10957
--- 0.0061490535736083984 seconds ---
Number of comparison:  9735
--- 0.008684873580932617 seconds ---


In [71]:
# The full input, sorted once per pivot rule.
# The original cell annotated the expected comparison counts as
# 162085 (rule 1), 164123 (rule 2) and 138382 (rule 3).
test = ques_arr
for pivot_rule in (1, 2, 3):
    # perf_counter is a monotonic clock intended for timing short intervals.
    start_time = time.perf_counter()
    quick_sort.counter = 0
    sorted_array = quick_sort(test,pivot_rule)
    print('Number of comparison: ',quick_sort.counter)
    print("--- %s seconds ---" % (time.perf_counter() - start_time))

Number of comparison:  162085
--- 0.09326791763305664 seconds ---
Number of comparison:  164123
--- 0.08128905296325684 seconds ---
Number of comparison:  138382
--- 0.062458038330078125 seconds ---


In [None]:
#More readable but slower ChoosePivot
def ChoosePivot(array,num):
    '''Return the index of the pivot element of array under pivot rule num.

    Rules:
        1 -- always the first element (index 0).
        2 -- always the last element (index len(array)-1).
        3 -- "median of three": the index of whichever of the first, middle
             and last elements has the middle value. For even length 2k the
             middle element is the k-th one (index k-1).

    This variant maps each candidate value back to its index with a dict.

    Raises ValueError if num is not 1, 2 or 3. Rule 3 raises IndexError on an
    empty array because it reads array elements.
    '''
    n = len(array)
    if num == 1:
        return 0
    if num == 2:
        return n-1
    #Median of three
    if num == 3:
        middle_index = math.ceil(n/2)-1
        first_element = array[0]
        last_element = array[n-1]
        middle_element = array[middle_index]

        # Value -> index lookup. Equal candidate values share one key, and the
        # later assignment wins.
        G = {}
        G[first_element] = 0
        G[middle_element] = middle_index
        G[last_element] = n-1

        if n == 2: #2
            if first_element < last_element:
                return G[first_element]
            return G[last_element] #equals to 1 when n==2

        # Take the median with sorted() so it is one of the original values.
        # np.median returns a float64, which is not a reliable dict key: it
        # loses precision for large integers (KeyError) and cannot handle
        # non-numeric comparable values.
        median = sorted([first_element,middle_element,last_element])[1]
        return G[median]
    raise ValueError("num must be 1, 2 or 3, got %r" % (num,))



In [None]:
def ChoosePivot(array,num):
    '''Return the index of the pivot element of array under pivot rule num.

    Rules:
        1 -- always the first element (index 0).
        2 -- always the last element (index len(array)-1).
        3 -- "median of three": the index of whichever of the first, middle
             and last elements has the middle value. For even length 2k the
             middle element is the k-th one (index k-1).

    This variant ranks the three candidates with np.argsort.

    Raises ValueError if num is not 1, 2 or 3. Rule 3 raises IndexError on an
    empty array because it reads array elements.
    '''
    n = len(array)
    if num == 1:
        return 0
    if num == 2:
        return n-1
    #Median of three
    if num == 3:
        first_element = array[0]
        last_element = array[n-1]
        middle_element = array[math.ceil(n/2)-1]
        # index[k] is the position in array of elements[k].
        index = [0,math.ceil(n/2)-1,n-1]
        elements = [first_element,middle_element,last_element]

        if n == 2: #2
            if first_element < last_element:
                return 0
            return n-1 #equals to 1 when n==2

        #Median of the three: the middle entry of the sorted order.
        # kind="stable" keeps equal candidates in first/middle/last order, so
        # the chosen index is deterministic when values tie.
        median_index = np.argsort(elements, kind="stable")[len(elements)//2]
        return index[median_index]
    raise ValueError("num must be 1, 2 or 3, got %r" % (num,))

