In [74]:
import random
import time
import csv

# Sorter

## Quick Sort

In [21]:
class QuickSort:
    def __init__(self):
        self.arr = None
        self.random_seed = None
        
    def set_random_seed(self, seed):
        self.random_seed = seed
        
    def fit(self, arr):
        self.arr = arr
        
    def partition(self, low, high, pivot_index):
        # swap the pivot with the last element
        self.arr[pivot_index], self.arr[high] = self.arr[high], self.arr[pivot_index]
        
        pivot = self.arr[high]
        i = low - 1
        for j in range(low, high):
            if self.arr[j] <= pivot:
                i += 1
                self.arr[i], self.arr[j] = self.arr[j], self.arr[i]
                
        self.arr[i + 1], self.arr[high] = self.arr[high], self.arr[i + 1]
        
        return i + 1
    
    def find_median_of_medians(self, arr):
        if len(arr) <= 5:
            return sorted(arr)[len(arr) // 2]
        
        # Divide the array into subarrays of size 5 or less
        subarrays = [arr[j : j + 5] for j in range(0, len(arr), 5)]
        
        # Calculate the median of each subarray
        medians = [sorted(subarray)[len(subarray) // 2] for subarray in subarrays]
        
        median_of_medians = self.find_median_of_medians(medians)
        
        return median_of_medians
    
    def select_pivot_median_of_medians(self, low, high):
        '''return the index of median_of_medians'''
        pivot = self.find_median_of_medians(self.arr[low : high + 1])
        return self.arr.index(pivot)
    
    def select(self, low, high, i):
        ''' return the value of the ith order statistic i = 0..(n-1)'''
        if (high - low + 1) == 1:
            return self.arr[low]
        
        pivot = self.find_median_of_medians(self.arr[low : high + 1])
        pivot_index = self.arr.index(pivot)
        
        pivot_idx_after_part = self.partition(low, high, pivot_index)
        
        # print("pivot: {}, pivot_index: {}".format(pivot, pivot_index))
        # print("partitioned array: ", self.arr[low : high + 1])
        
        if i == pivot_idx_after_part:
            return self.arr[pivot_idx_after_part]
        elif i < pivot_idx_after_part:
            return self.select(low, pivot_idx_after_part - 1, i)
        else:
            return self.select(pivot_idx_after_part + 1, high, i - pivot_idx_after_part - 1)

    def select_pivot_exact_median(self, low, high):
        pivot = self.select(low, high, (low + high + 1) // 2)
        # print("low: {}, high: {}, pivot_index: {}, pivot: {}".format(low, high, self.arr.index(pivot), pivot))
        return self.arr.index(pivot)
    
    def select_pivot_random(self, low, high):
        self.set_random_seed(10)
        random.seed(self.random_seed)
        return random.randint(low, high)
    
    def select_pivot_first(self, low, high):
        # print("pivot: ", self.arr[low])
        return low
    
    def select_pivot_last(self, low, high):
        # print("pivot: ", self.arr[high])
        return high
    
    def select_pivot_median_of_three(self, low, high):
        mid = (low + high) // 2
        candidates = [(low, self.arr[low]), (mid, self.arr[mid]), (high, self.arr[high])]
        median = sorted(candidates, key=lambda x: x[1])[1]
        # print("pivot: ", median[1])
        return median[0]
    
    def select_pivot_method(self, method):
        if method == "random":
            return self.select_pivot_random
        elif method == "median_of_medians":
            return self.select_pivot_median_of_medians
        elif method == "exact_median":
            return self.select_pivot_exact_median
        elif method == "first":
            return self.select_pivot_first
        elif method == "last":
            return self.select_pivot_last
        elif method == "median_of_three":
            return self.select_pivot_median_of_three
        else:
            raise ValueError("Invalid pivot selection method")
    
    def quicksort(self, low, high, select_method="random"):
        select_pivot = self.select_pivot_method(select_method)
        
        if low < high:
            pivot_index = select_pivot(low, high)
            pivot_idx_after_part = self.partition(low, high, pivot_index)
            self.quicksort(low, pivot_idx_after_part - 1, select_method)
            self.quicksort(pivot_idx_after_part + 1, high, select_method)
            
    def print_array(self):
        print(self.arr)

In [25]:
sorter = QuickSort() # Instantiate the QuickSort class
arr = generate_random_array(20)
sorter.fit(arr)
sorter.print_array()
sorter.quicksort(0, 20 - 1, select_method="random")
sorter.print_array()

[2, 9, 3, 14, 19, 20, 17, 1, 7, 16, 11, 6, 10, 12, 13, 15, 5, 4, 18, 8]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]


## Merge Sort

In [52]:
def merge_sort(arr):
    if len(arr) <= 1:
        return arr

    mid = len(arr) // 2
    left_half = arr[:mid]
    right_half = arr[mid:]

    left_half = merge_sort(left_half)
    right_half = merge_sort(right_half)

    return merge(left_half, right_half)

def merge(left, right):
    result = []
    i = j = 0

    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            result.append(left[i])
            i += 1
        else:
            result.append(right[j])
            j += 1

    while i < len(left):
        result.append(left[i])
        i += 1

    while j < len(right):
        result.append(right[j])
        j += 1

    return result

## Insertion Sort

In [53]:
def insertion_sort(arr):
    n = len(arr)
    
    for i in range(1, n):
        key = arr[i]  # Current element to be inserted
        j = i - 1
        
        while j >= 0 and arr[j] > key:
            arr[j + 1] = arr[j]  # Shift elements greater than key to the right
            j -= 1
        
        arr[j + 1] = key  # Insert the current element at the correct position
    
    return arr

# Experiments

In [22]:
import platform

# Get system-related information
system = platform.system()
processor = platform.processor()
python_version = platform.python_version()

# Display the experiment environment
print("Experiment Environment:")
print(f"Operating System: {system}")
print(f"Processor: {processor}")
print(f"Python Version: {python_version}")


Experiment Environment:
Operating System: Darwin
Processor: arm
Python Version: 3.9.12


## array generate functions

In [46]:
# array generate functions

def generate_random_array(n):
    random.seed(123)
    arr = random.sample(range(1, n+1), n)
    return arr

# Generate an array of a given size in ascending order
def generate_sorted_array(n):
    return list(range(n))

# Generate an array of a given size in descending order
def generate_reverse_sorted_array(size):
    return list(range(size, 0, -1))

# Generate an array of same item
def generate_identical_item_array(size):
    return [0 for i in range(size)]

# Generate an array of X% identical items
def generate_X_percent_identical_item_array(size, X):
    random.seed(123)
    
    arr = []
    identical_count = int((X / 100) * size)
    
    # Generate identical elements
    identical_elements = random.randint(1, size)
    arr.extend([identical_elements] * identical_count)
    
    # Generate remaining distinct elements
    distinct_elements = random.sample(range(1, size + 1), size - identical_count)
    arr.extend(distinct_elements)
    
    # Shuffle the array
    random.shuffle(arr)
    
    return arr

## Quick Sort

In [35]:
def measure_quicksort_execution_time(array_generator, array_size, num_iterations, pivot_selection_method):
    sorter = QuickSort() # Instantiate the QuickSort class

    execution_times = []
    for _ in range(num_iterations):
        arr = array_generator(array_size)
        # Set the array to be sorted using the fit function
        sorter.fit(arr)
        
        start_time = time.time()
        sorter.quicksort(0, array_size - 1, pivot_selection_method)
        end_time = time.time()
        execution_time = end_time - start_time
        execution_times.append(execution_time)
    
    return execution_times

def run_experiment(array_generator, array_size, num_iterations):
    pivot_selection_method = {
        "Median of Medians": "median_of_medians", 
        "Median of Three": "median_of_three", 
        "Exact Medians": "exact_median", 
        "Random": "random", 
        "Last": "last"
    }
    
    execution_times = {}
    
    # Measure the execution times for each pivot selection
    for method, para in pivot_selection_method.items():
        execution_times[method] = None
        try:
            execution_times[method] = measure_quicksort_execution_time(array_generator, array_size, num_iterations, para)
        except:
            print(f"Recursion depth exceeded for pivot selection method: {method}")
    
    print(f"array size: {array_size}, num_iterations: {num_iterations}")
    print()
    
    for technique, times in execution_times.items():
        try:
            avg_execution_time = round(sum(times) / len(times), 5)
            print(f"{technique}: {avg_execution_time} seconds")
        except:
            print(f"{technique}: recursion depth exceeded")
        
    print("------------------------------------------------------------------")

In [39]:
num_iterations = 10
array_sizes = [1000, 10000, 50000]
print("unsorted array: ")
print("------------------------------------------------------------------")
for array_size in array_sizes:
    run_experiment(generate_random_array, array_size, num_iterations)

unsorted array: 
------------------------------------------------------------------
array size: 1000, num_iterations: 10

Median of Medians: 0.0079 seconds
Median of Three: 0.00182 seconds
Exact Medians: 0.01194 seconds
Random: 0.0066 seconds
Last: 0.00172 seconds
------------------------------------------------------------------
array size: 10000, num_iterations: 10

Median of Medians: 0.21473 seconds
Median of Three: 0.02217 seconds
Exact Medians: 0.64904 seconds
Random: 0.07108 seconds
Last: 0.02122 seconds
------------------------------------------------------------------
array size: 50000, num_iterations: 10

Median of Medians: 4.67672 seconds
Median of Three: 0.12962 seconds
Exact Medians: 14.28389 seconds
Random: 0.38024 seconds
Last: 0.13583 seconds
------------------------------------------------------------------


In [48]:
num_iterations = 10
array_sizes = [1000, 10000, 50000]
print("sorted array: ")
print("last element pivot selection is ignored since the partition process exceeds the maximum recursion depth when n = 3000")
print("------------------------------------------------------------------")
for array_size in array_sizes:
    run_experiment(generate_sorted_array, array_size, num_iterations)

sorted array: 
last element pivot selection is ignored since the partition process exceeds the maximum recursion depth when n = 3000
------------------------------------------------------------------
array size: 1000, num_iterations: 10

Median of Medians: 0.01396 seconds
Median of Three: 0.0015 seconds
Exact Medians: 0.02282 seconds
Random: 0.00724 seconds
Last: 0.07887 seconds
------------------------------------------------------------------
Recursion depth exceeded for pivot selection method: Last
array size: 10000, num_iterations: 10

Median of Medians: 0.31855 seconds
Median of Three: 0.01907 seconds
Exact Medians: 0.95826 seconds
Random: 0.07873 seconds
Last: recursion depth exceeded
------------------------------------------------------------------
Recursion depth exceeded for pivot selection method: Last
array size: 50000, num_iterations: 10

Median of Medians: 6.352 seconds
Median of Three: 0.11567 seconds
Exact Medians: 19.23203 seconds
Random: 0.40909 seconds
Last: recursio

In [50]:
num_iterations = 10
array_sizes = [1000, 10000, 50000]
print("reverse sorted array: ")
print("last element pivot selection is ignored since the partition process exceeds the maximum recursion depth when n = 3000")
print("------------------------------------------------------------------")
for array_size in array_sizes:
    run_experiment(generate_reverse_sorted_array, array_size, num_iterations)

reverse sorted array: 
last element pivot selection is ignored since the partition process exceeds the maximum recursion depth when n = 3000
------------------------------------------------------------------
array size: 1000, num_iterations: 10

Median of Medians: 0.0082 seconds
Median of Three: 0.00249 seconds
Exact Medians: 0.01311 seconds
Random: 0.00665 seconds
Last: 0.05155 seconds
------------------------------------------------------------------
Recursion depth exceeded for pivot selection method: Last
array size: 10000, num_iterations: 10

Median of Medians: 0.21925 seconds
Median of Three: 0.03358 seconds
Exact Medians: 0.653 seconds
Random: 0.07327 seconds
Last: recursion depth exceeded
------------------------------------------------------------------
Recursion depth exceeded for pivot selection method: Last
array size: 50000, num_iterations: 10

Median of Medians: 4.80904 seconds
Median of Three: 0.20177 seconds
Exact Medians: 14.44561 seconds
Random: 0.38254 seconds
Last: 

In [54]:
num_iterations = 10
array_sizes = [1000]
print("array of identical items: ")
print("------------------------------------------------------------------")
for array_size in array_sizes:
    run_experiment(generate_identical_item_array, array_size, num_iterations)

array of identical items: 
------------------------------------------------------------------
array size: 1000, num_iterations: 10

Median of Medians: 0.11402 seconds
Median of Three: 0.07922 seconds
Exact Medians: 27.55839 seconds
Random: 0.08656 seconds
Last: 0.07918 seconds
------------------------------------------------------------------


In [55]:
num_iterations = 10
array_sizes = [10000]
print("array of identical items: ")
print("------------------------------------------------------------------")
for array_size in array_sizes:
    run_experiment(generate_identical_item_array, array_size, num_iterations)

array of identical items: 
------------------------------------------------------------------
Recursion depth exceeded for pivot selection method: Median of Medians
Recursion depth exceeded for pivot selection method: Median of Three
Recursion depth exceeded for pivot selection method: Exact Medians
Recursion depth exceeded for pivot selection method: Random
Recursion depth exceeded for pivot selection method: Last
array size: 10000, num_iterations: 10

Median of Medians: recursion depth exceeded
Median of Three: recursion depth exceeded
Exact Medians: recursion depth exceeded
Random: recursion depth exceeded
Last: recursion depth exceeded
------------------------------------------------------------------


In [51]:
array_size = 1000
arr = generate_same_item_array(array_size)
sorter = QuickSort()
sorter.fit(arr)

sorter.quicksort(0, array_size - 1, "median_of_medians")

In [19]:
num_iterations = 10
array_size = 100000
print("unsorted array: ")
run_experiment(generate_random_array, array_size, num_iterations)

unsorted array: 
array size: 100000, num_iterations: 10

Median of Medians: 18.5159 seconds
Median of Three: 0.25883 seconds
Exact Medians: 55.99713 seconds
Random: 0.77953 seconds
Last: 0.28821 seconds
------------------------------------------------------------------


In [20]:
num_iterations = 10
array_size = 100000
print("sorted array: ")
run_experiment(generate_sorted_array, array_size, num_iterations)

sorted array: 
Recursion depth exceeded for pivot selection method: Last
array size: 100000, num_iterations: 10

Median of Medians: 29.81761 seconds
Median of Three: 0.23863 seconds
Exact Medians: 77.42721 seconds
Random: 0.83138 seconds
Last: recursion depth exceeded
------------------------------------------------------------------


In [4]:
num_iterations = 10
array_size = 100000
print("reverse sorted array: ")
run_experiment(generate_reverse_sorted_array, array_size, num_iterations)

reverse sorted array: 
Recursion depth exceeded for pivot selection method: Last
array size: 100000, num_iterations: 10

Median of Medians: 18.93792 seconds
Median of Three: 0.42591 seconds
Exact Medians: 56.67557 seconds
Random: 0.78412 seconds
Last: recursion depth exceeded
------------------------------------------------------------------


## Merge Sort

In [31]:
def measure_merge_sort_execution_time(array_generator, array_size, num_iterations):
    execution_times = []
    for _ in range(num_iterations):
        arr = array_generator(array_size)
        
        start_time = time.time()
        sorted_arr = merge_sort(arr)
        end_time = time.time()
        execution_time = end_time - start_time
        execution_times.append(execution_time)
    
    return execution_times

In [32]:
num_iterations = 10
array_sizes = [1000, 10000, 50000, 100000]

print("unsorted array: ")
for array_size in array_sizes:
    execution_times = measure_merge_sort_execution_time(generate_random_array, array_size, num_iterations)
    avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
    print(f"array size: {array_size}, num_iteration: {num_iterations} merge_sort: {avg_execution_time} seconds")
print("------------------------------------------------------------------")

print("sorted array: ")
for array_size in array_sizes:
    execution_times = measure_merge_sort_execution_time(generate_sorted_array, array_size, num_iterations)
    avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
    print(f"array size: {array_size}, num_iteration: {num_iterations} merge_sort: {avg_execution_time} seconds")
print("------------------------------------------------------------------")

print("reverse sorted array: ")
for array_size in array_sizes:
    execution_times = measure_merge_sort_execution_time(generate_reverse_sorted_array, array_size, num_iterations)
    avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
    print(f"array size: {array_size}, num_iteration: {num_iterations} merge_sort: {avg_execution_time} seconds")
print("------------------------------------------------------------------")

print("array with identical items: ")
for array_size in array_sizes:
    execution_times = measure_merge_sort_execution_time(generate_identical_item_array, array_size, num_iterations)
    avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
    print(f"array size: {array_size}, num_iteration: {num_iterations} merge_sort: {avg_execution_time} seconds")

unsorted array: 
array size: 1000, num_iteration: 10 merge_sort: 0.00382 seconds
array size: 10000, num_iteration: 10 merge_sort: 0.02358 seconds
array size: 50000, num_iteration: 10 merge_sort: 0.12992 seconds
array size: 100000, num_iteration: 10 merge_sort: 0.27687 seconds
------------------------------------------------------------------
sorted array: 
array size: 1000, num_iteration: 10 merge_sort: 0.00148 seconds
array size: 10000, num_iteration: 10 merge_sort: 0.01849 seconds
array size: 50000, num_iteration: 10 merge_sort: 0.1054 seconds
array size: 100000, num_iteration: 10 merge_sort: 0.22188 seconds
------------------------------------------------------------------
reverse sorted array: 
array size: 1000, num_iteration: 10 merge_sort: 0.0015 seconds
array size: 10000, num_iteration: 10 merge_sort: 0.01882 seconds
array size: 50000, num_iteration: 10 merge_sort: 0.10879 seconds
array size: 100000, num_iteration: 10 merge_sort: 0.22572 seconds
---------------------------------

In [33]:
arr = [4, 2, 7, 1, 5, 3]
sorted_arr = merge_sort(arr)
print(sorted_arr)
print(arr)

[1, 2, 3, 4, 5, 7]
[4, 2, 7, 1, 5, 3]


## Insertion Sort

In [64]:
def measure_insertion_sort_execution_time(array_generator, array_size, num_iterations):
    execution_times = []
    for _ in range(num_iterations):
        arr = array_generator(array_size)
        
        start_time = time.time()
        sorted_arr = insertion_sort(arr)
        end_time = time.time()
        execution_time = end_time - start_time
        execution_times.append(execution_time)
    
    return execution_times

In [65]:
num_iterations = 10
array_sizes = [1000, 10000, 50000]
sorting_algo = "insertion_sort"

print("unsorted array: ")
for array_size in array_sizes:
    execution_times = measure_insertion_sort_execution_time(generate_random_array, array_size, num_iterations)
    avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
    print(f"array size: {array_size}, num_iteration: {num_iterations}, {sorting_algo}: {avg_execution_time} seconds")
print("------------------------------------------------------------------")

print("sorted array: ")
for array_size in array_sizes:
    execution_times = measure_insertion_sort_execution_time(generate_sorted_array, array_size, num_iterations)
    avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
    print(f"array size: {array_size}, num_iteration: {num_iterations}, {sorting_algo}: {avg_execution_time} seconds")
print("------------------------------------------------------------------")

print("reverse sorted array: ")
for array_size in array_sizes:
    execution_times = measure_insertion_sort_execution_time(generate_reverse_sorted_array, array_size, num_iterations)
    avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
    print(f"array size: {array_size}, num_iteration: {num_iterations}, {sorting_algo}: {avg_execution_time} seconds")
print("------------------------------------------------------------------")

print("array with identical items: ")
for array_size in array_sizes:
    execution_times = measure_insertion_sort_execution_time(generate_identical_item_array, array_size, num_iterations)
    avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
    print(f"array size: {array_size}, num_iteration: {num_iterations}, {sorting_algo}: {avg_execution_time} seconds")

unsorted array: 
array size: 1000, num_iteration: 10 merge_sort: 0.02546 seconds
array size: 10000, num_iteration: 10 merge_sort: 2.2029 seconds
array size: 50000, num_iteration: 10 merge_sort: 55.48509 seconds
array size: 100000, num_iteration: 10 merge_sort: 221.39495 seconds
------------------------------------------------------------------
sorted array: 
array size: 1000, num_iteration: 10 merge_sort: 0.0001 seconds
array size: 10000, num_iteration: 10 merge_sort: 0.00109 seconds
array size: 50000, num_iteration: 10 merge_sort: 0.00517 seconds
array size: 100000, num_iteration: 10 merge_sort: 0.01036 seconds
------------------------------------------------------------------
reverse sorted array: 
array size: 1000, num_iteration: 10 merge_sort: 0.04203 seconds
array size: 10000, num_iteration: 10 merge_sort: 4.31559 seconds
array size: 50000, num_iteration: 10 merge_sort: 108.22271 seconds
array size: 100000, num_iteration: 10 merge_sort: 437.30624 seconds
--------------------------

In [63]:
arr = generate_same_item_array(10)
print(arr)
sorted_arr = insertion_sort(arr)
print(sorted_arr)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


## Compare the performance with X% identical elements in array

In [111]:
# Generate an array of X% identical items
def generate_random_array(size, percent_identical=0):
    random.seed(123)
    
    arr = []
    identical_count = int((percent_identical / 100) * size)
    
    # Generate identical elements
    identical_elements = random.randint(1, size)
    arr.extend([identical_elements] * identical_count)
    
    # Generate remaining distinct elements
    distinct_elements = random.sample(range(1, size + 1), size - identical_count)
    arr.extend(distinct_elements)
    
    # Shuffle the array
    random.shuffle(arr)
    
    return arr

In [112]:
def measure_quicksort_execution_time(array_generator, array_size, num_iterations, pivot_selection_method, percent_identical=0):
    sorter = QuickSort() # Instantiate the QuickSort class

    execution_times = []
    for _ in range(num_iterations):
        arr = array_generator(array_size, percent_identical)
        # Set the array to be sorted using the fit function
        sorter.fit(arr)
        
        start_time = time.time()
        sorter.quicksort(0, array_size - 1, pivot_selection_method)
        end_time = time.time()
        execution_time = end_time - start_time
        execution_times.append(execution_time)
        
    return execution_times

def measure_merge_sort_execution_time(array_generator, array_size, num_iterations, percent_identical=0):
    execution_times = []
    for _ in range(num_iterations):
        arr = array_generator(array_size, percent_identical)
        
        start_time = time.time()
        sorted_arr = merge_sort(arr)
        end_time = time.time()
        execution_time = end_time - start_time
        execution_times.append(execution_time)
    
    return execution_times

def measure_insertion_sort_execution_time(array_generator, array_size, num_iterations, percent_identical=0):
    execution_times = []
    for _ in range(num_iterations):
        arr = array_generator(array_size, percent_identical)
        
        start_time = time.time()
        sorted_arr = insertion_sort(arr)
        end_time = time.time()
        execution_time = end_time - start_time
        execution_times.append(execution_time)
    
    return execution_times

In [113]:
def run_sorting_experiment(array_sizes, num_iterations, identical_percentages):
    with open('sorting_results.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Array Size', 'Identical Percentage', 'Quick Sort Time (Median of Three)', 'Quick Sort Time (Random)', 'Merge Sort Time', 'Insertion Sort Time'])
        
        for size in array_sizes:
            for percentage in identical_percentages:
                row = [size, percentage]
                
                arr = generate_random_array(size, percentage)
                
                print(f"========== array size: {size}, {percentage}% identical start ==========\n")
                
                # Measure Quick Sort execution time (Median of Three)
                try:
                    execution_times = measure_quicksort_execution_time(generate_random_array, size, num_iterations, "median_of_three", percentage)
                    avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
                    row.append(avg_execution_time)
                    print(f"Quick Sort - Median of Three: {avg_execution_time} seconds")
                except:
                    print("Recursion depth exceeded for pivot selection method: Median of Three")
                    row.append("NA")
                    
                # Measure Quick Sort execution time (Random)
                try:
                    execution_times = measure_quicksort_execution_time(generate_random_array, size, num_iterations, "random", percentage)
                    avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
                    row.append(avg_execution_time)
                    print(f"Quick Sort - Random: {avg_execution_time} seconds")
                except:
                    print("Recursion depth exceeded for pivot selection method: Random")
                    row.append("NA")
                
                # Measure Merge Sort execution time
                execution_times = measure_merge_sort_execution_time(generate_random_array, size, num_iterations, percentage)
                avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
                row.append(avg_execution_time)
                print(f"Merge Sort: {avg_execution_time} seconds")
                
                # Measure Insertion Sort execution time
                execution_times = measure_insertion_sort_execution_time(generate_random_array, size, num_iterations, percentage)
                avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
                row.append(avg_execution_time)
                print(f"Insertion Sort: {avg_execution_time} seconds")
                
                writer.writerow(row)
                
                print(f"\n========== array size: {size}, {percentage}% identical end ==========\n")

In [None]:
# Define the range of array sizes and identical percentages
array_sizes = range(1000, 10001, 1000)
identical_percentages = range(0, 101, 10)
num_iterations = 10

# Run the sorting experiment
run_sorting_experiment(array_sizes, num_iterations, identical_percentages)


Quick Sort - Median of Three: 0.00358 seconds
Quick Sort - Random: 0.00761 seconds
Merge Sort: 0.00173 seconds
Insertion Sort: 0.02111 seconds



Quick Sort - Median of Three: 0.00261 seconds
Quick Sort - Random: 0.00777 seconds
Merge Sort: 0.00175 seconds
Insertion Sort: 0.02103 seconds



Quick Sort - Median of Three: 0.00495 seconds
Quick Sort - Random: 0.01018 seconds
Merge Sort: 0.00174 seconds
Insertion Sort: 0.02044 seconds



Quick Sort - Median of Three: 0.00867 seconds
Quick Sort - Random: 0.01417 seconds
Merge Sort: 0.00174 seconds
Insertion Sort: 0.01875 seconds



Quick Sort - Median of Three: 0.01411 seconds
Quick Sort - Random: 0.01994 seconds
Merge Sort: 0.00183 seconds
Insertion Sort: 0.01945 seconds



Quick Sort - Median of Three: 0.02088 seconds
Quick Sort - Random: 0.02656 seconds
Merge Sort: 0.00181 seconds
Insertion Sort: 0.01645 seconds



Quick Sort - Median of Three: 0.02973 seconds
Quick Sort - Random: 0.03555 seconds
Merge Sort: 0.00173 seconds
Insertion So

# Dump

In [None]:
def measure_quicksort_execution_time(arr, array_size, num_iterations, pivot_selection_method):
    execution_times = []
    sorter = QuickSort() # Instantiate the QuickSort class

    for _ in range(num_iterations):
        # arr = array_generator(array_size)
        # Set the array to be sorted using the fit function
        sorter.fit(arr)
        
        start_time = time.time()
        sorter.quicksort(0, array_size - 1, pivot_selection_method)
        end_time = time.time()
        execution_time = end_time - start_time
        execution_times.append(execution_time)
    
    return execution_times

def run_experiment(arr, array_size, num_iterations):
    execution_times = {
        "Median of Three": None, 
        "Random": None, 
        "Last": None
    }
    
    # Measure the execution times for each pivot selection
    try:
        execution_times["Median of Medians"] = measure_quicksort_execution_time(arr, array_size, num_iterations, "median_of_medians")
    except:
        print("Recursion depth exceeded for pivot selection method: Median of Medians")
        
    try:
        execution_times["Median of Three"] = measure_quicksort_execution_time(arr, array_size, num_iterations, "median_of_three")
    except:
        print("Recursion depth exceeded for pivot selection method: Median of Three")
        
    try:
        execution_times["Exact Medians"] = measure_quicksort_execution_time(arr, array_size, num_iterations, "exact_median")
    except:
        print("Recursion depth exceeded for pivot selection method: Exact Medians")
        
    try:
        execution_times["Random"] = measure_quicksort_execution_time(arr, array_size, num_iterations, "random")
    except:
        print("Recursion depth exceeded for pivot selection method: Random")
        
    try:
        execution_times["Last"] = measure_quicksort_execution_time(arr, array_size, num_iterations, "last")
    except RecursionError:
        print("Recursion depth exceeded for pivot selection method: Last")
    
    print("array size: {}, num_iterations: {}".format(array_size, num_iterations))
    print()
    for technique, times in execution_times.items():
        try:
            avg_execution_time = round(sum(times) / len(times), 5)
            print(f"{technique}: {avg_execution_time} seconds")
        except:
            print(f"{technique}: recursion depth exceeded")
        
    print("------------------------------------------------------------------")

In [None]:
generate_X_percent_identical_item_array(20, 0)

[20, 5, 4, 7, 15, 11, 2, 17, 16, 13, 1, 6, 8, 19, 3, 10, 14, 12, 9, 18]

差不多identical = 3000 就 Quick Sort 就 exceed recursion depth了

In [64]:
num_iterations = 10
array_sizes = [1000, 2000, 3000]
percentages = [10, 20, 30, 50, 70, 90]

for array_size in array_sizes:
    print(f"======array_size: {array_size}, num_iterations: {num_iterations} start ===========\n")
    for percentage in percentages:
        print(f"{percentage}% identical elements: ")
        # Quick Sort
        run_experiment_X_identical(array_size, num_iterations, percentage)
        
        # Merge Sort
        execution_times = measure_merge_sort_on_X_identical_execution_time(array_size, num_iterations, percentage)
        avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
        print(f"array size: {array_size}, num_iteration: {num_iterations}, Merge Sort: {avg_execution_time} seconds")
        print("------------------------------------------------------------------")
        
        # Insertion Sort
        execution_times = measure_insertion_sort_on_X_identical_execution_time(array_size, num_iterations, percentage)
        avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
        print(f"array size: {array_size}, num_iteration: {num_iterations}, Insertion Sort: {avg_execution_time} seconds")
        print("------------------------------------------------------------------")
    print(f"======array_size: {array_size}, num_iterations: {num_iterations} end ===========\n")


10% identical elements: 
array size: 1000, num_iterations: 10

Median of Three: 0.00521 seconds
Random: 0.00813 seconds
Last: 0.00256 seconds
------------------------------------------------------------------
array size: 1000, num_iteration: 10, Merge Sort: 0.00177 seconds
------------------------------------------------------------------
array size: 1000, num_iteration: 10, Insertion Sort: 0.02085 seconds
------------------------------------------------------------------
20% identical elements: 
array size: 1000, num_iterations: 10

Median of Three: 0.00493 seconds
Random: 0.01032 seconds
Last: 0.00475 seconds
------------------------------------------------------------------
array size: 1000, num_iteration: 10, Merge Sort: 0.00185 seconds
------------------------------------------------------------------
array size: 1000, num_iteration: 10, Insertion Sort: 0.02061 seconds
------------------------------------------------------------------
30% identical elements: 
array size: 1000, nu

In [65]:
num_iterations = 10
array_sizes = [4000, 5000, 6000]
percentages = [10, 20, 30, 50, 70, 90]

for array_size in array_sizes:
    print(f"======array_size: {array_size}, num_iterations: {num_iterations} start ===========\n")
    for percentage in percentages:
        print(f"{percentage}% identical elements: ")
        # Quick Sort
        run_experiment_X_identical(array_size, num_iterations, percentage)
        
        # Merge Sort
        execution_times = measure_merge_sort_on_X_identical_execution_time(array_size, num_iterations, percentage)
        avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
        print(f"array size: {array_size}, num_iteration: {num_iterations}, Merge Sort: {avg_execution_time} seconds")
        print("------------------------------------------------------------------")
        
        # Insertion Sort
        execution_times = measure_insertion_sort_on_X_identical_execution_time(array_size, num_iterations, percentage)
        avg_execution_time = round(sum(execution_times) / len(execution_times), 5)
        print(f"array size: {array_size}, num_iteration: {num_iterations}, Insertion Sort: {avg_execution_time} seconds")
        print("------------------------------------------------------------------")
    print(f"======array_size: {array_size}, num_iterations: {num_iterations} end ===========\n")


10% identical elements: 
array size: 4000, num_iterations: 10

Median of Three: 0.02344 seconds
Random: 0.04115 seconds
Last: 0.021 seconds
------------------------------------------------------------------
array size: 4000, num_iteration: 10, Merge Sort: 0.00829 seconds
------------------------------------------------------------------
array size: 4000, num_iteration: 10, Insertion Sort: 0.34404 seconds
------------------------------------------------------------------
20% identical elements: 
array size: 4000, num_iterations: 10

Median of Three: 0.06005 seconds
Random: 0.08209 seconds
Last: 0.06043 seconds
------------------------------------------------------------------
array size: 4000, num_iteration: 10, Merge Sort: 0.0085 seconds
------------------------------------------------------------------
array size: 4000, num_iteration: 10, Insertion Sort: 0.34812 seconds
------------------------------------------------------------------
30% identical elements: 
array size: 4000, num_i

In [63]:
def measure_quicksort_on_X_identical_execution_time(array_size, num_iterations, pivot_selection_method, X):
    sorter = QuickSort() # Instantiate the QuickSort class
    
    execution_times = []
    for _ in range(num_iterations):
        arr = generate_X_percent_identical_item_array(array_size, X)
        # Set the array to be sorted using the fit function
        sorter.fit(arr)
        
        start_time = time.time()
        sorter.quicksort(0, array_size - 1, pivot_selection_method)
        end_time = time.time()
        execution_time = end_time - start_time
        execution_times.append(execution_time)
    
    return execution_times

def run_experiment_X_identical(array_size, num_iterations, X):
    pivot_selection_method = {
        "Median of Three": "median_of_three", 
        "Random": "random", 
        "Last": "last"
    }
    
    execution_times = {}
    
    # Measure the execution times for each pivot selection
    for method, para in pivot_selection_method.items():
        execution_times[method] = None
        try:
            execution_times[method] = measure_quicksort_on_X_identical_execution_time(array_size, num_iterations, para, X)
        except:
            print(f"Recursion depth exceeded for pivot selection method: {method}")
    
    print(f"array size: {array_size}, num_iterations: {num_iterations}")
    print()
    
    for technique, times in execution_times.items():
        try:
            avg_execution_time = round(sum(times) / len(times), 5)
            print(f"{technique}: {avg_execution_time} seconds")
        except:
            print(f"{technique}: recursion depth exceeded")
        
    print("------------------------------------------------------------------")
    