In [None]:
import random
import time
import matplotlib.pyplot as plt

In [None]:
def insertionSort(arr):
    """Sort ``arr`` in place with insertion sort and count key comparisons.

    Every evaluation of ``arr[j] > key`` is counted, including the one that
    stops the inner scan because the element is not greater than the key.
    Scans that stop because the start of the list was reached (``j < 0``)
    add no extra comparison, since no elements are compared in that case.

    Args:
        arr: Mutable sequence of mutually comparable items. It is modified
            in place.

    Returns:
        A tuple ``(arr, comparison)``: the same list object, now in ascending
        order, and the number of key comparisons performed.
    """
    comparison = 0

    for i in range(1, len(arr)):
        key = arr[i]
        j = i - 1

        # Shift larger elements one slot to the right until key's position is found.
        while j >= 0:
            comparison += 1  # one key comparison, whatever its outcome
            if not (arr[j] > key):
                break
            arr[j + 1] = arr[j]
            j -= 1
        arr[j + 1] = key

    return arr, comparison

In [None]:
def mergeSort(arr):
    """Sort ``arr`` with top-down merge sort and count key comparisons.

    Args:
        arr: List of mutually comparable items. It is not modified; the
            halves are copied by slicing.

    Returns:
        A tuple ``(sorted_list, comparisons)``. For inputs of length 0 or 1
        the input object itself is returned together with a count of 0.
    """
    size = len(arr)
    if size <= 1:
        # Nothing to merge, so no comparison is made.
        return arr, 0

    half = size // 2
    lower_sorted, lower_count = mergeSort(arr[:half])
    upper_sorted, upper_count = mergeSort(arr[half:])

    combined, merge_count = merge(lower_sorted, upper_sorted)

    total_count = lower_count + upper_count + merge_count
    return combined, total_count
 
def merge(left, right):
    """Merge two ascending lists into one ascending list, counting comparisons.

    Ties are resolved in favour of ``left`` so that equal elements keep the
    relative order they had in the inputs (stable merge).

    Args:
        left: List already sorted in ascending order.
        right: List already sorted in ascending order.

    Returns:
        A tuple ``(result, comparisons)``: a new merged list and the number
        of element-to-element comparisons made while merging.
    """
    result = []
    i = j = 0
    comparisons = 0

    while i < len(left) and j < len(right):
        comparisons += 1  # exactly one key comparison per loop iteration
        if left[i] <= right[j]:
            result.append(left[i])
            i += 1
        else:
            result.append(right[j])
            j += 1

    # At most one of the two tails is non-empty; it is appended without comparisons.
    result.extend(left[i:])
    result.extend(right[j:])

    return result, comparisons

In [None]:
def hybridSort(arr, S):
    """Sort ``arr`` with merge sort that switches to insertion sort for small inputs.

    Sub-arrays whose length is at most ``S`` are handed to ``insertionSort``;
    larger ones are split in half, sorted recursively and combined with
    ``merge``.

    Args:
        arr: List of mutually comparable items. When ``len(arr)`` is at most
            the cutoff, ``arr`` itself is passed to ``insertionSort``, which
            sorts it in place; otherwise only slices (copies) are sorted.
        S: Cutoff length for switching to insertion sort. Values below 1 are
            treated as 1.

    Returns:
        A tuple ``(sorted_list, comparisons)`` with the total number of key
        comparisons reported by ``insertionSort`` and ``merge``.
    """
    # Halving a list of length <= 1 never shrinks it (arr[0:] is the whole
    # list), so a cutoff below 1 would recurse forever. Clamp it to 1.
    if len(arr) <= max(S, 1):
        return insertionSort(arr)

    mid = len(arr) // 2
    left, left_comparisons = hybridSort(arr[:mid], S)
    right, right_comparisons = hybridSort(arr[mid:], S)
    merged, merge_comparisons = merge(left, right)
    return merged, left_comparisons + right_comparisons + merge_comparisons

In [None]:
def generate_random_integers(n, min_val, max_val):
    """Return ``n`` random integers drawn uniformly from ``range(min_val, max_val)``.

    Values are drawn with replacement, so ``n`` may be larger than the number
    of distinct values in the range. (Sampling without replacement, as
    ``random.sample`` does, raises ``ValueError`` in that situation.)

    Args:
        n: Number of integers to generate.
        min_val: Smallest value that can be produced (inclusive).
        max_val: Upper bound of the range (exclusive). The range must be
            non-empty when ``n`` is greater than 0.

    Returns:
        A list of ``n`` integers, possibly containing duplicates.
    """
    return random.choices(range(min_val, max_val), k=n)

In [None]:
def AvgTimeCmpMerge(n, nums):
    """Average mergeSort's comparison count and CPU time over ``n`` datasets.

    Args:
        n: Number of datasets to sort; ``nums[0]`` .. ``nums[n - 1]`` are used.
        nums: Indexable collection of lists to sort.

    Returns:
        A tuple ``(average_comparisons, average_seconds)`` where the time is
        measured with ``time.process_time()``.
    """
    total_cmp = 0
    total_seconds = 0

    for idx in range(n):
        tic = time.process_time()
        _, count = mergeSort(nums[idx])
        toc = time.process_time()
        total_cmp += count
        total_seconds += (toc - tic)

    mean_cmp = total_cmp / n
    mean_seconds = total_seconds / n
    return mean_cmp, mean_seconds
             

In [None]:
def AvgTimeCmpHybrid(n, nums, S):
    """Average hybridSort's comparison count and CPU time over ``n`` datasets.

    Args:
        n: Number of datasets to sort; ``nums[0]`` .. ``nums[n - 1]`` are used.
        nums: Indexable collection of lists to sort.
        S: Cutoff length forwarded to ``hybridSort``.

    Returns:
        A tuple ``(average_comparisons, average_seconds)`` where the time is
        measured with ``time.process_time()``.
    """
    total_cmp = 0
    total_seconds = 0

    for idx in range(n):
        tic = time.process_time()
        _, count = hybridSort(nums[idx], S)
        toc = time.process_time()
        total_cmp += count
        total_seconds += (toc - tic)

    mean_cmp = total_cmp / n
    mean_seconds = total_seconds / n
    return mean_cmp, mean_seconds

In [None]:
# Wall-clock variant of AvgTimeCmpMerge (elapsed real time rather than CPU time).
def AvgTimeCmpMerge_t (n , nums):
    """Average mergeSort's comparison count and elapsed time over ``n`` datasets.

    Elapsed time is taken from ``time.perf_counter()``, which is monotonic and
    uses the highest-resolution clock available, so the measured intervals are
    not affected by system clock adjustments the way ``time.time()`` can be.

    Args:
        n: Number of datasets to sort; ``nums[0]`` .. ``nums[n - 1]`` are used.
        nums: Indexable collection of lists to sort.

    Returns:
        A tuple ``(average_comparisons, average_elapsed_seconds)``.
    """
    avgTime = 0
    avgCmp = 0

    for i in range(n):
        start_time = time.perf_counter()
        _, cmp = mergeSort(nums[i])  # the sorted list itself is not needed
        end_time = time.perf_counter()
        avgCmp += cmp
        avgTime += (end_time - start_time)

    return avgCmp/n , avgTime/n

def AvgTimeCmpHybrid_t (n , nums, S):
    """Average hybridSort's comparison count and elapsed time over ``n`` datasets.

    Elapsed time is taken from ``time.perf_counter()``, which is monotonic and
    uses the highest-resolution clock available, so the measured intervals are
    not affected by system clock adjustments the way ``time.time()`` can be.

    Args:
        n: Number of datasets to sort; ``nums[0]`` .. ``nums[n - 1]`` are used.
        nums: Indexable collection of lists to sort.
        S: Cutoff length forwarded to ``hybridSort``.

    Returns:
        A tuple ``(average_comparisons, average_elapsed_seconds)``.
    """
    avgTime = 0
    avgCmp = 0

    for i in range(n):
        start_time = time.perf_counter()
        _, cmp = hybridSort(nums[i], S)  # the sorted list itself is not needed
        end_time = time.perf_counter()
        avgCmp += cmp
        avgTime += (end_time - start_time)

    return avgCmp/n , avgTime/n

In [None]:
# Benchmark configuration: how many datasets to build, how long each one is,
# and the value range passed to generate_random_integers (which draws from
# range(min_val, max_val), so the upper bound is exclusive).
NUM_DATASETS = 5
DATASET_SIZE = 10000000
VALUE_MIN = 0
VALUE_MAX = 10000

# Fixed seed so that re-running the notebook sorts the same data.
random.seed(42)

random_numbers = [
    generate_random_integers(DATASET_SIZE, VALUE_MIN, VALUE_MAX)
    for _ in range(NUM_DATASETS)
]

In [None]:
# from numpy import random
# x = 10000  # exclusive upper bound for dataset values (numpy randint's `high`)

# random_numbers = [random.randint(0, x, size=(10000000)) for _ in range(5)]
# for i in range(5):
#     random_numbers[i].tolist()
# random_numbers = random_numbers.tolist()

In [None]:
# CPU-time variant (time.process_time), kept for reference:
# cmp_merge , time_merge = AvgTimeCmpMerge(5 , random_numbers)
# print("Average number of comparison(merge):" , cmp_merge)
# print("Average CPU time(merge):" , time_merge , "s")

# AvgTimeCmpMerge_t measures elapsed wall-clock time, not CPU time, so the
# output is labelled accordingly. Every generated dataset is included.
cmp_merge_t , time_merge_t = AvgTimeCmpMerge_t(len(random_numbers) , random_numbers)
print("Average number of comparison(merge):" , cmp_merge_t)
print("Average wall-clock time(merge):" , time_merge_t , "s")

In [None]:
# CPU-time variant (time.process_time), kept for reference:
# cmp_hybrid , time_hybrid = AvgTimeCmpHybrid(5 , random_numbers, 7)
# print("Average number of comparison(Hybrid):" , cmp_hybrid)
# print("Average CPU time(hybrid):" , time_hybrid , "s")

# AvgTimeCmpHybrid_t measures elapsed wall-clock time, not CPU time, so the
# output is labelled accordingly. 7 is the insertion-sort cutoff S.
cmp_hybrid_t , time_hybrid_t = AvgTimeCmpHybrid_t(len(random_numbers) , random_numbers, 7)
print("Average number of comparison(Hybrid):" , cmp_hybrid_t)
print("Average wall-clock time(hybrid):" , time_hybrid_t , "s")

In [None]:
# Bar chart of the average number of key comparisons per algorithm.
categories = ['MergeSort', 'HybridSort']
# values = [cmp_merge , cmp_hybrid]
values = [cmp_merge_t , cmp_hybrid_t]

plt.figure(figsize=(3, 4))
plt.bar(categories, values, color=['blue', 'green'])

# Logarithmic y-axis (matplotlib's default is linear).
plt.yscale('log')

#plt.title('Average number of key Comparisons - merge sort vs hybrid sort')
# The bars show raw counts, so the label carries no scale factor.
plt.ylabel('# of key comparisons')
plt.show()

In [None]:
# Bar chart of the average running time per algorithm.
categories = ['MergeSort', 'HybridSort']
# values = [time_merge , time_hybrid]
values = [time_merge_t , time_hybrid_t]

fig, ax = plt.subplots(figsize=(3, 4))
ax.bar(categories, values, color=['blue', 'green'])

#ax.set_title('Average CPU time - merge sort vs hybrid sort')
ax.set_ylabel('Seconds')

plt.show()

In [None]:
# Absolute saving in key comparisons, then relative saving in running time,
# of hybrid sort versus merge sort. The *_t results are used because they are
# the only ones computed above (the non-_t names exist only in commented-out code).
print(cmp_merge_t - cmp_hybrid_t)
print((time_merge_t - time_hybrid_t)/time_merge_t)