# Import Dependencies

In [1]:
import numpy as np
import pandas as pd
from time import process_time
import matplotlib.pyplot as plt
import seaborn as sns

# Build out user-defined functions for later use

### The first two are functions I built myself - it is possible that there are more efficient ways to write them.

In [2]:
def reset_arrays():
    """Returns two items:
    1) List of five unsorted random uniform integer arrays of length [512, 1024, 2048, 4096, and 8192].
    2) List of five array lengths in ascending order: [512, 1024, 2048, 4096, 8192]
    """
    arrays = []
    array_lengths = [512, 1024, 2048, 4096, 8192]
    for length in array_lengths:
        array = np.random.randint(low=1, high=10000, size=length)
        arrays.append(array)
    return arrays, array_lengths


# This assumes that the list passed is all integers and does not validate - this is to keep runtimes clean
def max_linear_search(intarray):
    counter = 0
    maximum = 0
    while counter+1 < len(intarray):        
        if intarray[counter] >= maximum:
            maximum = intarray[counter]
        else:
            pass
        return None

### The third here came from Egon Schiele - his GitHub page is referenced below.

In [3]:
##### COPIED CODE FROM https://github.com/egonSchiele/grokking_algorithms
def binary_search(list, item):
  # low and high keep track of which part of the list you'll search in.
  low = 0
  high = len(list) - 1

  # While you haven't narrowed it down to one element ...
  while low <= high:
    # ... check the middle element
    mid = (low + high) // 2
    guess = list[mid]
    # Found the item.
    if guess == item:
      return mid
    # The guess was too high.
    if guess > item:
      high = mid - 1
    # The guess was too low.
    else:
      low = mid + 1

  # Item doesn't exist
  return None


# Use sort and search algorithms on variable-length integer arrays.

In [4]:
# Sort each array, ascending
arrays, array_lengths = reset_arrays()
sort_times = []

for i in range(len(arrays)):
    array = arrays[i] #define the array
    print(len(array))
    t1 = process_time()
    array.sort() #sort the array - `reverse` parameter is optional if we wanted to sort descending
    t2 = process_time()
    elapsed = t2-t1
    sort_times.append(elapsed)
    print(f'Sorting time took {elapsed} seconds.')

512
Sorting time took 0.000568000000000346 seconds.
1024
Sorting time took 0.0001660000000001105 seconds.
2048
Sorting time took 0.00018400000000040606 seconds.
4096
Sorting time took 0.00036799999999992394 seconds.
8192
Sorting time took 0.0008360000000000589 seconds.


In [6]:
# Binary search plus sort on unsorted array
binary_sort_and_search_times = []
arrays, array_lengths = reset_arrays()

for i in range(len(arrays)):
    array = arrays[i]
    array_length = len(array)
    
    t1 = process_time()
    array.sort()
    binary_search(array, max(array))
    t2 = process_time()
    
    elapsed = t2-t1
    binary_sort_and_search_times.append(elapsed)
    print(f'Sort and binary search for unsorted array of length {array_length}: {elapsed} seconds.')

Sort and binary search for unsorted array of length 512: 0.00017399999999989646 seconds.
Sort and binary search for unsorted array of length 1024: 0.00030399999999985994 seconds.
Sort and binary search for unsorted array of length 2048: 0.0005999999999999339 seconds.
Sort and binary search for unsorted array of length 4096: 0.0013579999999997483 seconds.
Sort and binary search for unsorted array of length 8192: 0.003962000000000021 seconds.


In [8]:
# Binary search on the sorted array
binary_sorted_search_times = []
arrays, array_lengths = reset_arrays()
for i in range(len(arrays)):
    array = arrays[i]
    array_length = len(array)
    array.sort() #sorting before timer
    
    t1 = process_time()
    binary_search(array, max(array))
    t2 = process_time()
    
    elapsed = t2-t1
    binary_sorted_search_times.append(elapsed)
    print(f'Binary search for array of length {array_length}: {elapsed} seconds.')

Binary search for array of length 512: 0.00021199999999987895 seconds.
Binary search for array of length 1024: 0.0004179999999998074 seconds.
Binary search for array of length 2048: 0.0007340000000000124 seconds.
Binary search for array of length 4096: 0.0014980000000002214 seconds.
Binary search for array of length 8192: 0.0023320000000000007 seconds.


In [10]:
linear_search_times = []
arrays, array_lengths = reset_arrays()
for i in range(len(arrays)):
    array = arrays[i]
    array_length = len(array)
    array.sort()
    
    t1 = process_time()
    max_linear_search(array)
    t2 = process_time()
    
    elapsed = t2-t1
    linear_search_times.append(elapsed)
    print(f'Linear search for sorted array of length {array_length}: {elapsed} seconds.')

Linear search for sorted array of length 512: 1.4000000000180535e-05 seconds.
Linear search for sorted array of length 1024: 7.999999999785956e-06 seconds.
Linear search for sorted array of length 2048: 7.999999999785956e-06 seconds.
Linear search for sorted array of length 4096: 1.0000000000065512e-05 seconds.
Linear search for sorted array of length 8192: 1.3999999999736445e-05 seconds.


### Make a dataframe of the results

In [18]:
df = pd.DataFrame(list(zip(array_lengths, sort_times, linear_search_times,
                           binary_sorted_search_times, binary_sort_and_search_times)),
                 columns=['array_length','sort_time','linear_search_time','binary_search','binary_sort_and_search'])
df

Unnamed: 0,array_length,sort_time,linear_search_time,binary_search,binary_sort_and_search
0,512,0.000568,1.4e-05,0.000212,0.000174
1,1024,0.000166,8e-06,0.000418,0.000304
2,2048,0.000184,8e-06,0.000734,0.0006
3,4096,0.000368,1e-05,0.001498,0.001358
4,8192,0.000836,1.4e-05,0.002332,0.003962


# Plot the results
We expect to see the relative complexity of binary and linear search on sorted arrays as well as binary sort-and-search.

**Use python matplotlib or Seaborn to generate a plot with the size of the random number array on the horizontal axis and with execution time in milliseconds on the vertical axis. The plot should show execution time against array size for linear and binary search algorithms alone.  Discuss the results.**

---------------

**Use Python matplotlib or Seaborn to generate a measure of the size of the data set on the horizontal axis and with execution time in milliseconds on the vertical axis. The plot should show execution time against array size for each form of the algorithm being tested (last four columns of the table).  Discuss the results.**
