# FreeCodeCamp Algorithms and Data Structures Tutorial Part 3. Sorting and searching algorithms

In [26]:
import random

def is_sorted(values):
    for index in range(len(values) - 1):
        if values[index] > values[index + 1]:
            return False
    return True

In [34]:
def bogo_sort(values):
    '''
    Randomly shuffles values until it sorts values correctly
    '''
    attempts = 0
    while not is_sorted(values):
        random.shuffle(values)
        attempts += 1
    
    print('attempts: ',attempts)
    return values    

In [35]:
nums = [5, 8, 1, 4, 7]

print(bogo_sort(nums))

attempts:  45
[1, 4, 5, 7, 8]


In [1]:
def selection_sort(values):
    '''
    Takes O(n^2) time
    '''
    sorted_list = []
    # print('%-25s %-25s' % (values, sorted_list))
    for i in range(len(values)):
        index_to_move = index_of_min(values)
        sorted_list.append(values.pop(index_to_move))
        # print('%-25s %-25s' % (values, sorted_list))
    return sorted_list

def index_of_min(values):
    min_index = 0
    for i in range(1, len(values)):
        if values[i] < values[min_index]:
            min_index = i
    return min_index

In [53]:
nums = [5, 8, 1, 4, 7]
print(selection_sort(nums))

[5, 8, 1, 4, 7]           []                       
[5, 8, 4, 7]              [1]                      
[5, 8, 7]                 [1, 4]                   
[8, 7]                    [1, 4, 5]                
[8]                       [1, 4, 5, 7]             
[]                        [1, 4, 5, 7, 8]          
[1, 4, 5, 7, 8]


In [2]:
%%timeit
nums = [4, 6, 3, 2, 9, 7, 3, 5]
selection_sort(nums)

4.29 µs ± 173 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [5]:
def sum(numbers):
    total = 0
    for number in numbers:
        total += number
    return total

def recursive_sum(numbers):
    if not numbers:
        return 0
    print('Calling sum(%s)' % numbers[1:])
    remaining_sum = recursive_sum(numbers[1:])
    print('Call to sum(%s) return %d + %d' % (numbers, numbers[0], remaining_sum))
    return numbers[0] + remaining_sum

In [6]:
nums = [1,2,7,9]
recursive_sum(nums)

Calling sum([2, 7, 9])
Calling sum([7, 9])
Calling sum([9])
Calling sum([])
Call to sum([9]) return 9 + 0
Call to sum([7, 9]) return 7 + 9
Call to sum([2, 7, 9]) return 2 + 16
Call to sum([1, 2, 7, 9]) return 1 + 18


19

In [19]:
def quicksort(values):
    '''
    Best case: O(n log n) time
    Worst case: O(n^2) time 
        list is reverse sorted and you
        pick the first element as pivot
    
    More commonly used than merge sort
    This is because operation that merge sort
    performs repeatedly takes longer than that of 
    quicksort
    '''
    if len(values) <= 1:
        return values

    less_than_pivot = []
    greater_than_pivot = []
    pivot = values[0]
    for value in values[1:]:
        if value <= pivot:
            less_than_pivot.append(value)
        else:
            greater_than_pivot.append(value)
    # print('%15s %1s %-15s' % (less_than_pivot, pivot, greater_than_pivot))
    return quicksort(less_than_pivot) + [pivot] + quicksort(greater_than_pivot)

In [20]:
%time
nums = [4, 6, 3, 2, 9, 7, 3, 5]
sorted_numbers = quicksort(nums)
print(sorted_numbers)

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.25 µs
[2, 3, 3, 4, 5, 6, 7, 9]


In [21]:
def merge_sort(values):
    if len(values) <= 1:
        return values
    middle_index = len(values) // 2
    left_values = merge_sort(values[:middle_index])
    right_values = merge_sort(values[middle_index:])
    # print('%15s %-15s' % (left_values, right_values))
    sorted_values = []
    left_index = 0
    right_index = 0
    while left_index < len(left_values) and right_index < len(right_values):
        if left_values[left_index] < right_values[right_index]:
            sorted_values.append(left_values[left_index])
            left_index += 1
        else:
            sorted_values.append(right_values[right_index])
            right_index += 1
    sorted_values += left_values[left_index:]
    sorted_values += right_values[right_index:]
    return sorted_values

In [22]:
%time
nums = [4, 6, 3, 2, 9, 7, 3, 5]
sorted_numbers = merge_sort(nums)
print(sorted_numbers)

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 5.01 µs
[2, 3, 3, 4, 5, 6, 7, 9]


When tested against a list of 1M numbers, quicksort performed marginally faster than merge_sort  
quicksort ~ 11s  
merge sort ~ 15s

## Some notes on Big O Notation

* Tells you number of times an operation is performed
* Doesn't describe duration of operation
* A useful tool for quickly describing how the run time of an algorithm increases as the data set it's operating on gets really big

## Searching algorithms

In [47]:
def index_of_item(collection, target):
    '''
    Linear search 
    O(n) time
    '''
    for i in range(0, len(collection)):
        if target == collection[i]:
            return i
    return None

In [55]:
import random
with open('names.txt') as f:
    names = f.read().splitlines()
names_shuffled = names.copy()
random.shuffle(names_shuffled)
search_names = names_shuffled[:100]
print(names[:10])
print(names_shuffled[:10])

['Michael', 'Christopher', 'Jessica', 'Matthew', 'Ashley', 'Jennifer', 'Joshua', 'Amanda', 'Daniel', 'David']
['Jayesh', 'Arnesha', 'Sheenna', 'Diona', 'Jvon', 'Kayci', 'Delwyn', 'Myka', 'Kalila', 'Frimet']


In [99]:
%%timeit
for n in search_names:
    index = index_of_item(names, n)

46.7 ms ± 2.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [69]:
sorted_names = quicksort(names)

In [None]:
sorted_names[len(sorted_names)-10:]

['Zoua',
 'Zubair',
 'Zubin',
 'Zulay',
 'Zuleika',
 'Zulema',
 'Zuleyka',
 'Zully',
 'Zulma',
 'Zvi']

In [101]:
def binary_search(collection, target):
    '''
    O(log n) time
    '''
    first = 0
    last = len(collection) - 1
    while first <= last:
        midpoint = (first + last) // 2
        if collection[midpoint] == target:
            return midpoint
        elif collection[midpoint] < target:
            first = midpoint + 1
        else:
            last = midpoint - 1
    return None

**Binary search is clearly faster than linear search**

In [100]:
%%timeit
for n in search_names:
    index = binary_search(sorted_names, n)
    # print(index)

245 µs ± 13.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [81]:
search_names[0] == sorted_names[7296]

True

In [82]:
search_names[-1] == sorted_names[10261]

True