# Chapter 10: Sorting and Searching 
### Interview Questions
---

In [1]:
from typing import List
from collections import defaultdict
from itertools import groupby

---
## 10.1 Sorted Merge
- input: two sorted arrays
    - `A` = large enough buffer at end to hold `B`
- merge `B` into `A` in sorted order

In [2]:
def mergeTwoSorted(arrA, arrB, A):
    # two pointers
    a,b = A-1, len(arrB)-1
    # third pointer fills from back to front 
    insert_idx = a+b+1
    
    while a >= 0 and b >= 0:
        if arrA[a] > arrB[b]:
            arrA[insert_idx] = arrA[a]
            a -= 1
        else:
            arrA[insert_idx] = arrB[b]
            b -= 1
        insert_idx -= 1
    
    # don't want double entries in first array
    while b >= 0:
        arrA[insert_idx] = arrB[b]
        insert_idx -= 1
        b -= 1
        
    return arrA

In [3]:
def mergeSorted(arrA,arrB,A):
    a,b = A-1, len(arrB)-1
    insert_idx = a+b+1
    while b>=0:
        if a>=0 and arrA[a] > arrB[b]:
            arrA[insert_idx] = arrA[a]
            a -= 1
        else:
            arrA[insert_idx] = arrB[b]
            b -= 1
        insert_idx -= 1
    return arrA

In [4]:
array1 = [2,5,6,18,22,45,0,0,0,0,0,0]
array2 = [1,2,4,8,6,12]
length1 = [2,5,6,18,22,45]
len1 = len(length1)

In [5]:
# mergeTwoSorted(array1,array2,len1)
mergeSorted(array1,array2,len1)

[1, 2, 2, 4, 5, 6, 8, 6, 12, 18, 22, 45]

#### Time Complexity: `O(a+b)`
- length of two different arrays

#### Space Complexity: `O(1)`
- rewriting first array -> in place solution 

---

## 10.2 Group Anagrams
- write a method to sort an array of strings so anagrams are next to each other 
- Anagram: words that have the same characters but in different orders 

In [6]:
from collections import defaultdict
from itertools import groupby

In [7]:
anagrams = ['abc','xzy','klm','zyx','kml','lmk','mlk','cab','xyz']

In [8]:
def anagramDefaultDict(anagrams):
    temp = defaultdict(list)
    
    for e in anagrams:
        temp[str(sorted(e))].append(e)
    res = list(temp.values())
    return res

In [9]:
anagramDefaultDict(anagrams)

[['abc', 'cab'], ['xzy', 'zyx', 'xyz'], ['klm', 'kml', 'lmk', 'mlk']]

In [10]:
def anagramItertools(anagrams):
    
    temp = lambda anagrams: sorted(anagrams)
    result = [list(val) for key, val in groupby(sorted(anagrams,key=temp), temp)]
    
    return result

In [11]:
anagramItertools(anagrams)

[['abc', 'cab'], ['klm', 'kml', 'lmk', 'mlk'], ['xzy', 'zyx', 'xyz']]

In [12]:
# Bucket Sort

def anagramsSimplePython(anagrams):
    
    def createKey(string):
        k = ''
        for ch in sorted(string):
            k += ch
        return str(k)
    
    def groupWords(data):
        group = dict()
        for x in data:
            if group.get(createKey(x)) == None:
                group[createKey(x)] = [x]
            else:
                group[createKey(x)].append(x)
        return group
    
    
    return groupWords(anagrams)

In [13]:
anagramsSimplePython(anagrams)

{'abc': ['abc', 'cab'],
 'xyz': ['xzy', 'zyx', 'xyz'],
 'klm': ['klm', 'kml', 'lmk', 'mlk']}

#### Time Complexity: `O(a * s * log s)`
- `a` = size of array
- `s` = length of longest string 

#### Space Complexity: `O(a)`
- creating new output 

---

---
## 10.3 Search in Rotated Array
- input: sorted array of `n` integers rotated an unknown number of times
- find element in the array that was sorted arround
- array orignally sorted in increasing order
- Binary Search
    - compare `x` to the midpoint
    - figure out if `x` belongs on the left or right side
    - take into account the inflection point on shifted array
        - find which side is normally ordered 
        - find potential duplicate entries 
    - if one half of the array is ordered, check if `x` is in the range
        - recurse accordingly
    - if left == middle -> search right
    - if middle == right -> search left
    - if both are true -> search both halves 
    

In [14]:
array = [15,16,19,20,25,1,3,4,5,7,10,14]
k = 5

In [15]:
class RecursiveRotatedArray:
    
    def binarySearch(nums,target):
        
        def find_r_idx(left,right):
            if nums[left] < nums[right]:
                return 0
            while left<=right:
                pivot = (left+right)//2
                if nums[pivot] > nums[pivot+1]:
                    return pivot + 1
                else:
                    if nums[pivot] < nums[left]:
                        right = pivot - 1
                    else:
                        left = pivot + 1
        
        def search(left,right):
            while left <= right:
                pivot = (left + right)//2
                if nums[pivot] == target:
                    return pivot
                else:
                    if target < nums[pivot]:
                        right = pivot - 1
                    else:
                        left = pivot + 1
            return -1
        

        
        n = len(nums)
        
        # base case
        if n == 1:
            return 0 if nums[0] == target else -1
        
        rotate_idx = find_r_idx(0,n-1)
        
        #target found -> return index
        if nums[rotate_idx] == target:
            return rotate_idx
        #search entire array
        if rotate_idx == 0:
            return search(0,n-1)
        #search right 
        if target < nums[0]:
            return search(rotate_idx,n-1)
        #search left
        return search(0,rotate_idx)
    

In [16]:
rr = RecursiveRotatedArray
rr.binarySearch(array,k)

8

In [17]:
def fasterBinarySearch(array,target):
    
    start, end = 0, len(array)-1
    
    while start <= end:
        mid = start+(end-start) // 2
        if array[mid] == target:
            return mid 
        elif array[mid] >= array[start]:
            #search left 
            if target >= array[start] and target < array[mid]:
                end = mid - 1
            #search right 
            else:
                start = mid + 1
        else:
            #search right 
            if target <= array[end] and target > array[mid]:
                start = mid + 1
            #search left
            else:
                end = mid - 1
    return -1 

In [18]:
fasterBinarySearch(array,k)

8

#### Time Complexity: `O(log N)`
- `O(n)` max because of potential high number of duplicates

#### Space Complexity: `O(1)`

---

---
## 10.4 Sorted Search: No Size 
- given an array-like data structure `listy` which lacks a size method 
    - has `elementAt(i)` method that returns the element at index `i` in `O(1)` time
    - if `i` is beyond the data structure:
        - returns `-1`
        - why the structure only supports positive integers
- given a `listy` which contains sorted, positive integers, find the index `x` occurs
- Binary Search requires you to know the length of the list -> don't have that here
    - all we know is `-1` will be returned if we go beyond list length 
    - go through the list exponentially: `1,2,4,8,16....`
        - length of list of length `n` can be found in `O(log n)` time 
            - `2ᴷ = N` -> `k = log n`
    - once length is found -> BINARY SEARCH TIME! 
    - TWEAKS:
        - if `mid = -1`:
            - treat as a "too big value"
            - search left
        - if the element is bigger than `x`:
            - jump over to the binary search part early (list is sorted already)

In [19]:
from typing import List

In [20]:
def searchy(listy: List[int], x: int) -> int:
    idx = 1
    while listy[idx] != -1 and listy[idx] < x:
        idx *= 2 
        
    return binarySearchy(listy, x, idx//2, idx)


def binarySearchy(listy: List[int], x: int, low, high):
    while low <= high:
        mid = low + (high-low)//2
        middle = listy[mid]
        if middle > x or middle == -1:
            high = mid -1 
        elif middle < x:
            low = mid + 1
        else:
            return mid
    return -1 

In [21]:
listy = [2,5,6,18,22,45]
x = 22

searchy(listy,x)

4

#### Time Complexity: `O(log n)`
- not knowing the length did not affect the runtime
- length found in `O(log n)` time
- binary search in `O(log n)` time 
- remove constants from Big O notation -> stays as `O(log n)`

---

---
## 10.5 Sparse Search
- input: sorted array of strings w/ dispersed empty strings 
- algorithm: find location of a given string 
- empty strings keep us from using traditional binary search 
- Binary Search Modification:
    - fix comparison against `mid`
    - if `mid` is an empty string:
        - move `mid` to the closest non-empty string
    - Recursive or Iterative Options 

In [22]:
def helper(strangzies: List[str], strang: str, first: int, last: int):
    
    # Base Case 
    if first > last:
        return -1
    
    mid = first + (last-first)//2
    # If `Mid` is EMPTY
    if strangzies[mid].isascii():
        left = mid - 1
        right = mid + 1
        while True:
            if left < first and right > last: 
                return -1 
            elif right <= last and strangzies[right].isascii():
                mid = right
                break 
            elif left >= first and strangzies[left].isascii():
                mid = left
                break 
            right += 1
            left += 1
            
            
    if strang == strangzies[mid]:
        return mid
    elif strangzies[mid] < strang:
        return helper(strangzies, strang, mid+1, last)
    # elif strangzies[mid].compareTo(strang) < 0:
    else:
        return helper(strangzies, strang, first, mid-1)
        

    
def sparseSearch(strangzies: List[str], strang: int):
    
    if strangzies == None or strang == None or strang == '':
        return -1
    return helper(strangzies, strang, 0, len(strangzies)-1)

In [23]:
stringzies = ["at","", "", "","ball","","","car","","","dad","",""]
word = "car"
eword = ""

print(sparseSearch(stringzies,word))
print(sparseSearch(stringzies,eword))
print([(x,y) for x,y in enumerate(stringzies)])

7
-1
[(0, 'at'), (1, ''), (2, ''), (3, ''), (4, 'ball'), (5, ''), (6, ''), (7, 'car'), (8, ''), (9, ''), (10, 'dad'), (11, ''), (12, '')]


#### Time Complexity: `O(n)`
- impossible to be any better 
- no smart way to find the non-empty strings
- worst case you have to look at every element in the array 
- Best to return `-1` here for error 
    - you don't know how many empty strings there are 
    
---

---
## 10.6: Sort Big File
- Have a 20GB file with one string per line, how would you sort the file? 
- *file size tells you they don't want you to bring all the data into memory*
    - only bring part of the data into memory 
- divide file into chunks of `x` megabytes each
    - `x` = amount of memory avaliable 
    - each chunk sorted separately 
        - after sorting:
            - put back into the file system 
        - sort in chunks to save time and memory 
    - once all chunks are sorted
        - merge the chunks one by one
- **called an External Sort**
---