In [None]:
# Topic: Hashing

# Objective: Find all pairs with a given sum

# Description: Given two unsorted arrays A[] of size n and B[] of size m of distinct elements, the task is to find all pairs from both arrays whose sum is equal to x.

# Examples:

# Input :  A[] = {-1, -2, 4, -6, 5, 7}
#          B[] = {6, 3, 4, 0}  
#          x = 8
# Output : 4 4, 5 3

# Input : A[] = {1, 2, 4, 5, 7} 
#         B[] = {5, 6, 3, 4, 8}  
#         x = 9
# Output : 1 8, 4 5, 5 4

# A Naive approach is to simply run two loops and pick elements from both arrays. One by one check that both elements sum is equal to given value x or not

# An efficient solution to this problem is to use hashing. The hash table can be implemented with unordered_set in C++. We store all elements of the first array in the hash table. For each element of the second array, we subtract it from x and look the result up in the hash table. If the result is present, we print that result (which is an element of the first array) together with the current element of the second array.

# 1) Initialize an empty hash table s and insert every element A[i] of A[] into s.
# 2) Do the following for each element B[j] in B[]:
#    (a)    If x - B[j] is present in s, then print the pair (x - B[j], B[j]).

In [55]:
# unsorted array with distinct elements -> set
# a + b = x
# a = x-b
# First example from the problem statement (overwritten by the assignments below).
a = {-1, -2, 4, -6, 5, 7}
b = {6, 3, 4, 0}
x = 8

# Second example from the problem statement (also overwritten below).
a = {1, 2, 4, 5, 7} 
b = {5, 6, 3, 4, 8}  
x = 9

# Third data set: these are the values left bound to a, b and x after this cell runs.
a = {1, 0, -4, 7, 6, 4}
# NOTE: the value 2 is written twice here; a set literal keeps only one copy of it.
b = {0 ,2, 4, -3, 2, 1}
x = 8

In [56]:
# naive approach - using double loop -> O(n^2)
# Compare every element of a with every element of b and report each pair
# whose sum equals x.
for i in a:
    for j in b:
        if i + j == x:
            # A single pre-formatted argument prints identically on
            # Python 2 and Python 3.
            print("{} {}".format(i, j))

4 4
6 2
7 1


In [83]:
# Empty braces create a dict (not a set) and empty brackets create a list.
# The literals are inspected directly instead of being bound to a and b, so
# the demo sets a and b defined earlier stay intact for the cells that follow.
print(type({}))
print(type([]))

<type 'dict'>
<type 'list'>


In [57]:
# efficient approach - using hashing -> O(n)
# Hash every element of a once so each membership test below is a set lookup.
s = set(a)
for j in b:
    # a + b = x  =>  a = x - b, so look for the complement of j among a's elements
    if x - j in s:
        # 8-3=5, 3
        # 8-4=4, 4
        print("{} {}".format(x - j, j))

7 1
6 2
4 4


In [72]:
# Topic: Hashing
# Objective: Find all pairs with a given sum
# Description: Given an integer array, output all pairs that sum up to a specific value k.

# Sorting method: O(n log n)
# A more efficient solution would be to sort the array and having two pointers to scan the array from the beginning and the end at the same time.
nums = [1,5,4,2,6,7,2]
def pair_sum_sort(nums, k):
    """Print every pair of elements of ``nums`` whose sum is ``k``.

    The values are sorted and scanned with two pointers moving inward from
    both ends. One line "<smaller> <larger>" is printed per pair of
    positions, so repeated values produce repeated lines no matter which
    side of the pair the repetition is on.

    Args:
        nums: sequence of numbers; it is not modified.
        k: the target sum.

    Returns:
        None. Results are printed; nothing is printed when ``nums`` has
        fewer than two elements or no pair matches.
    """
    if len(nums) < 2:
        return None
    sorted_nums = sorted(nums)
    left, right = 0, len(sorted_nums) - 1
    while left < right:
        low, high = sorted_nums[left], sorted_nums[right]
        current_sum = low + high
        if current_sum < k:
            left += 1
        elif current_sum > k:
            right -= 1
        elif low == high:
            # Every value between the pointers is the same number, so any two
            # of the remaining positions form a pair: count choose 2.
            count = right - left + 1
            for _ in range(count * (count - 1) // 2):
                print("{} {}".format(low, high))
            break
        else:
            # Measure the run of equal values on each side. Both scans stop
            # inside the window because low != high.
            next_left = left
            while sorted_nums[next_left] == low:
                next_left += 1
            next_right = right
            while sorted_nums[next_right] == high:
                next_right -= 1
            # Each copy of low pairs with each copy of high.
            for _ in range((next_left - left) * (right - next_right)):
                print("{} {}".format(low, high))
            left, right = next_left, next_right
pair_sum_sort(nums, 9)

2 7
2 7
4 5


In [78]:
# Hashing method: O(n)
# a + b = 9
# a = 9 - b
def pair_sum_hashset(nums, k):
    """Print each distinct pair of values from ``nums`` that sums to ``k``.

    A set of already-visited values is kept; for every element the
    complement ``k - n`` is looked up in it. Each pair is stored as
    ``(smaller, larger)`` in a set, so a pair is reported once even if it
    can be formed several times.

    Args:
        nums: sequence of numbers.
        k: the target sum.

    Returns:
        None. The pairs are printed one tuple per line in ascending order;
        nothing is printed when ``nums`` has fewer than two elements.
    """
    if len(nums) < 2:
        return None
    seen = set()
    output = set()
    for n in nums:
        target = k - n
        if target in seen:
            output.add((min(n, target), max(n, target)))
        else:
            seen.add(n)
    # Sorting makes the printed order independent of set iteration order.
    # A single joined string prints identically on Python 2 and 3.
    print("\n".join(map(str, sorted(output))))
pair_sum_hashset(nums, 9)

(4, 5)
(2, 7)


In [None]:
# Topic: Hashing

# Objective: First non-repeating character in a stream

# Description: Given an input stream of n characters consisting only of lowercase letters, the task is to find the first non-repeating character each time a character is inserted into the stream.

# Example

# Flow in stream : a, a, b, c
# a goes to stream : 1st non repeating element a (a)
# a goes to stream : no non repeating element -1 (a, a)
# b goes to stream : 1st non repeating element is b (a, a, b)
# c goes to stream : 1st non repeating element is b (a, a, b, c)

# Example:
# Input:
# 2
# 4
# a a b c
# 3
# a a c 
# Output:
# a -1 b b
# a -1 c

In [63]:
def first_non_repeating(stream):
    """Yield, after each character of ``stream``, the first non-repeating one.

    Args:
        stream: iterable of characters, consumed one at a time.

    Yields:
        For every character read, the earliest character that has occurred
        exactly once so far, or None when every character seen so far has
        repeated.
    """
    candidates = []   # characters seen exactly once, in arrival order
    repeated = set()  # characters seen two or more times
    for ch in stream:
        if ch not in repeated:
            if ch in candidates:
                # Second occurrence: drop it and remember it for good, so a
                # third occurrence cannot bring it back as a candidate.
                candidates.remove(ch)
                repeated.add(ch)
            else:
                candidates.append(ch)
        # Report after every character, including already-repeated ones.
        yield candidates[0] if candidates else None


stream = "aabc"
for ch, first in zip(stream, first_non_repeating(stream)):
    print("reading {} from stream".format(ch))
    if first is not None:
        print("first non-repeating character so far is {}".format(first))

reading a from stream
first non-repeating character so far is a
reading a from stream
reading b from stream
first non-repeating character so far is b
reading c from stream
first non-repeating character so far is b


In [64]:
# Topic: Hashing

# Problem: Find the Odd Occurrence

# Description: Given an array of positive integers. All numbers occur even number of times except one number which occurs odd number of times. Find the number.

# Example:

# Input
# 1
# 5
# 8 4 4 8 23

# Output
# 23

In [77]:
# O(n)
def find_odd_occurrence(nums):
    """Return the value that occurs an odd number of times in ``nums``.

    Each value toggles its membership in a set: it is added on its 1st, 3rd,
    ... occurrence and removed on its 2nd, 4th, ... occurrence, so only
    values with an odd count remain at the end.

    Args:
        nums: iterable of hashable values.

    Returns:
        A value with an odd number of occurrences, or None when every value
        occurs an even number of times. If several values have odd counts,
        which one is returned is unspecified.
    """
    unpaired = set()
    for value in nums:
        if value in unpaired:
            unpaired.remove(value)
        else:
            unpaired.add(value)
    if not unpaired:
        return None
    return next(iter(unpaired))


p = [8, 4, 4, 8, 23, 6, 6]
odd = find_odd_occurrence(p)
if odd is not None:
    print(odd)
else:
    print("odd number not found")

23


In [86]:
# Topic: Hashing

# Objective: Find all four sum numbers

# Description: Given an array A of size N, find all combination of four elements in the array whose sum is equal to a given value K. For example, if the given array is {10, 2, 3, 4, 5, 9, 7, 8} and K = 23, one of the quadruple is “3 5 7 8” (3 + 5 + 7 + 8 = 23).

# Example:
# Input:
# 2
# 5 3
# 0 0 2 1 1 
# 7 23
# 10 2 3 4 5 7 8
# Output:
# 0 0 1 2 $
# 2 3 8 10 $2 4 7 10 $3 5 7 8 $

In [87]:
# Topic: Hashing

# Objective: Find whether an array is subset of another array

# Examples:
# Input: arr1[] = {11, 1, 13, 21, 3, 7}, arr2[] = {11, 3, 7, 1}
# Output: arr2[] is a subset of arr1[]

# Input: arr1[] = {1, 2, 3, 4, 5, 6}, arr2[] = {1, 2, 4}
# Output: arr2[] is a subset of arr1[]

# Input: arr1[] = {10, 5, 2, 23, 19}, arr2[] = {19, 5, 3}
# Output: arr2[] is not a subset of arr1[]

In [89]:
# a1 is the candidate superset and a2 the candidate subset.
a1 = {11, 1, 13, 21, 3, 7}
a2 = {11, 3, 7, 1}

# For sets, "<=" is the operator form of issubset(): it is True when every
# element of a2 is also in a1.
a2 <= a1

True

In [92]:
# Topic: Hashing

# Objective: Given an array of numbers, return the mode (the number that appears the most times)

In [114]:
# Brute-force method -> O(n^2)
nums = [5,5,5,1,1,1,1,2,3,3,4]
def get_mode_bf(nums):
    """Return the most frequent value in ``nums`` by brute force.

    Every element is counted with a full pass over ``nums``. A value replaces
    the current answer only when its count is strictly larger, so on a tie
    the value that appears first in ``nums`` is returned. Returns None for
    an empty input.
    """
    best_value = None
    best_count = 0
    for candidate in nums:
        # Count how many elements equal the current candidate.
        occurrences = sum(1 for other in nums if other == candidate)
        if occurrences > best_count:
            best_value, best_count = candidate, occurrences
    return best_value
get_mode_bf(nums)

1

In [119]:
# Sorting method -> O(n log n)
nums = [5,5,5,1,1,2,3,3,4]
def get_mode_sort(nums):
    """Return the most frequent value in ``nums`` using sorting.

    After sorting, equal values are adjacent, so a single pass that tracks
    the length of the current run of equal values finds the longest run.

    Args:
        nums: sequence of mutually comparable values; it is not modified.

    Returns:
        The value with the highest count, or None for an empty input. On a
        tie the smallest of the tied values is returned, because runs are
        visited in ascending order and only a strictly longer run replaces
        the current answer.
    """
    if len(nums) == 0:
        return None
    sorted_nums = sorted(nums)
    mode = sorted_nums[0]
    max_ct = 0
    run_value, run_ct = sorted_nums[0], 0
    for cnum in sorted_nums:
        # Update the run length first, so the comparison below always
        # refers to the run that cnum itself belongs to.
        if cnum == run_value:
            run_ct += 1
        else:
            run_value, run_ct = cnum, 1
        if run_ct > max_ct:
            max_ct = run_ct
            mode = cnum
    return mode
get_mode_sort(nums)

5

In [124]:
# Hashing method -> O(n)
def get_mode_dict(nums):
    """Return the most frequent value in ``nums`` using a dictionary of counts.

    Counts are updated while scanning ``nums`` once. The answer changes only
    when a running count becomes strictly larger than the best so far, so on
    a tie the value that reaches the top count first is returned. Returns
    None for an empty input.
    """
    size = len(nums)
    if size == 0:
        return None
    if size == 1:
        return nums[0]
    counts = {}
    best_value, best_count = None, 0
    for value in nums:
        counts[value] = counts.get(value, 0) + 1
        current = counts[value]
        if current > best_count:
            best_count = current
            best_value = value
    return best_value
get_mode_dict(nums)

5

In [101]:
# Topic: Recursion

# Objective: Sum of a List of Numbers

l = [1,3,5,7,9]
def sum_list_recur(l):
    """Return the sum of the elements of ``l``, computed recursively.

    Args:
        l: a sequence that supports len(), indexing and slicing.

    Returns:
        0 for an empty sequence, the single element for a one-element
        sequence, otherwise the first element plus the recursive sum of the
        remaining elements.
    """
    # Base case for empty input, so the recursion never indexes an empty list.
    if len(l) == 0:
        return 0
    # Base case that ends the recursion (the escape clause).
    if len(l) == 1:
        return l[0]
    # Recurse down to the base case, then add the values up on the way back.
    return l[0] + sum_list_recur(l[1:])
sum_list_recur(l)

25

In [127]:
# Topic: Recursion

# Objective: Factorial of number

# Description: fact(n) returns n * (n-1) * (n-2) * ... * 1, and fact(0) is 1

def fact_recur(n):
    """Return n! (the factorial of ``n``), computed recursively.

    Args:
        n: a non-negative integer.

    Returns:
        1 for n equal to 0 or 1, otherwise n * (n-1) * ... * 1.

    Raises:
        ValueError: if ``n`` is negative.
    """
    if n < 0:
        raise ValueError("factorial is not defined for negative numbers")
    if n <= 1:
        # 0! and 1! are both 1.
        return 1
    return n * fact_recur(n - 1)
fact_recur(3)

6

In [143]:
# Topic: Recursion

# Objective: Converting an Integer to a String in Any Base

def tostr_recur(n, base):
    """Return the representation of integer ``n`` in the given ``base``.

    Args:
        n: the integer to convert; a negative value gets a leading "-".
        base: the target base, from 2 to 16 inclusive.

    Returns:
        A string using the digits 0-9 and A-F.

    Raises:
        ValueError: if ``base`` is outside 2..16.
    """
    convert_str = "0123456789ABCDEF"
    # A base below 2 would never shrink n, and a base above 16 has no digits.
    if not 2 <= base <= len(convert_str):
        raise ValueError("base must be between 2 and 16")
    if n < 0:
        # Convert the magnitude; a negative n must never be used as an index.
        return "-" + tostr_recur(-n, base)
    if n < base:
        return convert_str[n]
    # Convert the higher-order digits first, then append the lowest digit.
    return tostr_recur(n // base, base) + convert_str[n % base]


print(tostr_recur(1453, 16))

5AD


In [155]:
# Topic: Hashing

# Objective: Given an array of numbers where one number appears twice, find the repeat number

nums = [7,2,3,4,5,5,6]
def repeat_hash(nums):
    """Return the value that appears more than once in ``nums``.

    Occurrences are counted in a dictionary while scanning ``nums`` once.

    Args:
        nums: iterable of hashable values.

    Returns:
        The repeated value with the highest count (the first to reach that
        count on a tie), or None when no value appears more than once.
    """
    d = {}
    repeat_num = None
    # Start at 1 so a value qualifies only from its second occurrence on;
    # otherwise the first element would be reported when nothing repeats.
    repeat_ct = 1
    for n in nums:
        d[n] = d.get(n, 0) + 1
        if d[n] > repeat_ct:
            repeat_num = n
            repeat_ct = d[n]
    return repeat_num
repeat_hash(nums)

5

In [156]:
# Use set() to solve the problem instead of dictionary
def repeat_hashset(nums):
    """Return the first value in ``nums`` that has already been seen.

    Values are collected in a set during a single scan. Returns None when
    the scan finishes without meeting any value twice.
    """
    visited = set()
    for candidate in nums:
        size_before = len(visited)
        visited.add(candidate)
        # The set did not grow, so candidate was already in it.
        if len(visited) == size_before:
            return candidate
    return None
repeat_hashset(nums)

5

In [1]:
# Topic: Hashing

# Objective: Given an array of numbers in the range 1-1000, return a new array with those same numbers, in sorted order. There may be repeats in the input array. If there are, you should include those repeats in your sorted answer.

nums = xrange(1,1001)
def sort_array_hash(nums, low=1, high=1000):
    """Return the integers of ``nums`` in ascending order, repeats included.

    The values are counted in a dictionary, then every integer from ``low``
    to ``high`` is emitted as many times as it was counted.

    Args:
        nums: iterable of integers, each within ``low``..``high`` inclusive.
        low: smallest allowed value (default 1).
        high: largest allowed value (default 1000).

    Returns:
        A new sorted list containing every element of ``nums``.

    Raises:
        ValueError: if a value lies outside ``low``..``high``; such a value
            would otherwise be missing from the result.
    """
    # Count how many times each value occurs.
    d = {}
    for n in nums:
        if not low <= n <= high:
            raise ValueError(
                "value {} is outside the range {}-{}".format(n, low, high))
        d[n] = d.get(n, 0) + 1
    # Walk the allowed range in order, emitting each value count times.
    sorted_nums = []
    for n in range(low, high + 1):
        if n in d:
            sorted_nums.extend([n] * d[n])
    return sorted_nums
sort_array_hash(nums)[:10]

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [20]:
# Topic: Probability Theory and Algorithm Design
# Objective: Solving reservoir sampling or stratified random sampling
# Description: Say you have a stream of items of large and unknown length that we can only iterate over once. Create an algorithm that randomly chooses an item from this stream such that each item is equally likely to be selected.

# The simplest concrete example would be a stream with only containing a single item -> make our algorithm return the item with probability of 1
# A stream with two items: hold onto the first item from stream since we don't know if the stream only has one item and when the second item comes along, we want to return one of the two items (each with a probability of 1/2). Generate a random number, R, between 0 and 1, and return the first item if R is less than 0.5 and return the second item if R is greater than 0.5
# A stream with three items: after seeing the second item in the stream, we are now holding onto either the first item or the second item, each with probability of 1/2. When the third element arrives, we know there are only 3 items in the stream, so we need to return the third element with probability 1/3. This means that we have to return the other items with probability 1 - 1/3 = 2/3
    # 1st item: (1/2) * (2/3) = 1/3
    # 2nd item: (1/2) * (2/3) = 1/3
    # 3rd item: 1/3
# Generalize this algorithm to any N:
    # At every step N,
        # Keep the next element in the stream with probability: 1/N (e.g. 1/3 for 3rd item)
        # The probability of keeping the element we are currently holding onto: 1 - (1/N) = (N-1)/N (e.g. (3-1)/3 = 2/3 for other items (1st and 2nd item))
        
# Print, for streams of 1..N items, the probability that each item is the one
# being held after the last item of the stream has been processed.
N = 4
prob = None
for n in range(1, N + 1):
    print("stream of {} items:".format(n))
    for i in range(1, n + 1):
        if i == n:
            # The newest item is kept with probability 1/n.
            prob = 1. / n
            print("    probability of latest ({}) item: {}".format(i, prob))
        else:
            # Here prob still holds 1/(n-1) from the previous stream length;
            # an earlier item survives step n with probability (n-1)/n.
            print("    probability of {} item via ({}) * ({}): {}".format(i, prob, (n-1.)/n, prob * (n-1.)/n))

stream of 1 items:
    probability of latest (1) item: 1.0
stream of 2 items:
    probability of 1 item via (1.0) * (0.5): 0.5
    probability of latest (2) item: 0.5
stream of 3 items:
    probability of 1 item via (0.5) * (0.666666666667): 0.333333333333
    probability of 2 item via (0.5) * (0.666666666667): 0.333333333333
    probability of latest (3) item: 0.333333333333
stream of 4 items:
    probability of 1 item via (0.333333333333) * (0.75): 0.25
    probability of 2 item via (0.333333333333) * (0.75): 0.25
    probability of 3 item via (0.333333333333) * (0.75): 0.25
    probability of latest (4) item: 0.25


In [25]:
# Objective: Given a string as your input, delete any reoccurring character, and return the new string.

# using set -> O(n)
# Keep the first occurrence of every character, remembering seen ones in a set.
mystr = "hello"
st = set()
kept = []
for s in mystr:
    if s not in st:
        st.add(s)
        kept.append(s)
# Join once at the end instead of growing the string one character at a time.
uniq_str = "".join(kept)
print(uniq_str)

# using dictionary -> O(n)
mystr = "hello"
d = {}
kept = []
for s in mystr:
    d[s] = d.get(s, 0) + 1
    # A count of 1 means this is the first time the character has appeared.
    if d[s] == 1:
        kept.append(s)
uniq_str = "".join(kept)
print(uniq_str)

helo
helo


In [70]:
# Objective: Maximum number of partitions that can be sorted individually to make sorted / Max Chunks To Make Sorted 
# Description: Given an array arr that is a permutation of [0, 1, ..., arr.length - 1], we split the array into some number of "chunks" (partitions), and individually sort each chunk.  After concatenating them, the result equals the sorted array. What is the most number of chunks we could have made?
# Strategy: The key to solving this problem is to use a stack to track the existing chunks. Each chunk is represented by its min and max values. Each chunk is essentially an interval, and the intervals cannot overlap.

arr = [5,4,7,6,8]
# sort partitions using stack class
def sort_partitions_stack(arr):
    """Print the maximum number of chunks ``arr`` can be split into so that
    sorting each chunk separately and concatenating them sorts the array.

    A stack holds one ``[min, max]`` interval per chunk found so far. A new
    value smaller than the max of the chunk on top of the stack has to be
    sorted together with that chunk, so the two are merged; merging repeats
    while the condition holds.

    Args:
        arr: sequence of comparable values, or None.

    Returns:
        None. The chunk count is printed; 0 is printed for None or an empty
        sequence.
    """
    if arr is None or len(arr) == 0:
        print(0)
        # Stop here so empty input is reported once and None is never iterated.
        return
    stack = []
    for n in arr:
        min_arr, max_arr = n, n
        # Absorb every earlier chunk whose max is larger than n.
        while stack and n < stack[-1][1]:
            top = stack.pop()
            min_arr = min(top[0], min_arr)
            max_arr = max(max_arr, top[1])
        stack.append([min_arr, max_arr])
    print(len(stack))
sort_partitions_stack(arr)

def sort_partitions_simple(arr):
    """Return how many positions of ``arr`` have a running maximum equal to
    their 0-based index.

    The running maximum starts at 0. For a permutation of 0..len(arr)-1,
    each such position is a point where a chunk can end.
    """
    chunks = 0
    running_max = 0
    for index, value in enumerate(arr):
        if value > running_max:
            running_max = value
        # Count the position when the largest value so far equals its index.
        if running_max == index:
            chunks += 1
    return chunks
sort_partitions_simple(arr)

3


0