# CH12 Hash Tables

In [None]:
# Some important notes:
# A hash table is a data structure used to store keys, optionally, with corresponding values. Inserts, deletes and lookups run in O(1) time on average.
# Compared to binary search trees, inserting and deleting in a hash table is more efficient
# If you want to update a key in a hash table, first delete it and then update it
# As a rule of thumb, avoid using mutable objects as keys
# Hash table can be represented using a dictionary. Trie which is a tree data structure can also be used => unlike BST, nodes in the tree do not store a key. Instead, the nodes's position in the tree defines the key which it is associated with.
# Hash tables have best theoretical and real world performance for lookup, insert and delete=>Avg Time Complexity: O(1)
# A single insert can take O(n) if the hash table has to be resized
# The following are the properties of hash table libraries in Python:
# - Common hashtable data structures in python are: set, dict, collections.defaultdict, collections.counter => set stores only keys whereas others store key-value pairs
# - a collections.defaultdict returns the default value of the type that was specified when the collection was instantiated,e.g.,ifd = collections.defaultdict(list),thenif k not in d[k] is []
# - collections.counter is used for counting the number of occurences of keys 
# - The most important operations for set are s.add(42),s.remove(42),s.discard(123),x in s, as well as s <= t (is s a subsetof t), and s - t (elements in s that are not in t).
# - In key-value collections, to iterate over keys-value pair iterate over items()
# Only immutable data types can act as keys 

## An application of hash tables

In [3]:
# Anagrams are popular word play puzzels, whereby rearranging letters of one set of words, you get another set of words.

# Task: Test if one word is anagram of another
# Sol: Sort both words, if they result in equal string after sorting then they are anagrams
# Time Complexity: O(mlogm) where m is the length of the string

# Task: write a program that takes as input a set of words and retums groups of anagrams for those words.
# Brute Force: Take each word -> sort it-> compare it with all other words => Time Complexity = O(n^2*m*logm)
# Optimized: Take each string -> sort it-> add the sorted string as a key to the dict if it is not already present

import collections
# Time Complexity: O(nmlogm) to sort n strings + O(nm) for insertions => O(nmlogn)
def find_anagrams(input_list):
    sorted_strings_to_anagrams = collections.defaultdict(list) # creates a dict where values of each key are stored in a list
    for s in input_list:
        #print(sorted(s))
        sorted_strings_to_anagrams[''.join(sorted(s))].append(s)
    
    return [group for group in sorted_strings_to_anagrams.values() if len(group) >= 2]

input_list = ['debitcard', 'elvis', 'silent', 'badcredit', 'lives', 'freedom', 'listen', 'levis', 'money']
print(f'The groups of anagrams are:{find_anagrams(input_list)}')

The groups of anagrams are:[['debitcard', 'badcredit'], ['elvis', 'lives', 'levis'], ['silent', 'listen']]


In [23]:
# Variant: Design an O(nm) algorithm for the same problem, assuming strings are made up of lower case English characters.
# Sol: HashMap<HashMap, ArrayList> here the inner hashmap is used to store the freq of each char which acts as key for the outer hash map
# Ref: https://www.geeksforgeeks.org/given-a-sequence-of-words-print-all-anagrams-together/

def find_anagrams_lower(input_list):
    sorted_strings_to_anagrams = collections.defaultdict(list)
    for s in input_list:
        # Creating inner hashmap for each string to store the freq of each char
        char_freq_map = collections.defaultdict(int)
        for c in s:
            char_freq_map[c] += 1
        # Adding string to the outer dict
        key_present = False
        for k in sorted_strings_to_anagrams.keys():
            if char_freq_map == dict(k):
                key_present = True
                sorted_strings_to_anagrams[k].append(s)
        if not key_present: 
            sorted_strings_to_anagrams[tuple(char_freq_map.items())].append(s) # dict cannot be key to another dict so converting it to tuple
            
    return [group for group in sorted_strings_to_anagrams.values() if len(group) >= 2]

input_list = ['debitcard', 'elvis', 'silent', 'badcredit', 'lives', 'freedom', 'listen', 'levis', 'money']
print(f'The groups of anagrams are:{find_anagrams_lower(input_list)}')

The groups of anagrams are:[['debitcard', 'badcredit'], ['elvis', 'lives', 'levis'], ['silent', 'listen']]


## Design of a hashable class

In [4]:
# Consider a class that represents contacts. For simplicity, assume each contact is a string. Suppose
# it is a hard requirement that the individual contacts are to be stored in a list and it's possible that
# the list contains duplicates. Two contacts should be equal if they contain the same set of strings,
# regardless of the ordering of the strings within the underlying list. Multiplicity is not important,
# i.e., three repetitions of the same contact is the same as a single instance of that contact

# We have to create a hashable class and define our own methods for equal operation
class ContactList:
    def __init__(self, names):
        self.names = names
    
    # Time Complexity: O(n) where n is the number of strings in the list
    def __hash__(self):
        # we would like to convert list to set to avoid duplicates and then hash the set
        # But set is a mutable data type so it cannot be hashed => we can use frozenset instead
        return hash(frozenset(self.names))
    
    def __eq__(self, other):
        return set(self.names) == set(other.names)

# Task: Merge all contacts to a single list
def merge_contact_list(contacts):
    return list(set(contacts))

### collections.Counter Example

In [8]:
from collections import Counter
myList = [1, 1, 2, 3, 4, 5, 2, 3, 4, 2, 1, 2, 3]
print(f'Counter for the given list:{Counter(myList)}')
c = Counter(a=3, b=1)
d = Counter(a=1, b=2)
print(f'c+d:{c+d}') # adding two counters
print(f'c-d:{c-d}') # subtract (keep only positive counts)
print(f'c&d:{c&d}') # intersection: min(c[x], d[x]) 
print(f'c|d:{c|d}') # union: max(c[x], d[x]) 

Counter for the given list:Counter({2: 4, 1: 3, 3: 3, 4: 2, 5: 1})
c+d:Counter({'a': 4, 'b': 3})
c-d:Counter({'a': 2})
c&d:Counter({'a': 1, 'b': 1})
c|d:Counter({'a': 3, 'b': 2})


## 12.1 Test for palindromic permutations

In [10]:
# Task: Write a program to test whether the letters forming a string can be permuted to form a palindrome.
# For example, "edified" can be permuted to form "deified".

# Brute Force: Generate all permutations of the input string and then test each for palindromicity
# Optimized: In order to be a palindrome, all chars should appear even no.of time or at most one char can appear odd no.of times

# Time Complexity: O(N)
def can_get_palindrome(string):
    # return sum(v % 2 for v in Counter(string).values()) <= 1 # code given in book
    char_counter = Counter(string)
    #print(char_counter)
    odd_count_chars = {}
    for key, value in char_counter.items():
        if value % 2 == 1:
            odd_count_chars[key] = value
            if len(odd_count_chars) > 1:
                return False
    return True

string = 'edified'
can_get_palindrome(string)

Counter({'e': 2, 'd': 2, 'i': 2, 'f': 1})


True

## 12.2 Is an anonymous letter constructible?

In [5]:
# Task: Write a program which takes text for an anonymous letter and text for a magazine and determines if it is possible to write the anonymous letter using the magazine. 
# The anonymous letter can be written using the magazine if for each character in the anonymous letter, the number of times it appears in the anonymous letter is no more than the number of times it appears in the magazine.
# Brute Force: Count the number of occurences of each charachter by parsing the letter and magazine multiple times.
import collections
# Better Approach: Pass the letter once and build a hash table then decrement the count of each char while parsing the magazine
# Time Complexity: O(m+n) Space complexity: size of the hash table => O(L)
# If the chars are ASCII encoded, then we can eliminate hash table and just use an integer array of size 256.
def is_letter_constructible_from_magazine(letter, magazine):
    char_freq_for_letter = collections.Counter(letter) # builds a hash table for each occurence of the char in letter
    
    for c in magazine:
        if c in char_freq_for_letter:
            char_freq_for_letter[c] -= 1
            if char_freq_for_letter[c] == 0:
                del char_freq_for_letter[c]
                if not char_freq_for_letter:
                    return True
    
    return not char_freq_for_letter

def is_letter_constructible_from_magazine_pythonic(letter, magazine):
    return not (collections.Counter(letter) - collections.Counter(magazine))

letter = "ababab"
magazine = "ababab"
print(f'Is {letter} constructible from {magazine}:{is_letter_constructible_from_magazine(letter, magazine)}')
print(f'Pythonic Is {letter} constructible from {magazine}:{is_letter_constructible_from_magazine_pythonic(letter, magazine)}')
letter = "ababab"
magazine = "abababc"
print(f'Is {letter} constructible from {magazine}:{is_letter_constructible_from_magazine(letter, magazine)}')
print(f'Pythonic Is {letter} constructible from {magazine}:{is_letter_constructible_from_magazine_pythonic(letter, magazine)}')
letter = "abababc"
magazine = "ababab"
print(f'Is {letter} constructible from {magazine}:{is_letter_constructible_from_magazine(letter, magazine)}')
print(f'Pythonic Is {letter} constructible from {magazine}:{is_letter_constructible_from_magazine_pythonic(letter, magazine)}')

Is ababab constructible from ababab:True
Pythonic Is ababab constructible from ababab:True
Is ababab constructible from abababc:True
Pythonic Is ababab constructible from abababc:True
Is abababc constructible from ababab:False
Pythonic Is abababc constructible from ababab:False


## 12.3 Implement an ISBN cache

In [8]:
# The Intemational Standard Book Number (ISBN) is a unique commercial book identifier. It is a string of length 10. The first 9 characters are digits; the last character is a check character. The check character is the sum of the first 9 digits, mod 11, with 10 represented by 'X'.
# Task: Create a cache for looking up prices of books identified by their ISBN. You implement lookup, insert, and remove methods. Use the Least Recently Used (LRU) policy for cache eviction. 
# If an ISBN is already present, insert should not change the price, but it should update that entry to be the most recently used entry. Lookup should also update that entry to be the most recently used entry.

# Hash tables are ideal for lookup. we need to create a hash table with ISBN as key with {price, least recent time a lookup was done} as values.
# Insert takes O(1), lookup takes O(1), once the cache is full we have to find the entry which is not recently used - O(n) where n is the size of the cache

# One way to improve performance is to use lazy garbage collection. If we want a cache of size n, we will not delete any entries until the hash table size grows to 2n entries.
# Drawback: O(n) increase in memory, O(n) time needed for lookups that miss on a full cache

# Optimized approach: Keys Queue using linked list + Hash table stores location of each key in the queue
# An altemative is to maintain a separate queue of keys. In the hash table we store for each key a reference to its location in the queue. 
# Each time an ISBN is looked up and is found in the hash table, it is moved to the front of the queue. (This requires us to use a linked list implementation of the queue, so that items in the middle of the queue can be moved to the head.) 
# Then the length of the queue exceeds n,when a new element is added to the cache, the item at the tail of the queue is deleted from the cache, i.e., from the queue and the hash table.
# Time complexity: Lookup - O(1) for the hash table lookup, O(1) - for updating the queue. So, all operations take O(1) time complexity
# Positions:
# - recently lookedup or inserted element will be present at the beginning of the linked list => left
# - least recently lookedup element will be present at the end of the linked list =>right
class LRUCache:
    def __init__(self, capacity):
        # Ordered dict-{key:reference_to_position_of_key_in_circular_doubly_linked_list}
        # Ref: https://stackoverflow.com/questions/33748340/how-does-pythons-ordereddict-remember-elements-inserted
        # Ref: https://hg.python.org/cpython/file/2.7/Lib/collections.py 
        # ordered_dict[key] = [PREV, NEXT, KEY] the key in the list holds the value
        self._isbn_price_table = collections.OrderedDict()
        self._capacity = capacity
    
    def lookup(self, isbn):
        if isbn not in self._isbn_price_table:
            return -1
        # Logic given in book - but I think this will insert the isbn at the end whereas we want the isbn at the beginning
        #price = self._isbn_price_table.pop(isbn) # removing isbn
        #self._isbn_price_table[isbn] = price # then inserting it again - 
        
        # we can move the key to the front 
        # Ref: https://docs.python.org/3/library/collections.html#collections.OrderedDict.move_to_end
        self._isbn_price_table.move_to_end(isbn, last = False) # moving key to the beginning
        return self._isbn_price_table[isbn] # returning value
    
    def insert(self, isbn, price):
        if isbn in self._isbn_price_table:
            return lookup(self, isbn) 
        elif self._capacity <= len(self._isbn_price_table):
            self._isbn_price_table.popitem() # removes the last entry
        
        self._isbn_price_table[isbn] = price # inserting an element at the end 
        self._isbn_price_table.move_to_end(isbn, last = False) # moving it to the beginning 
            
    def erase(self, isbn):
        return self._isbn_price_table.pop(isbn, None) is not None
    
cache = LRUCache(2)
cache.insert(1, 1)
print(cache._isbn_price_table)
cache.insert(2, 2)
print(cache._isbn_price_table)
cache.lookup(1)
print(cache._isbn_price_table)
cache.insert(3, 3)
print(cache._isbn_price_table)
cache.lookup(2)
print(cache._isbn_price_table)
cache.insert(4, 4)
print(cache._isbn_price_table)
cache.lookup(1)
print(cache._isbn_price_table)
cache.lookup(3)
print(cache._isbn_price_table)
cache.lookup(4)
print(cache._isbn_price_table)

OrderedDict([(1, 1)])
OrderedDict([(2, 2), (1, 1)])
OrderedDict([(1, 1), (2, 2)])
OrderedDict([(3, 3), (1, 1)])
OrderedDict([(3, 3), (1, 1)])
OrderedDict([(4, 4), (3, 3)])
OrderedDict([(4, 4), (3, 3)])
OrderedDict([(3, 3), (4, 4)])
OrderedDict([(4, 4), (3, 3)])


## 12.4 Compute the LCA, optimizing for close ancestors

In [9]:
# Design an algorithm for computing the LCA of two nodes in a binary tree. 
# The algorithm's time complexity should depend only on the distance from the nodes to the LCA.

# Approach: Traverse up from node1 and store the visited nodes in the hash table. Simultaneously, traverse up from node and check if the node is already present in the hash table if not insert it.
# Time Complexity: O(h) Space Complexity:O(D0 + D1) where D0 is the distance from the LCA to the first node and D1 is the distance to the second node.
 
# But the approach specified in 9.4 uses Time Complexity:O(h) and Space Complexity:O(1) - which is better than hash table so not implementing it here

## 12.5 Find the nearest repeated entries in an array

In [27]:
# Write a program which takes as input an array and finds the distance between a closest pair of equal entries. 
# For example, if s = <"All", "work", "and", "no", "play", "makes", "for", "no", "work", "no", "fun","and", "no", "results">, 
# then the second and third occurrences of "no" is the closest Pair.

# Brute Force: Iterate over all pairs of entries -> check if they are same -> update min distance if it is smalles than the curr values.
# Time Complexity: O(N)

# Approach: We can store the set of indices corresponding to a given value using a hash table and iterate over all such sets.
# Optimized approach: Just store the latest index of the element in the hash table instead of all the indices.
# Time Complexity: O(n) Space Complexity: O(d) where d is the number of distinct entries in the array
def find_nearest_repetition(paragraph):
    word_to_latest_index, nearest_repeated_distance = {}, float('+inf')
    for i, word in enumerate(paragraph.split(" ")):
        #print(f'i={i} word={word}')
        if word in word_to_latest_index:
            latest_ind = word_to_latest_index[word]
            nearest_repeated_distance = min(nearest_repeated_distance, i - latest_ind)
        word_to_latest_index[word] = i
    return nearest_repeated_distance if nearest_repeated_distance != float('+inf') else -1

s = "All work and no play makes for no work no fun and no results"
print(f'The smalles distance is {find_nearest_repetition(s)}')

The smalles distance is 2


## 12.6 Find the smallest subarray covering all values

In [38]:
# Write a program which takes an array of strings and a set of strings, and return the indices of the starting and ending index of a shortest subarray of the given array that "covers" the set, i.e., contains all strings in the set.
# Brute Force: Iterate over all subarays and test if the subarray contains all strings in the set. 
# - If the array size is n => number of subarrays is O(n^2). 
# - Testing if the subarray covers the set is O(n) using hash table
# - Overall Time complexity: O(n^3)

# Approach: At index i, store which strings in the set remain to be covered and then increase the length of the subarray - stop immediately once the set is covered.
# we can further improve by using the work done at i while calculating for i+1
# use the concept of two pointers left, right => right keeps incrementing in for loop, left increments until it covers the set
# Time Complexity: O(n) => both left and right pointer can increment at most n times so this algo has linear time complexity.
Subarray = collections.namedtuple('Subarray', ('start', 'end'))
def find_smallest_subarray_covering_set(paragraph, keywords):
    para_word_list = paragraph.split(" ")
    keywords_to_cover = collections.Counter(keywords)
    #print(keywords_to_cover)
    result = Subarray(-1, -1)
    remaining_to_cover = len(keywords)
    left = 0
    for right, p in enumerate(para_word_list):
        #print(f'right={right}, p={p} remaining={remaining_to_cover}')
        if p in keywords:
            keywords_to_cover[p] -= 1
            if keywords_to_cover[p] == 0:
                remaining_to_cover -= 1
                #print(f'right={right}, p={p} remaining={remaining_to_cover}')
        # Keep advancing left until we find the min subarray that covers the set and also we place left such that it no longer convers the entire set
        while remaining_to_cover == 0:
            if (result == (-1, -1)) or ((right - left) < result[1] - result[0]):
                result = (left, right) # update result to store the indices of the smallest subarray that covers the set
            pl = para_word_list[left]
            if pl in keywords:
                keywords_to_cover[pl] += 1
                if keywords_to_cover[pl] > 0:
                    remaining_to_cover += 1
            left += 1
    return result

paragraph = "apple banana apple apple dog cat apple dog banana apple cat dog"                
keywords = ["banana", "cat"]
print(f'The smallest subarray is {find_smallest_subarray_covering_set(paragraph, keywords)}')

The smallest subarray is (8, 10)


In [1]:
# variant: Given an array A, find a shortest subarray A[i, j] such that each distinct value present in A is also present in the subarray.
# Incomplete
import collections
Subarray = collections.namedtuple('Subarray',('start','end'))
def find_smallest_subarray_covering_distinct_words(paragraph):
    para_word_list = paragraph.split(" ")
    result = Subarray(-1, -1)
    
    word_dict = {}
    for right, para in enumerate(para_word_list):
        if not para in word_dict:
            word_dict[para] = 1
    print(word_dict)   
    remaining_words_to_cover = len(word_dict)
    left = 0
    for right, para in enumerate(para_word_list):
        if para in word_dict and word_dict[para] == 1:
            remaining_words_to_cover -= 1
            word_dict[para] -= 1
        
        while(remaining_words_to_cover == 0):
            if (result == (-1, -1)) or ((right - left) < result[1] - result[0]):
                result = (left, right) # update result to store the indices of the smallest subarray that covers the set
            pl = para_word_list[left]
            if pl in word_dict and word_dict[pl] == 0:
                word_dict[pl] += 1
                remaining_words_to_cover += 1
            left += 1
            
            
    print(word_dict)
    #remaining_words_to_cover = len(word_dict)
    #left = 0
    #for right, p in enumerate(para_word_list):
        
paragraph = "apple banana apple apple dog cat apple dog banana apple cat dog"
print(f'The smallest subarray is {find_smallest_subarray_covering_distinct_words(paragraph)}')

{'apple': 1, 'banana': 1, 'dog': 1, 'cat': 1}
{'apple': 1, 'banana': 0, 'dog': 0, 'cat': 0}
The smallest subarray is None


## 12.7 Find smallest subarray sequentially covering all values

In [12]:
# Task: Write a program that takes two arrays of strings, and return the indices of the starting and ending
# index of a shortest subarray of the first array (the "paragraph" array) that "sequentially covers",
# i.e., contains all the strings in the second array (the "keywords" array), in the order in which they
# appear in the keywords array. You can assume all keywords are distinct

# Brute Force: check if keywords occur sequentially in each subarray => Time complexity:O(n^3)

# Approach: Use hash tables: 
# 1.{keyword:keyword_index_in_keywords_array}, 
# 2.{keyword_index_in_keywords_array:keyword_latest_occurence_ind_in_para}, 
# 3.{keyword_index_in_keywords_array:shortest_subarray_length}
import collections
Subarray = collections.namedtuple('Subarray',('start','end'))
# Time Complexity: O(N) where N is the length of the paragraph array.
# Space Complexity: O(m) where m is the number of keywords - which is used by the three hash tables
def find_smallest_sequentially_covering_subarray(paragraph, keywords):
    paragraph_words = paragraph.split(" ")
    #print(f'para:{paragraph_words}')
    keyword_to_idx = {k: i for i, k in enumerate(keywords)} # hashtable that stores {keyword:index} 
    
    latest_occurence = [-1] * len(keywords) # stores the index at which a keyword occured recently in the paragraph
    shortest_subarray_length = [float('inf')] * len(keywords) # shortest sub array length at the index where a keyword occured recently
    
    shortest_distance = float('inf')
    result = Subarray(-1, -1)
    for i, p in enumerate(paragraph_words):
        if p in keyword_to_idx:
            keyword_idx = keyword_to_idx[p]
            latest_occurence[keyword_idx] = i # update the latest occurence index for the keyword
            if(keyword_idx == 0): # first keyword
                shortest_subarray_length[keyword_idx] = 1
            elif shortest_subarray_length[keyword_idx - 1] != float('inf'): # check if all the keywords occured at least once before this keyword
                distance_to_previous_keyword = i - latest_occurence[keyword_idx - 1]
                shortest_subarray_length[keyword_idx] = distance_to_previous_keyword + shortest_subarray_length[keyword_idx - 1]
        
            # check if all keywords got covered sequentially. If so, update the shortest_distance accordingly
            if(keyword_idx == len(keywords)-1 and shortest_subarray_length[-1] < shortest_distance):
                shortest_distance = shortest_subarray_length[-1]
                result = Subarray(i - shortest_distance + 1, i)
        
    return result

paragraph = "apple banana cat apple"
keywords = ["banana", "apple"] # correct ans :[1,3], even though [0,1] covers keywords but it does not cover them sequentially
print(f'The indices of the shortest subarray covering the keywords sequentially is {find_smallest_sequentially_covering_subarray(paragraph, keywords)}')

The indices of the shortest subarray covering the keywords sequentially is Subarray(start=1, end=3)


## 12.8 Find the longest subarray with distinct entries

In [15]:
# Task: Write a program that takes an array and returns the length of a longest subarray with the property that all its elements are distinct.
# Brute Force: For each subarray, check if all the words in it are distict. Time Complexity:O(n^3)

# Approrch: Suppose if i to j is the longest subarray with distinct entries and when j+1 comes in, two cases are possible:
# - if j+1 is not present in the longest subarray, it gets appended to the longest subarray
# - if j+1 is present in the longest subarray, then it gets appended to the subarray beginning at the most recent occurence of the keyword at j+1 till j+1
# Time Complexity: O(N) 
def longest_subarray_with_distinct_entries(A):
    most_recent_occurence = {} # hast table to store the index at which keyword occurred most recently
    longest_dup_free_subarray_start_idx = result = 0
    for i, a in enumerate(A):
        if a in most_recent_occurence:
            dup_idx = most_recent_occurence[a]
            # check if a is present in the longest_subarary
            if(dup_idx >= longest_dup_free_subarray_start_idx):
                # changing the longest subarray we have till now - so check its length and if it is larger than result => update result
                result = max(result, i - longest_dup_free_subarray_start_idx)
                longest_dup_free_subarray_start_idx = dup_idx + 1 # starting a new subarray
        most_recent_occurence[a] = i
    return max(result, (len(A) - longest_dup_free_subarray_start_idx)) # comparing result with last subarry

A = ['f','s','f','e','t','w','e','n','w','e']
print(f'The length of the longest subarray with distinct entries is {longest_subarray_with_distinct_entries(A)}')

The length of the longest subarray with distinct entries is 5


## 12.9 Find the length of a longest contained interval

In [19]:
# Task: Write a program which takes as input a set of integers represented by an array, and returns the size of a largest subset of integers in the array having the property that if two integers are in the subset, then so are all integers between them.
# Example: Input:(3,-2,7,9,8,7,2,0, -1,5,8) Largest subset:{-2,-1,0,1,2,3} should return 6

# Brute Force: Sort the array and then iterate through it, recording for each entry the largest subset with the desired property ending at that entry

# Approach: Put distinc elements of the input array in a set. Pick an element, search for element-1 and element +1 if found remove those from set and keep on searching
# Time Complexity: O(N) where N is the size of the input array. We add elemets to set one time and remove elements from the set one time.
def longest_contained_range(A):
    unprocessed_entries = set(A)
    
    max_interval_size = 0
    while unprocessed_entries:
        a = unprocessed_entries.pop()
        # search if values on both sides of a are present in the set
        lower_bound = a - 1# first search for a-1, if a-1 is present then search for a-2, and so on
        while lower_bound in unprocessed_entries:
            unprocessed_entries.remove(lower_bound) # Calculating interval using this will result in same result - so removing it
            lower_bound -= 1
        upper_bound = a + 1 # search for a+1, if a+1 is present then search for a+2 and so on
        while upper_bound in unprocessed_entries:
            unprocessed_entries.remove(upper_bound)# Calculating interval using this will result in same result - so removing it
            upper_bound += 1
        max_interval_size = max(max_interval_size, upper_bound - lower_bound - 1)
    
    return max_interval_size
            
A = [10, 5, 3, 11, 6, 100, 4]
print(f'The length of longest contained interval is {longest_contained_range(A)}')

The length of longest contained interval is 4


## 12.10 Compute all string decompositions

In [23]:
# Write a program which takes as input a string (the "sentence") and an array of strings (the "words"), and returns the starting indices of substrings of the sentence string which are the concatenation
# of all the strings in the words array. Order concatenation of words is immaterial. All words should appear once in the concatenation.

# Brute Force:  #Incomplete

def find_all_substrings(s, words):
    def match_all_words_in_dict(start):
        curr_string_to_freq = collections.Counter()
        for i in range(start, start + len(words) * unit_size, unit_size):
            curr_word = s[i:i + unit_size]
            it = word_to_freq[curr_word]
            if it == 0:
                return False
            curr_string_to_freq[curr_word] += 1
            if curr_string_to_freq[curr_word] > it:
                return False
            return True
    word_to_freq = collections.Counter(words)
    unit_size = len(words[0])
    return [i for i in range(len(s) - unit_size * len*(words) + 1) if match_all_words_in_dict(i)]

sentence = "amanaplanacanal"
words = ["can", "apl", "ana"]
print(f'The substring indices are :{find_all_substrings(sentence, words)}')

The substring indices are :None


## 12.11 Test the collatz conjecture

In [3]:
# The Collatz conjecture is the following: Take any natural number. If it is odd, triple it and add one;
# if it is even, halve it. Repeat the process indefinitely. No matter what number you begin with, you
# will eventually arrive at 1.

# Task: Test the Collatz conjecture for the first n positive integers
# Brute Force: Generate convergence seq for each number and then return True if the seq converges at 1 for each of them.
# Another Approach: The question is open-ended - so just give a good heuristic and code it well
# Collatz hypothesis can fail in two ways
# -a sequence returns to a previous number in the sequence, which implies it will loop forever,(save all numbers encountered in a set)
# - or a sequence goes to infinity.(overflow)
# The following tricks can be used to speedup computation:
# - if even number skip the check as it is immediately divided by 2.
# - save all the already verified numbers that converges to 1 in a set 
def test_collatz_conjecture(n):
    verified_numbers = set() # stores odd numbers already tested to converge to 1
    
    for i in range(3, n+1):
        sequence = set()
        test_i = i
        while test_i >= i:
            if test_i in sequence:
                return False # encountered number that is already visited - infinite loop
            sequence.add(test_i)
            if test_i % 2: #odd number
                if test_i in verified_numbers:
                    break
                verified_numbers.add(test_i)
                test_i = 3 * test_i + 1
            else:
                test_i //= 2
    return True

print(test_collatz_conjecture(11))
print(test_collatz_conjecture(100))

True
True
