# STRINGS

## Longest common prefix

In [None]:
# DO NOT USE: ELEGANT, BUT EXPENSIVE ( O(nlogn)? )
from typing import List
def lcp2(m: List[str]) -> str:
    '''
    Given a list of strings, returns the longest common prefix
   
    :param   list m: list of strings
    :return: longest common prefix
    :rtype:  str
    '''
    # edge cases    
    if not m:
        return ''
    elif len(m) == 1:
        return m[0]        
       
    # sorting & slicing
    s1, s2 = min(m), max(m)                    # lexicographical sorting of strings (expensive)
    for i in range(len(s1)):
        if s1[i] != s2[i]:
            return s1[:i]
    return s1


arr  = [
         'c://docs/ppt/mail/',
         'c://docs/ppt/mail',
         'c://docs/tpt/mail/',
         'c://docs/ppts/mail/',
       ]
lcp2(arr)

## All palindromic partitions of a string
All such partitions in which every element is a palindrome string.  
Example: "bcc" => [["b", "c", "c"], ["b", "cc"]] OR "geeks" => [["g", "e", "e", "k", "s"], ["g", "ee", "k", "s"]]

Solution:  
In recursion, when we are on index i, we incrementally check all substrings starting from i for being palindromic. If found, we recursively solve the problem for the remaining string and add this in our solution.
* Maintain a 2D vector to store all possible partitions + temp vector to store current partition, new start index of string to check partitions as we have already checked partitions before this index.
* Keep on iterating further on string and check if it is palindrome or not.
* If palindrome - add it to current partitions vector and recurse on this new string if it is not the end of the string. After coming back again change the current partition vector to the old one as it might have changed in the recursive step.
* If we reach the end of string while iterating - we have our partitions in temp vector so we will add them to results

In [77]:
def check_palindrome(string):
        
    if not string:
        return False
    return string == string[::-1]


def add_strings(v, s, temp, index): 
      
    # Iterate all indexes, recursively add remaining partitions if current str is palindrome
    length = len(s)
    string = ""
  
    current = temp[:] 
  
    if index == 0: 
        temp = [] 
    for i in range(index, length): 
        string += s[i] 
        if check_palindrome(string): 
            temp.append(string) 
            if i + 1 < length: 
                add_strings(v, s, temp[:], i + 1) 
            else: 
                v.append(temp) 
            temp = current 
            
            
def partition(s, v):
      
    # Generate all palindromic partitions of 's' and store result in 'v' 
    temp = [] 
    add_strings(v, s, temp[:], 0)
    
    for item in v:
        print(' '.join(item))
    
    return v
 
s = "abbacabba"
partitions = [] 
partition(s, partitions) 

a b b a c a b b a
a b b a c a bb a
a b b a c abba
a b b aca b b a
a b b aca bb a
a b bacab b a
a bb a c a b b a
a bb a c a bb a
a bb a c abba
a bb aca b b a
a bb aca bb a
a bb bbacabb a
abba c a b b a
abba c a bb a
abba c abba
abba abbacabba


[['a', 'b', 'b', 'a', 'c', 'a', 'b', 'b', 'a'],
 ['a', 'b', 'b', 'a', 'c', 'a', 'bb', 'a'],
 ['a', 'b', 'b', 'a', 'c', 'abba'],
 ['a', 'b', 'b', 'aca', 'b', 'b', 'a'],
 ['a', 'b', 'b', 'aca', 'bb', 'a'],
 ['a', 'b', 'bacab', 'b', 'a'],
 ['a', 'bb', 'a', 'c', 'a', 'b', 'b', 'a'],
 ['a', 'bb', 'a', 'c', 'a', 'bb', 'a'],
 ['a', 'bb', 'a', 'c', 'abba'],
 ['a', 'bb', 'aca', 'b', 'b', 'a'],
 ['a', 'bb', 'aca', 'bb', 'a'],
 ['a', 'bb', 'bbacabb', 'a'],
 ['abba', 'c', 'a', 'b', 'b', 'a'],
 ['abba', 'c', 'a', 'bb', 'a'],
 ['abba', 'c', 'abba'],
 ['abba', 'abbacabba']]

## Is circle?
Robot is in position (0, 0) and makes moves R (Right), L (Left), U (Up) and D (down) encoded into a string. Determine if  the robot made a circle.

In [129]:
def is_circle(moves):
    
    moves = moves.upper()        
    dict_moves = {  'U' : 0,
                    'D' : 0,
                    'R' : 0,
                    'L' : 0  }
        
    for char in moves:
        dict_moves[char] += 1
                
    return dict_moves['L'] == dict_moves['R'] and dict_moves['U'] == dict_moves['D']


moves1 = 'lr'
moves2 = 'lurd'
moves3 = 'llluluuurdrdrdrd'
moves4 = 'lrlrlrlrddddddudududud'

for moves in [moves1, moves2, moves3, moves4]:
    print(is_circle(moves))

True
True
True
False


## TRIES

### Longest Common Prefix using Trie (see above for word- and char-based solutions)
* Insert all the words one by one in the trie
* Walk the trie by going deeper until we find a node having more than 1 child (branching) or 0 children (a string gets exhausted). This is because the chars (nodes in trie) which are present in the longest common prefix must be the single child of its parent, i.e. no branching in any of these nodes


Time c.: inserting all the words in the trie O(MN) time; performing a walk O(M) where N = # chars in strings & M = length of largest string  
Space c. O(26*M*N) ~ O(MN) to store all strings in trie

In [2]:
ALPHABET_SIZE = 26
indexs = 0
class TrieNode: 
     
    def __init__(self): 
        self.isLeaf = False
        self.children = [None]*ALPHABET_SIZE 

# if not present, insert the node in the Trie 
def insert(key, root): 
    pCrawl = root 
    for level in range(len(key)): 
        index = ord(key[level]) - ord('a') 
        if pCrawl.children[index] == None: 
            pCrawl.children[index] = TrieNode() 
        pCrawl = pCrawl.children[index] 
    pCrawl.isLeaf = True

# construct trie 
def constructTrie(arr, n, root): 
    for i in range(n): 
        insert(arr[i], root) 

# Counts and returns number of children of the node 
def countChildren(node): 
    count = 0
    for i in range(ALPHABET_SIZE): 
        if node.children[i] != None: 
            count +=1
            # Keeping track of diversion in the trie 
            global indexs 
            indexs = i 
    return count 
      
# Perform walk on trie and return longest common prefix  
def walkTrie(root): 
    pCrawl = root 
    prefix = "" 
    while(countChildren(pCrawl) == 1 and pCrawl.isLeaf == False): 
        pCrawl = pCrawl.children[indexs] 
        prefix += chr(97 + indexs) 
    return prefix or -1
  
# Function that returns longest common prefix  
def commonPrefix(arr, n, root): 
    constructTrie(arr, n, root) 
    return walkTrie(root) 
  
# Driver code to test the code 
n = 4
arr = ["geeksforgeeks", "geeks", "geek", "geezer"] 
root = TrieNode() 
print(commonPrefix(arr,n, root)) 

gee


# Appendix

## Other algorithms to find a substring in a string

In [None]:
# Knuth-Morris-Pratt Algorithm - returns first found match only
def kmp1(pattern, text):
    
    # preprocess pattern
    prefix_array = get_prefix_array(pattern)
    print('Pattern {}: array {}'.format(pattern, prefix_array))

    # iterate over text
    i, j = 0, 0                                                                # index in text, pattern
    while i < len(text):
        if pattern[j] == text[i]:
            if j == (len(pattern) - 1):
                return True
            j += 1
        
        elif j > 0:                                                            # if this is prefix in pattern, go back
            j = prefix_array[j - 1]
            continue
        i += 1
                
    return False


# calculate new idx we should go to if comparison fails
def get_prefix_array(pattern):
    
    prefix = [0]
    i = 0
    j = 1
    while j < len(pattern):                                 # Pattern AAAB: array [0, 1, 2, 0]
        if pattern[i] == pattern[j]:                        # i,j,[]:  0,1,[0]  1,2,[0,1]  2,3[0,1,2] 1,3[0,1,2]  0,4[0,1,2,0]
            i += 1
        elif i > 0:
            i = prefix[i-1]
            continue
        j += 1
        prefix.append(i)
    return prefix


# if __name__ == '__main__':

# Test 1)
pattern = "abc1abc12"
text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
text2 = "alskfjaldsk23adsfabcabc"
print(kmp(pattern, text1))
print(kmp(pattern, text2))

# Test 2)
pattern = "ABABX"
text = "ABABZABABYABABX"
print(kmp(pattern, text))

# Test 3)
pattern = "AAAB"
text = "ABAAAAAB"
print(kmp(pattern, text))

# Test 4)
pattern = "abcdabcy"
text = "abcxabcdabxabcdabcdabcy"
print(kmp(pattern, text))

# Test 5)
pattern = "aaab"
get_prefix_array(pattern)

In [None]:
def string_matching_naive(text='', pattern=''):
    """Returns positions where pattern is found in text.

    We slide the string to match 'pattern' over the text

    O((n-m)m)
    Example: text = 'ababbababa', pattern = 'aba'
                     string_matching_naive(t, s) returns [0, 5, 7]
    @param text text to search inside
    @param pattern string to search for
    @return list containing offsets (shifts) where pattern is found inside text
    """

    n = len(text)
    m = len(pattern)
    offsets = []
    for i in range(n-m+1):
        if pattern == text[i:i+m]:
            offsets.append(i)

    return offsets


def string_matching_rabin_karp(text='', pattern='', hash_base=256):
    """Returns positions where pattern is found in text.

    worst case: O(nm)
    O(n+m) if the number of valid matches is small and the pattern is large.

    Performance: ord() is slow so we shouldn't use it here

    Example: text = 'ababbababa', pattern = 'aba'
             string_matching_rabin_karp(text, pattern) returns [0, 5, 7]
    @param text text to search inside
    @param pattern string to search for
    @param hash_base base to calculate the hash value
    @return list containing offsets (shifts) where pattern is found inside text
    """

    n = len(text)
    m = len(pattern)
    offsets = []
    htext = hash_value(text[:m], hash_base)
    hpattern = hash_value(pattern, hash_base)
    for i in range(n-m+1):
        if htext == hpattern:
            if text[i:i+m] == pattern:
                offsets.append(i)
        if i < n-m:
            htext = (hash_base *
                     (htext -
                      (ord(text[i]) *
                       (hash_base ** (m-1))))) + ord(text[i+m])

    return offsets


def string_matching_boyer_moore_horspool(text='', pattern=''):
    """Returns positions where pattern is found in text.

    O(n)
    Performance: ord() is slow so we shouldn't use it here

    Example: text = 'ababbababa', pattern = 'aba'
         string_matching_boyer_moore_horspool(text, pattern) returns [0, 5, 7]
    @param text text to search inside
    @param pattern string to search for
    @return list containing offsets (shifts) where pattern is found inside text
    """
    m = len(pattern)
    n = len(text)
    offsets = []
    if m > n:
        return offsets
    skip = []
    for k in range(256):
        skip.append(m)
    for k in range(m-1):
        skip[ord(pattern[k])] = m - k - 1
    skip = tuple(skip)
    k = m - 1
    while k < n:
        j = m - 1
        i = k
        while j >= 0 and text[i] == pattern[j]:
            j -= 1
            i -= 1
        if j == -1:
            offsets.append(i + 1)
        k += skip[ord(text[k])]

    return offsets

In [None]:
# The following program is the python implementation of
# Rabin Karp Algorithm

class RollingHash:
    def __init__(self, text, size_word):
        self.text = text
        self.hash = 0
        self.size_word = size_word

        for i in range(0, size_word):
            #ord maps the character to a number
            #subtract out the ASCII value of "a" to start the indexing at zero
            self.hash += (ord(self.text[i]) - ord("a")+1)*(26**(size_word - i -1))

        #start index of current window
        self.window_start = 0
        #end of index window
        self.window_end = size_word

    def move_window(self):
        if self.window_end <= len(self.text) - 1:
            #remove left letter from hash value
            self.hash -= (ord(self.text[self.window_start]) - ord("a")+1)*26**(self.size_word-1)
            self.hash *= 26
            self.hash += ord(self.text[self.window_end])- ord("a")+1
            self.window_start += 1
            self.window_end += 1

    def window_text(self):
        return self.text[self.window_start:self.window_end]

def rabin_karp(word, text):
    if word == "" or text == "":
        return None
    if len(word) > len(text):
        return None

    rolling_hash = RollingHash(text, len(word))
    word_hash = RollingHash(word, len(word))
    #word_hash.move_window()

    for i in range(len(text) - len(word) + 1):
        if rolling_hash.hash == word_hash.hash:
            if rolling_hash.window_text() == word:
                return i
        rolling_hash.move_window()
    return None

In [186]:
def rabin_karp(pattern, text):
    """

    The Rabin-Karp Algorithm for finding a pattern within a piece of text
    with complexity O(nm), most efficient when it is used with multiple patterns
    as it is able to check if any of a set of patterns match a section of text in o(1) given the precomputed hashes.

    This will be the simple version which only assumes one pattern is being searched for but it's not hard to modify

    1) Calculate pattern hash

    2) Step through the text one character at a time passing a window with the same length as the pattern
        calculating the hash of the text within the window compare it with the hash of the pattern. Only testing
        equality if the hashes match

    """
    p_len = len(pattern)
    p_hash = hash(pattern)

    for i in range(0, len(text) - (p_len - 1)):

        # written like this t
        text_hash = hash(text[i:i + p_len])
        if text_hash == p_hash and \
                text[i:i + p_len] == pattern:
            return True
    return False


if __name__ == '__main__':
    # Test 1)
    pattern = "abc1abc12"
    text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
    text2 = "alskfjaldsk23adsfabcabc"
    assert rabin_karp(pattern, text1) and not rabin_karp(pattern, text2)

    # Test 2)
    pattern = "ABABX"
    text = "ABABZABABYABABX"
    assert rabin_karp(pattern, text)

    # Test 3)
    pattern = "AAAB"
    text = "ABAAAAAB"
    assert rabin_karp(pattern, text)

    # Test 4)
    pattern = "abcdabcy"
    text = "abcxabcdabxabcdabcdabcy"
    assert rabin_karp(pattern, text)

## Word squares

In [None]:
# Given a set of words (without duplicates),
# find all word squares you can build from them.

# A sequence of words forms a valid word square
# if the kth row and column read the exact same string,
# where 0 ≤ k < max(numRows, numColumns).

# For example, the word sequence ["ball","area","lead","lady"] forms
# a word square because each word reads the same both horizontally
# and vertically.

# b a l l
# a r e a
# l e a d
# l a d y
# Note:
# There are at least 1 and at most 1000 words.
# All words will have the exact same length.
# Word length is at least 1 and at most 5.
# Each word contains only lowercase English alphabet a-z.

# Example 1:

# Input:
# ["area","lead","wall","lady","ball"]

# Output:
# [
  # [ "wall",
    # "area",
    # "lead",
    # "lady"
  # ],
  # [ "ball",
    # "area",
    # "lead",
    # "lady"
  # ]
# ]

# Explanation:
# The output consists of two word squares. The order of output does not matter
# (just the order of words in each word square matters).

import collections

def word_squares(words):
    n = len(words[0])
    fulls = collections.defaultdict(list)
    for word in words:
        for i in range(n):
            fulls[word[:i]].append(word)

    def build(square):
        if len(square) == n:
            squares.append(square)
            return
        prefix = ""
        for k in range(len(square)):
            prefix += square[k][len(square)]
        for word in fulls[prefix]:
            build(square + [word])
    squares = []
    for word in words:
        build([word])
    return squares