# String Problems : Striver a-z

## Strings Easy Problem

### Remove outermost Paranthesis

Example 1:

Input: s = "(()())(())"

Output: "()()()"

Explanation: 

The input string is "(()())(())", with primitive decomposition "(()())" + "(())".

After removing outer parentheses of each part, this is "()()" + "()" = "()()()".


Example 2:

Input: s = "(()())(())(()(()))"

Output: "()()()()(())"

Explanation: 
The input string is "(()())(())(()(()))", with primitive decomposition "(()())" + "(())" + "(()(()))".

After removing outer parentheses of each part, this is "()()" + "()" + "()(())" = "()()()()(())".
Example 3:

Input: s = "()()"

Output: ""

Explanation: 

The input string is "()()", with primitive decomposition "()" + "()".
After removing outer parentheses of each part, this is "" + "" = "".
 

In [10]:
def removeOuterParentheses(s: str) -> str:

    ### given a valid paranthesis string.
    ### when a stack is completely empty its a primite string
    ### TC: O(N)
    ### SC: O(1)

    open_case = 0
    start = 0
    ans = ""

    for i in range(len(s)):

        if s[i] == '(':
            open_case += 1
        elif s[i] == ')':
            open_case -= 1

        if open_case == 0:
            ans += s[start+1:i]
            start = i + 1

    return ans

print(removeOuterParentheses("(()())(())"))
print(removeOuterParentheses("(()())(())(()(()))"))

()()()
()()()()(())


## Reverse Words in a String

Problem Statement: Given a string s, reverse the words of the string.

Examples:

Example 1:

Input: s=”this is an amazing program”

Output: “program amazing an is this”

Example 2:

Input: s=”This is decent”

Output: “decent is This”

In [None]:
def reverseWords(s: str) -> str:

    s = s.strip()

    output = ""

    for i in s.split()[::-1]:
        output += i + " "

    return output.strip()

## Largest Odd Number in String

In [None]:
class Solution:
    def largestOddNumber(self, num: str) -> str:
        
        ### TLE : 161/196
        ### TC: O(N^2)
        ### SC: O(1)
        # n = len(num)
        # max_odd = float("-inf")

        # for i in range(n):
        #     substring = ""
        #     for j in range(i, n):

        #         substring += num[j]

        #         if int(substring) % 2 == 1:
        #             max_odd = max(max_odd, int(substring))
        
        # if max_odd == float('-inf'): return ""

        # return str(max_odd)

        ### solving using math property of odd numbers 
        ### a number if odd if the least signigicant digit is odd.
        ### eg. 4861 is odd, 51 is odd.

        ### problem is changed to get the index of right most odd number.

        ### TC: O(N)
        ### SC: O(1)

        n = len(num)
        idx = -1

        for i in range(n):

            if int(num[n-i-1]) % 2 == 1:
                return num[:n-i]
        
        return ""
            

### Longest Common Prefix, (LCP)

Write a function to find the longest common prefix string amongst an array of strings.

If there is no common prefix, return an empty string "".

Example 1:

Input: strs = ["flower","flow","flight"]

Output: "fl"

Example 2:

Input: strs = ["dog","racecar","car"]

Output: ""

Explanation: There is no common prefix among the input strings.

In [None]:
class Solution:
    def longestCommonPrefix(self, strs: List[str]) -> str:

        ### Solution 1
        ### get the smallest element, longest common prefix can be as long as the smallest string in strs
        ### TC : O(N) + O(k * N), k is len of smallest string.
        ### SC : O(k) -> to store the smallest element.

        mini = float("inf")
        smallest_str = ""

        ###O(N)
        for i in strs:

            if mini > len(i):
                mini = len(i)
                smallest_str = i

        ans = ""

        ## O(K)
        for idx, i in enumerate(smallest_str):

            flag = False

            ##(N)
            for j in strs:

                if j[idx] != i:
                    flag = True

            if flag:
                break

            ans += i

        # return ans

        ### Solution 2
        ### using stack/ horizontal scanning
        ### TC: O(S), where s is total number of characters.
        ### SC: O(1)

        def get_common_prefix(s1, s2):
            out = ""

            for i,j in zip(s1, s2):

                if i != j: break

                out += i

            return out


        # ### horizontal scanning
        while len(strs) > 1:

            s1 = strs.pop()
            s2 = strs.pop()

            prefix = get_common_prefix(s1, s2)

            strs.append(prefix)

        # return strs[0]

        ### Solution 3
        ### vertical scanning, check index 0 for all strings
        ### this is same as solution one but that is optimized to only search the smallest string.
        ### TC: O(S), S = m * n, m->length of string.
        ### SC: (1)
        for i in range(len(strs[0])):

            c = strs[0][i]

            for j in range(1, len(strs)):

                if i == len(strs[j]) or strs[j][i] != c:
                    return strs[0][:i]

        # return strs[0]

        #### divide and conquer
        ### TC: O(m * log(N))
        ### SC: O(mlog(N))
        def conquer(s1, s2):
            out = ""
            for i,j in zip(s1, s2):

                if i != j: break
                out += i

            return out

        def divide(l, h):

            if l == h: return strs[l]

            mid = (l + h) // 2

            lpart = divide(l, mid)
            rpart = divide(mid+1, h)

            return conquer(lpart, rpart)

        # return divide(0, len(strs)-1)

        ### binary search
        ### TC: O(S * log(m))
        ### SC: O(1)

        ###get the smallest str
        ####get the mini string in strs
        mini = float('inf')

        for i in strs:
            mini = min(len(i), mini)

        low = 1
        high = mini

        def is_common_prefix(mid):

            s = strs[0][0:mid+1]

            for i in strs[1:]:

                if not i.startswith(s):
                    return False

            return True


        while low <= high:

            mid = (low + high) // 2

            ###do vertical scan and check if 0 : mid can be a common prefix
            if is_common_prefix(mid):
                ### we need to longest to increase the length of prefix.
                low = mid + 1
            else:
                high = mid - 1

        # print(mid, low, high,  (low + high) // 2 + 1)
        # return strs[0][0 : (low + high) // 2 + 1]

        ### >>> leetcode solution !!!!

        ### sort lexicography.
        ### TC: (nlog(n))
        ### SC: O(1)

        strs.sort()

        ###
        s1 = strs[0]
        s2 = strs[-1]

        prefix = ""
        for i, j in zip(s1, s2):

            if i != j: break

            prefix += i

        return prefix


### Isomorphic Strings

In [11]:
def areIsomorphic(str1: str, str2: str) -> bool:

    if len(str1) != len(str2): return False

    ### TC: O(N)
    ### SC: O(N)
    mp = {}
    mp2 = {}

    for i, j in zip(str1, str2):

        if i not in mp and j not in mp2:
            mp[i] = j
            mp2[j] = i
        elif mp.get(i) != j or mp2.get(j) != i:
            return False

    return True


### Check If One String Is A Rotation Of Another String

In [None]:
def isCyclicRotation(p: str, q: str) -> int:
    if p == q: return 1

    # n = len(p)

    # for i in range(n):

        # p = p[-1] + p[:-1]

        # if p == q: return 1

    p = p + p

    if q in p:
        return 1

    return 0


### Check if two Strings are anagrams of each other

Problem Statement: Given two strings, check if two strings are anagrams of each other or not.

Examples:

Example 1:

Input: CAT, ACT

Output: true

Explanation: Since the count of every letter of both strings are equal.

Example 2:

Input: RULES, LESRT 

Output: false

Explanation: Since the count of U and T  is not equal in both strings.

### good solutions

1. https://takeuforward.org/data-structure/check-if-two-strings-are-anagrams-of-each-other/

In [None]:
def isAnagram(str1, str2) :
	
	if len(str1) != len(str2): return True


	### TC: 2 * O(N) * log(N) (for inserting)
	### SC: 2 * O(N)

	# str_dict_1 = {}
	# str_dict_2 = {}

	# for i in str1:
	# 	str_dict_1[i] = str_dict_1.get(i, 0) + 1
	
	# for j in str2:
	# 	str_dict_2[j] = str_dict_2.get(j, 0) + 1
	
	# # print(str_dict_1, str_dict_2)
	# return str_dict_1 == str_dict_2

	###sort and compare 
	### TC: 2 * O(NlogN)
	### SC: O(1)

	str1 = sorted(str1)
	str2 = sorted(str2)

	return str1 == str2



## Strings Medium Problems

### Sort Characters by frequency

1. https://www.codingninjas.com/studio/problems/sorting-characters-by-frequency_1263699?utm_source=striver&utm_medium=website&utm_campaign=a_zcoursetuf
2. https://leetcode.com/problems/sort-characters-by-frequency/

In [1]:
def sortByFrequency(n: int, s: str) -> str:
    

    ### TC : N * log(N)
    ### SC : O(N)
    
    freq = {}

    for i in s:

        freq[i] = freq.get(i, 0) + 1
    
    ls = sorted(freq, key=lambda x : freq[x])

    out = ""

    for i in ls:
        out += i * freq[i]
    
    return out

## String 2 Roman

In [12]:
def romanToInt(s:str) -> int:

    ### X I -> 11
    ### I X -> 9
    ### if i + 1 > i.  I + X => 10 - 1 => 9
    ### if i + 1 < i.  X I => 10 + 1 => 11

    roman_dict = {'I': 1, 'V': 5, 'X': 10,
                  'L': 50, 'C': 100, 'D': 500,
                  'M': 1000}

    roman = 0
    n = len(s)
    i = 0

    for i in range(n):

        if i + 1 < n and roman_dict[s[i+1]] > roman_dict[s[i]]:
            roman -= roman_dict[s[i]]
        else:
            roman += roman_dict[s[i]]

        i += 1

    return roman


In [13]:
print(romanToInt('XII'))
print(romanToInt('XC'))
print(romanToInt('IX'))
print(romanToInt('XIX'))

12
90
9
19


## ATIO

In [19]:
from typing import Optional

def createAtoi(s: str) -> int:

    ###discard any leading whitespaces
    s = s.strip()

    ###check for a sign character if any.
    sign = 1

    if s[0] == '-':
        sign = -1
        s = s[1:]
    elif s[0] == '+':
        sign = 1
        s = s[1:]

    digit = 0

    for i in s:

        ###ignore non-digit
        if not (ord('0') <= ord(i) <= ord('9')): break

        ###leading zeros
        if i == '0' and digit == 0: continue

        digit = digit * 10 + int(i)

    # print(digit)

    ###adjust the sign
    digit *= sign

    ###constraint the range to [-2^31, 2^31-1]

    if digit > 2 ** 31 - 1:
        digit = 2 ** 31 - 1
    elif digit < -2 ** 31:
        digit = -2 ** 31

    return digit





In [20]:
print(createAtoi('-23'))
print(createAtoi('-ban23'))

-23
0


## Count With K Different Characters

Problem Statement

You are given a string 'str' of lowercase alphabets and an integer 'k'.

Your task is to return the count all the possible substrings that have exactly 'k' distinct characters.

For example:

str = 'abcad' and 'k' = 2.

we can see that the substrings {ab, bc, ca, da} are the only substrings with distinct characters.

Therefore, the answer will be 4.

In [None]:
from typing import *

def countSubStrings(s: str, k: int) -> int:
    
    ###brute force, create all possiable substrings 

    ### Time complixity : O(N^3)
    ### Space complixity : O(1)
    # n = len(s)
    # count = 0

    # for i in range(n):
    #     item = s[i]
    #     for j in range(i+1,n):
    #         item += s[j]
    #         if len(set(item)) == k:
    #             count += 1

    # ###also need to add len of string in case k == 1
    # if k == 1:
    #     count += n

    # return count

    ### Reduce the approach to O(N^2) by making the inner checking == O(1) by using more space.
    ### can use hashmap of type dict or list 

    ###Time Complexity: O(N^2)
    ###Space Complexity: O(26) -> O(1)
    
    n = len(s)
    count = 0

    for i in range(n):
        
        ### for each subarray needs a new hashmap
        hashmap = [0] * 26
        unique_freq = 0

        for j in range(i, n):

            ###calcualte idx, 97 is acii for a
            idx = 97 - ord(s[j])

            ###now use the hashmap to think of 
            if hashmap[idx] == 0:
                unique_freq += 1
            
            hashmap[idx] += 1

            if unique_freq == k:
                count += 1
            elif unique_freq > k:
                break
    
    return count

## Longest Palindromic Substring[Do it without dynamic programming]

In [None]:
class Solution:
    def longestPalindrome(self, s: str) -> str:
        
        ###brute force approach is to calculate all the substrings
        ###then check if substring is palindrome
        ###if yes then calculate the length.
        ###if length is greater then max lenght update max_length and answer.
        
        ###Time Complexity: O(N^3)
        ###SC: O(1)
        
        # def is_palindrome(x):

        #     return x == x[::-1]

        # ans = ""
        # max_length = float("-inf")
        # n = len(s)

        # for i in range(n):

        #     for j in range(i, n):

        #         sub_string = s[i:j+1]

        #         ###check if palindrome
        #         if is_palindrome(sub_string):

        #             # length = len(sub_string)
        #             length = j - i + 1

        #             if length > max_length:
        #                 ans = sub_string
        #                 max_length = length
        
        # return ans

        ###TC: O(N * 2N) -> (N^2)
        ###SC: O(1)

        # def expand(left: int, right: int, n: int, s: str)-> List[int]:
            
        #     start = 1
        #     end = -2

        #     while left >= 0 and right < n:
                
        #         if s[left] != s[right]:
        #                 return start, end
                
        #         start = left
        #         end = right

        #         left -= 1
        #         right += 1
            
        #     return start, end


        # ## Expand from the center 
        # n = len(s)
        # ans = ""
        # max_length = float("-inf")    

        # for center in range(n):

        #     ###check once for add 
        #     odd_start, odd_end = expand(center, center, n, s)
        #     ###check once for even    
        #     even_start, even_end = expand(center, center+1, n , s)

        #     odd_ans = odd_end - odd_start + 1
        #     even_ans = even_end - even_start + 1

        #     if odd_ans > even_ans:
        #         if max_length < odd_ans:
        #             max_length = odd_ans
        #             ans = s[odd_start: odd_end+1]
        #     else:
        #         if max_length < even_ans:
        #             max_length = even_ans
        #             ans = s[even_start: even_end+1]
            
        
        # return ans

        '''
        Given nnn as the length of s,

        Time complexity: O(n2)O(n^2)O(n 
        2
        )

        There are 2n−1=O(n)2n - 1 = O(n)2n−1=O(n) centers. For each center, we call expand, which costs up to O(n)O(n)O(n).

        Although the time complexity is the same as in the DP approach, the average/practical runtime of the algorithm is much faster. This is because most centers will not produce long palindromes, so most of the O(n)O(n)O(n) calls to expand will cost far less than nnn iterations.

        The worst case scenario is when every character in the string is the same.

        Space complexity: O(1)O(1)O(1)

        We don't use any extra space other than a few integers. This is a big improvement on the DP approach.
        '''

        '''
        Solving using DP

        if start == end: then move to start+1, end-1

        if start != end: 
                either start, end-1
                or 
                either start+1, end


        will also need to calculate the length.
        '''

        ###recursive.
        # def worker(start, end, length, s):

        #     ###base condition
        #     if start > end:
        #         return length

        #     if s[start] == s[end]:
        #         return worker(start+1, end-1, length+2, s)
        #     else:
        #         return max(worker(start+1, end, 0, s), worker(start, end-1, 0, s))


        # return worker(0, len(s)-1, 0, s)-1

## Sum of Beauty of all substring

In [25]:
from typing import *

def sumOfBeauty(s : str) -> int:

    n = len(s)

    result = 0

    for i in range(n):

        hashmap = {}

        for j in range(i, n):

            hashmap[s[j]] = hashmap.get(s[j], 0) + 1

            ###find max and min
            maxi = float('-inf')
            mini = float('inf')

            for k in hashmap:

                maxi = max(maxi, hashmap[k])
                mini = min(mini, hashmap[k])

            result += abs(maxi - mini)

            print(hashmap, maxi, mini)

    return result

In [26]:
print(sumOfBeauty('hello'))

{'h': 1} 1 1
{'h': 1, 'e': 1} 1 1
{'h': 1, 'e': 1, 'l': 1} 1 1
{'h': 1, 'e': 1, 'l': 2} 2 1
{'h': 1, 'e': 1, 'l': 2, 'o': 1} 2 1
{'e': 1} 1 1
{'e': 1, 'l': 1} 1 1
{'e': 1, 'l': 2} 2 1
{'e': 1, 'l': 2, 'o': 1} 2 1
{'l': 1} 1 1
{'l': 2} 2 2
{'l': 2, 'o': 1} 2 1
{'l': 1} 1 1
{'l': 1, 'o': 1} 1 1
{'o': 1} 1 1
5


In [33]:
from typing import *

def sumOfBeauty(s : str) -> int:

    ### TC : O(26 * N^2) ~ O(N^2)
    ### SC : O(26) ~ O(1)

    n = len(s)

    result = 0

    for i in range(n):

        # hashmap = {}
        hashmap = [0] * 26

        for j in range(i, n):

            # hashmap[s[j]] = hashmap.get(s[j], 0) + 1
            hashmap[ord(s[j]) - ord('a')] += 1

            ###find max and min
            maxi = float('-inf')
            mini = float('inf')

            ### O(26)
            for k in hashmap:
                if k == 0: continue
                maxi = max(maxi, k)
                mini = min(mini, k)

            result += abs(maxi - mini)

    return result




In [35]:
print(sumOfBeauty('hello'))

5


## Reverse Every Word in A String

In [36]:
def reverseString(s:str) -> str:
    
    ### TC : O(N), N is the number of words.

    s = s.strip()

    output = ""

    for i in s.split()[::-1]:

        output += i + " "
    
    return output.strip()

reverseString('The example word')

'word example The'

### Maximum Nesting Depth of Paranthesi…

Example 1:

Input: s = "(1+(2*3)+((8)/4))+1"

Output: 3

Explanation: Digit 8 is inside of 3 nested parentheses in the string.

Example 2:

Input: s = "(1)+((2))+(((3)))"

Output: 3

In [37]:
def maxDepth(s: str) -> int:

        max_count = 0
        open_count = 0

        for i in s:

            if i == '(':
                open_count += 1
                max_count = max(max_count, open_count)
            elif i == ')':
                open_count -= 1

        return max_count

print(maxDepth("(1+(2*3)+((8)/4))+1"))
print(maxDepth("(1)+((2))+(((3)))"))

3
3


# String Hard Problems

## Count and Say


In [63]:
def countAndSay(n: int) -> str:

    def say(x):

        ###using two pointer
        i = 0
        j = i
        n = len(x)
        out = ""

        while j < n:

            while j < n and x[i] == x[j]:
                j += 1

            count = j - i
            out += f"{count}{x[i]}"
            i = j

        return out


    def worker(n):

        if n == 1: return "1"

        x = worker(n-1)

        print(x)

        return say(x)

    return worker(n)

# print(say("1"))
# print(say("2"))
# print(say("11"))
# print(say("1121")) ### expected 211211

In [64]:
print(countAndSay(4))

1
11
21
1211


In [65]:
print(countAndSay(5))

1
11
21
1211
111221


## First Occurence of a Pattern in a Text

In [42]:
def firstOccurence(text:str, pat: str) -> int:

    # ### TC: O(N)
    # try:
    #     return text.index(pat)
    # except:
    #     return -1

    ### greedy approach

    ###TC : O(N^M)
    ###SC: O(1)

    # n = len(text)
    # for i in range(n):

    #     ###check if all chars match.
    #     ans = i
    #     flag = True
    #     for j in pat:
    #         if i < n and j == text[i]:
    #             i += 1
    #         else:
    #             flag = False
    #             break

    #     ### found first occurance of complete string pat.
    #     if flag:
    #         return ans
    # return -1

    ###can use Rabin-Karp to make the TC: O(N)



print(firstOccurence("abcdecd", "cd"))
print(firstOccurence("codingninjas", "ninja"))
print(firstOccurence("abcde", "xyz"))

2
6
-1


# Pattern Searching Section Problems

https://www.geeksforgeeks.org/algorithms-gq/pattern-searching/?ref=lbp


# Advantages of string searching algorithms
1. Useful when performing a search in a database.
2. Useful for finding patterns in substrings of larger strings.


# Different Methods
1. Naive approach that checks, all characters of the main string to the pattern. TC: O(N*M), SC: O(1).
2. Rabin-karp, uses rolling hash technique to compare substring of text T with patter P in O(1), TC: O(N+M), O(N*M) -> worst case, SC: O(1).
3. Z Algorithm


### Naive approach

In [44]:
txt = "AABAACAADAABAAABAA"
pat = "AABA"

M = len(pat)
N = len(txt)

##

for i in range(N-M):
    k = -1
    for j in range(M):

        if pat[j] != txt[i+j]: break
        k = j+1

    if k == len(pat):
        print(f'Pattern found at index {i}')


Pattern found at index 0
Pattern found at index 9
Pattern found at index 13


# Rabin-Karp Algorithm for pattern matching.

Given a text T[0. . .n-1] and a pattern P[0. . .m-1], write a function search(char P[], char T[]) that prints all occurrences of P[] present in T[] using Rabin Karp algorithm. You may assume that n > m.

Like the Naive Algorithm, the Rabin-Karp algorithm also check every substring. But unlike the Naive algorithm, the Rabin Karp algorithm matches the hash value of the pattern with the hash value of the current substring of text, and if the hash values match then only it starts matching individual characters. So Rabin Karp algorithm needs to calculate hash values for the following strings.

Pattern itself
All the substrings of the text of length m which is the size of pattern.

### Algorithm Steps
1. Need to calculate the hash values of substring of T with length m. (using rolling hashing)
2. Need to calculate the hash value of pattern P.
3. Steps to calculate the hash value.
    1. choose a prime number p for modulo, this ensures good distribution.
    2. choose another prime number as b for base, often size of the character set.
    3. hash function : c * (b^(pattern_len-i-1))%p where c is char at index i.
    4. Using formula calculate the hash value for entire pattern P.
    5. For text T, calculate for each substring of length P, update the hash value for each substring. using formula : hash = (hash - (text[i-pattern_length] * (b ^ (pattern_length-1))%p) * b + text[i]
    6. Compare the hash values, if there a match then check char-by-char.

# Rabin-Karp Algorithm for pattern matching.

### Formula for hash function

1. assume the ascii for a = 1, b = 2, c = 3, d = 4
2. let base be 10 (can be any value)
3. let the mod value is max_int
4. assume the length of the window be 4
5. let the string be "abcd"
6. Mod is add to avoid overflow.
7. Since we start from 0 the max power can be string length or window size - 1

    ```hash_value = (1 * base ^ 3 + 2 * base ^ 2 + 3 * base ^ 1 + 4 * base ^ 0) % MOD```

8. To update the hash_value eg. we want to add e with a ascii value of 5.

    ```hash_value_new = (p * (hash_value - 1 * base ^ 3)  + 5 * base ^ 0) % MOD ```

9. If hash_value is -ve, then we need

    ```hash_value = hash_value + MOD```


### Resources
1. https://www.geeksforgeeks.org/rabin-karp-algorithm-for-pattern-searching/?ref=lbp
2. https://www.programiz.com/dsa/rabin-karp-algorithm
3. https://www.bing.com/videos/riverview/relatedvideo?&q=rabin-karp+algo+solution&&mid=351EA31D1ABC049BB834351EA31D1ABC049BB834&&FORM=VRDGAR


In [38]:
str = "abc"
hash_code = 0
base = 10
MOD = 1000000
for i in str:
    hash_code = (base *  hash_code  + ord(i)) % MOD

hash_code

10779

In [39]:
hash_code = 0
for i in range(3):
    val = ord(str[i]) * (base ** (3-i-1))
    print(val)
    hash_code += val

hash_code

9700
980
99


10779

In [41]:
hash_code = 0
for i in str:
    print(hash_code)
    hash_code = base * hash_code + ord(i)
    print(hash_code)

    print('---')

0
97
---
97
1068
---
1068
10779
---


In [45]:
def rabin_karp_1(text, pattern):

    ### TC: ~O(N^2) or O(N*M) solution as hash() function takes O(n) tc.
    ### SC: O(1)

    ###using builtin hash()

    n = len(text)
    m = len(pattern)

    pattern_hash = hash(pattern)

    for i in range(0, N-M+1):

        if hash(text[i: i + M]) == pattern_hash:
            if text[i:i+M] == pattern:
                print('Match Found at Index ', i)

txt = "AABAACAADAABAAABAA"
pat = "AABA"

rabin_karp_1(txt, pat)

Match Found at Index  0
Match Found at Index  9
Match Found at Index  13


In [56]:
def rabin_karp(text, pattern):

    ### TC: O(N + M) (best, avg), (N*M) worst case, O(N*M) when there are spurious hits. (false positives, collusion)
    ### SC: O(1)

    N = len(text)
    M = len(pattern)
    p = 31
    mod = 1e9+7
    pattern_hash = 0
    text_hash = 0

    print(M , N)

    pow = p ** (M - 1)

    for i in range(M):

        print(i)
        print(pattern[i])

        pattern_hash = (p * pattern_hash + ord(pattern[i]))%mod
        text_hash = (p * text_hash + ord(text[i])) % mod

    for i in range(N-M+1):

        if pattern_hash == text_hash:
            if text[i:i+M] == pattern:
                print('pattern found at index : ', i)

        if i < N - M:
            # print(i)
            ###update the hash
            text_hash = (p * (text_hash - ord(text[i]) * pow) + ord(text[i+M]))%mod

            ###how update works
            '''
            >>> rolling hash
            1. multiply by p, needed eg. if hash was [1*p^3 + ... 1*p^1]. since we remove 1 * p ^ 3, the highest pow of p will now be p ^ 2, to make it p ^ 3 multiple my p.
            2. need to remove first/trailing character done by subtracting ord(text[i]) * pow
            3. why multiply by power??
                -> assume the hash is [1*p^3 + ... 1*p^1].
                -> to remove 1 * p ^ 3, we have 1 as ord(text[i]), we need to compute p ^ 3 and store it somewhere. p ^ 3 is simply p ^ (m - 1)
            '''

            if text_hash < 0:
                text_hash += mod

txt = "AABAACAADAABAAABAA"
pat = "AABA"

txt = "aacc"
pat = "acc"

rabin_karp(txt, pat)

3 4
0
a
1
c
2
c
pattern found at index :  1


## z algorithm

#### Working
- works in linear time, TC (m + n).
- Simple to understand than KMP.
- Create a Z array, array of substring lengths
    1. To calculate the value z[i].
    2. check prefix from 0 to i, starting character t[i]
    3. Using Brute force approach it takes O(N^2)
- concatenate pattern and text. P$T -> $ should not be present in P or T.
- if any index of z-array, z[i] == len(P) then there is a match.

In [15]:
str = "aabzaabzcaabzaabza"
z = [None] * len(str)
z[0] = 'X'

### TC: O(N^2)
### SC: O(N)
for i in range(1, len(str)):
    idx = i
    prefix_length = 0
    for j in range(idx):
        if i < len(str) and str[i] == str[j]:
            prefix_length += 1
            i += 1
        else:
            break
    z[idx] = prefix_length

print(z)


['X', 1, 0, 0, 4, 1, 0, 0, 0, 8, 1, 0, 0, 5, 1, 0, 0, 1]


In [27]:
### concat pattern and text
pattern = "GEEK"
text = "GEEKS FOR GEEKS"
concat = pattern + "$" + text

print("Concat : ", concat)

z = [0] * len(concat)

### TC: O(N^2)
### SC: O(N)
for i in range(1, len(str)):
    idx = i
    prefix_length = 0
    for j in range(idx):
        if i < len(concat) and concat[i] == concat[j]:
            prefix_length += 1
            i += 1
        else:
            break
    z[idx] = prefix_length

# print(z)
# print([i for i in concat])
# for i,j in zip(z, [i for i in concat]):
    # print(j, i)

for i in range(len(z)):

    if z[i] == len(pattern):
        print(f'Match found at {i - len(pattern) - 1}')

Concat :  GEEK$GEEKS FOR GEEKS
Match found at 0
Match found at 10


### computing z array in O(N) time.

In [35]:
####there are 3 cases
'''
1. Maintain a L,R pointer from 0, 0. If i > R. The do naive matching to calculate the value of z[i], and set new values of L,R.
2. If i <= R then, k = i - L
    1. If z[k] < R-i+1 do z[i] = z[k]
    2. If z[k] >= R-i+1, possible to extend L.
        1. set L as i and start matching from str[R] onwards update the new interval of z.
'''

def calculate_z_array(pattern):

    ### TC: O(N+M) size of pattern is N+M
    ### SC: O(N+M)
    left = 0
    right = 0
    n = len(pattern)

    ###z-array
    z = [0] * len(pattern)

    for i in range(1, n):

        ###case 1
        if i > right:
            match_counter = 0

            while i + match_counter < n and pattern[match_counter] == pattern[i+match_counter]:
                match_counter += 1

            z[i] = match_counter

            if match_counter > 0:
                left = i
                right = i + match_counter - 1
        else:
            ### case 2
            k = i - left

            if k < right - i + 1:
                z[i] = z[k]
            else:
                left = i
                while right < n and pattern[right] == pattern[right-left]:
                    right += 1

                z[i] =  right - left
                right -= 1
    return z


def z_algo(text, pattern):

    ### TC: O(N+M), avg, worst
    ### SC: O(N+M)

    ###concat_str
    concat_str = pattern + "$" + text

    ###calculate z-array
    z = calculate_z_array(concat_str)

    for i in range(len(z)):

        if z[i] == len(pattern):
            print('Match found at :', i - len(pattern) - 1)

# pattern = "GEEK"
# text = "GEEKS FOR GEEKS"

pattern = "aabza"
text = "aabzaabzcaabzaabza"

z_algo(text, pattern)

Match found at : 0
Match found at : 9
Match found at : 13


## KMP algorithm for pattern searching

1. The KMP matching algorithm uses degenerating property, (pattern having the same sub-patterns appearing more than once in the pattern).
2. Worst case time complexity is O(N+M)
3. Need preprocessing: Need to prepare an array lps, that tells us the count of characters to be skipped.

In [9]:
def calculate_lps(pattern):

    n = len(pattern)
    lps = [0] * n
    prev_len = 0
    i = 1
    while i < n:

        if pattern[i] == pattern[prev_len]:
            prev_len += 1
            lps[i] = prev_len
            i += 1
        elif prev_len == 0:
            lps[i] = 0
            i += 1
        else:
            prev_len = lps[prev_len-1]

    return lps


# str = "AAAACAAA" # 0 1 2 3 0 1 2 3
# calculate_lps(str)


def kmp(text, pattern):

    lps = calculate_lps(pattern)
    n = len(text)
    m = len(pattern)
    i = 0
    j = 0

    while i < n:

        if j < m and text[i] == pattern[j]:
            i += 1
            j += 1
        else:
            if j == 0:
                i += 1
            else:
                j = lps[j-1]

        if j == m:
            print('match found at index ', i - m)
            # j = 0
    return -1
# pattern = "GEEK"
# text = "GEEKS FOR GEEKS"

pattern = "aabza"
text = "aabzaabzcaabzaabza"

kmp(text, pattern)

match found at index  0
match found at index  9
match found at index  13


# Repeated String Match

Given two strings a and b, return the minimum number of times you should repeat string a so that string b is a substring of it. If it is impossible for b​​​​​​ to be a substring of a after repeating it, return -1.

Notice: string "abc" repeated 0 times is "", repeated 1 time is "abc" and repeated 2 times is "abcabc".

 

Example 1:

Input: a = "abcd", b = "cdabcdab"

Output: 3

Explanation: We return 3 because by repeating a three times "abcdabcdabcd", b is a substring of it.

Example 2:

Input: a = "a", b = "aa"

Output: 2
 

In [None]:
class Solution:

    def naive_search(self, a, b):
        N = len(a)
        M = len(b)
        for i in range(N-M+1):
            k = -1
            for j in range(M):
                if b[j] != a[i+j]: break
                k = j+1

            if k == M: return True

        return False

    def rabin_karp(self, a, b):
        ''' a - text
            b - pattern to be searched
        '''
        N = len(a)
        M = len(b)

        base = 10
        mod = 10 ** 9 + 7 ###to avoid overflow, and prime for distribition. (no much collisions maybe)

        pow = base ** (M - 1)

        a_hash = 0
        b_hash = 0

        for i in range(M):
            a_hash = (base * a_hash + ord(a[i])) % mod
            b_hash = (base * b_hash + ord(b[i])) % mod

        for i in range(N-M+1):

            ###compare hash
            if a_hash == b_hash:
                if a[i: i + M] == b: return True
            ###update hash
            if i < N - M:

                a_hash = (base * (a_hash - ord(a[i]) * pow) % mod + ord(a[i+M])) % mod

                if a_hash < 0:
                    a_hash += base

        return False

    def repeatedStringMatch(self, a: str, b: str) -> int:

        ###alreay a substring
        if a == b: return 1

        ans = 1
        tmp = a
        while len(a) < len(b):
            a += tmp
            ans += 1

        if a == b: return ans

        # if self.naive_search(a, b):
        if self.rabin_karp(a, b):
            # print('in here')
            return ans

        a += tmp
        ans += 1

        # if self.naive_search(a, b):
        if self.rabin_karp(a, b):
            return ans

        return 1

## Shortest palindrome

https://leetcode.com/problems/shortest-palindrome/description/

In [None]:
class Solution:
    def shortestPalindrome(self, s: str) -> str:
        '''
        Greedy Approach
        1. Find the largest palindrome substring/prefix from the start.
        2. the remaining characters (not part of palandrome prefix) need to be added in reverse order at the begnning.

        Dry Run
        s1 -> aabbaa  rev -> aabbaa

        when i = 0
        s[0:n-0] => s[0:n] => aabbaa
        rev[0:] = aabbaa

        s1 ->aabbaax rev -> xaabbaa

        when i = 0
        s[0:n-0] => aabbaax
        rev[0:] => xaabbaa

        not same

        when i = 1
        s[0:n-1] => aabbaa
        rev[1:] => aabbaa

        equal

        hence remaining characters are rev[:i]

        output = rev[:i] + s

        '''

        ### TC: O(N^2)
        ### SC: O(N)

        rev = s[::-1]
        n = len(s)

        for i in range(n):

            if s[0:n-i] == rev[i:]:
                return rev[:i] + s

        return ""

        ### approach 2: using two pointers and recursion (complexity is similar dont need. optional)
        ### Using KMP
        rev = s[::-1]
        s_new = s + "#" + rev
        n_new = len(s_new)
        n = len(s)

        i = 1
        prev_len = 0
        lps = [0] * n_new

        while i < n_new:

            if s_new[i] == s_new[prev_len]:
                prev_len += 1
                lps[i] = prev_len
                i += 1
            elif prev_len == 0:
                lps[i] = 0
                i += 1
            else:
                prev_len = lps[prev_len - 1]

        return rev[:n - lps[n_new-1]] + s

## Longest Prefix Which is Suffix

In [None]:
def longestPrefixSuffix(s: str) -> str:

        ### TC: O(N)
        ### SC: O(N)

        n = len(s)
        lps = [0] * n
        prev_len = 0
        i = 1

        while i < n:

            if s[i] == s[prev_len]:
                prev_len += 1
                lps[i] = prev_len
                i += 1
            elif prev_len == 0:
                lps[i] = 0
                i += 1
            else:
                prev_len = lps[prev_len-1]

        return s[:prev_len]

### Minimum Add to make Parentheses Valid

```here the operation is to add extra brackets.```

A parentheses string is valid if and only if:

It is the empty string,
It can be written as AB (A concatenated with B), where A and B are valid strings, or
It can be written as (A), where A is a valid string.
You are given a parentheses string s. In one move, you can insert a parenthesis at any position of the string.

For example, if s = "()))", you can insert an opening parenthesis to be "(()))" or a closing parenthesis to be "())))".
Return the minimum number of moves required to make s valid

In [None]:
def minAddToMakeValid(s: str) -> int:

    stack = []

    ##get unclosed brackets

    for i in s:

        if i == '(':
            stack.append(i)
        else:
            if stack and stack[-1] == '(':
                stack.pop()
            else:
                stack.append(i)

    return len(stack)

### Minimum Cost To Make String Valid

```here the operation is to convert a closing -> opeaning or vice-versa.```

In [None]:
# import java.util.Stack;
# public class Solution {
#     public static int findMinimumCost(String str) {
# 		// Write your code here
#         // minimum cost is = len of stack 
#         /*
#         {{  }}
#         1. # of closing and opeaning brackets need to be equal
#         Hence S needs to be even 
        
#         1. If str is odd then return -1 
#         2. if str[0] == '}' and stack is empty return -1
#         */
#         Stack<Character> s = new Stack<Character>();
#         int size = 0;
#         int len = str.length();
        
#         if(len % 2 == 1)
#         {
#             return -1;
#         }
            
#         for(int i = 0;i < len; i++)
#         {
          
            
# //             if (str.charAt(i) == '}' && !s.empty() && s.peek() == '{'){
# //                 s.pop();
# //             }
# //             else{
# //                   s.push(str.charAt(i));
# //             }
            
#             if (str.charAt(i) == '{')
#             {
#                 s.push('{');
#             }          
#             else
#             {
#                 if (str.charAt(i) == '}' && !s.empty() && s.peek() == '{')
#                 {
#                     s.pop();
#                 }
#                 else
#                 {
#                     s.push('}');
#                 }
#             }
#         }
        
#         int a = 0,b = 0;
#         while(!s.empty())
#         {
#             char c = s.pop();
                
#             if(c == '}'){
#                 a ++;
#             }
#             else{
#                 b ++;
#             }
#         }
        
#         return (a + 1) / 2 + (b + 1) / 2;
#     }
# }