### Popular String Methods or functions related to strings:

- `join()` Converts the elements of an iterable into a string
- `isalnum()`	Returns True if all characters in the string are alphanumeric
- `isalpha()`	Returns True if all characters in the string are in the alphabet
- `ord()` function returns an integer representing the Unicode character
- `chr()` method returns a character (a string) from an integer (represents a unicode POV of a character)
- `capitalize()`  method convert all chars in the string to sentence case
- `upper()` method converts all lowercase characters in a string into uppercase characters and returns it
- `lower()` method converts all uppercase characters in a string into lowercase characters and returns it
- `string[::-1]` method will reverse the string
- `split("x")` method splits the string into 
- `zfill(width)` ensures a string has at least width characters, filling with 0 if needed.

In [1]:
string = "Check me Again"
print(string)

Check me Again


In [2]:
listOfChar = sorted(string.lower())
print(listOfChar)

[' ', ' ', 'a', 'a', 'c', 'c', 'e', 'e', 'g', 'h', 'i', 'k', 'm', 'n']


In [3]:
sortedString = ''.join(listOfChar)
print(sortedString)

  aacceeghikmn


In [4]:
string.split(" ")

['Check', 'me', 'Again']

### Properties:

- Small set of characters (bringing structure to the problem making it amenable to space complexity solutions)
- Contiguous integer values for 'a' to 'z' and 'A' to 'Z' in both ASCII (C++) and UTF-16 (Java)
- ASCII only english, so UTF-16 was created to include other characters
- Since we can convert characters to integers, we can apply traditional array algorithms to many string problems!

In [5]:
print(ord("a"), ord("b"), ord("c"), ord("d"))

97 98 99 100


In [6]:
print(chr(97), chr(98), chr(99), chr(100))

a b c d


### Question-1 (Frequencies of Characters):

- All characters in sorted order and their frequencies in $\mathcal{O}(1)$ time
- A simple frequency counter for each character in string that respects the **order** of each occurred character.

In [7]:
string = "aailovethecountryukraine"
count = [0]*26
for index in range(len(string)):
    count[ord(string[index]) - ord("a")] += 1
    
print(count)    

for index in range(len(count)):
    if count[index]>0:
        print(chr(index + ord("a")), count[index])

[3, 0, 1, 0, 3, 0, 0, 1, 2, 0, 1, 1, 0, 2, 2, 0, 0, 2, 0, 2, 2, 1, 0, 0, 1, 0]
a 3
c 1
e 3
h 1
i 2
k 1
l 1
n 2
o 2
r 2
t 2
u 2
v 1
y 1


### Question-2 (Palindrome Check):

- I/P : "ABCDCBA"
- O/P : True

    
- I/P : "ABBA"
- O/P : True
    
    
- I/P : "russia"
- O/P : False


- I/P : "ukraine"
- O/P : False

In [8]:
# O(n) time, O(1) space!
def palindrome(string):
    left, right = 0, len(string)-1
    while left<right:
        if string[left]!=string[right]:
            return False 
        left  += 1
        right -= 1
    return True
        
string = "ABCDCBA"       
print(palindrome(string))   

string = "ABBA"
print(palindrome(string))   

string = "ukraine"
print(palindrome(string))  

string = "russia"
print(palindrome(string))  

True
True
False
False


### Question-3 (Check if a string is a Subsequence of the other):

- I/P : s1 = "ABCD", s2 = "AD"
- O/P : True

    
- I/P : s1 = "ABCDE", s2 = "AED"
- O/P : False

In [9]:
# O(n + m) time solution;
def is_subsequence(s1, s2):
    pointer1 = 0
    for pointer2 in range(len(s2)):
        while pointer1 <=len(s1)-1 and s1[pointer1]!=s2[pointer2]:
            pointer1 += 1
        
        if pointer1>=len(s1):
            return False 
        
    return True    

s1, s2 = "ABCD", "AD"
print(is_subsequence(s1, s2))  

s1, s2 = "ABCDE", "AED"
print(is_subsequence(s1, s2))  

s1, s2 = "GEEKSFORGEEKS", "GRGES"
print(is_subsequence(s1, s2))  

s1, s2 = "AD", "ABCDEF"
print(is_subsequence(s1, s2))  

True
False
True
False


### Question-4 (Check for Anagram):

- I/P : s1 = "listen", s2 = "silent"
- O/P : True

    
- I/P : s1 = "aaacb", s2 = "cabaa"
- O/P : True


- I/P : s1 = "aab", s2 = "bab"
- O/P : True

In [10]:
# O(n log(n)) solution;
def quick_anagram(s1, s2):
    s1 = ''.join(sorted(s1))
    s2 = ''.join(sorted(s2))
    return s1==s2

s1, s2 = "listen", "silent"
print(quick_anagram(s1, s2))  

s1, s2 = "aaacb", "cabaa"
print(quick_anagram(s1, s2))  

s1, s2 = "aab", "bab"
print(quick_anagram(s1, s2))  


print(" ")


# O(n + m) time solution;
def check_anagrams(s1, s2):
    
    if len(s1)!=len(s2): return False 
    
    cache = {}
    for char in s1:
        if char not in cache:
            cache[char] = 0
        cache[char] += 1    
    
    for char in s2:
        if char not in cache:
            return False
        
        cache[char] -= 1
        if cache[char]==0:
            del cache[char]
    
    return len(cache)==0

s1, s2 = "listen", "silent"
print(check_anagrams(s1, s2))  

s1, s2 = "aaacb", "cabaa"
print(check_anagrams(s1, s2))  

s1, s2 = "aab", "bab"
print(check_anagrams(s1, s2))  

True
True
False
 
True
True
False


### Question-5 (Leftmost Repeating Character):

- I/P : s1 = "geeksforgeeks"
- O/P : 0

    
- I/P : s1 = "abbcc"
- O/P : 1


- I/P : s1 = "abcd"
- O/P : -1

In [11]:
# O(n^2) solution!
def naive_solution(string):
    for outerIndex in range(len(string)):
        for innerIndex in range(outerIndex+1, len(string)):
            if string[outerIndex]==string[innerIndex]:
                return outerIndex
            
    return -1

s1 = "geeksforgeeks"
print(naive_solution(s1))  

s1 = "abbcc"
print(naive_solution(s1)) 

s1 = "abcddc"
print(naive_solution(s1)) 

s1 = "abcd"
print(naive_solution(s1)) 


print(" ")


# O(n) solution- requires 2 loops of the input string!
def better_solution(string):
    cache = {}
    for char in string:
        if char not in cache:
            cache[char] = 0
        cache[char] += 1
        
    # here you are passign through the string again (since the hashmap ddoes not have an order)! Not a good sign!    
    for index in range(len(string)):
        if cache[string[index]]>1:
            return index 
    return -1

s1 = "geeksforgeeks"
print(better_solution(s1))  

s1 = "abbcc"
print(better_solution(s1)) 

s1 = "abcddc"
print(better_solution(s1)) 

s1 = "abcd"
print(better_solution(s1)) 


print(" ")


# requires only one pass of the array!
def even_better_repeating(string):
    cache, answer = [-1]*256, float("inf")
    for index in range(len(string)):
        if cache[ord(string[index]) - ord('a')] == -1:
            cache[ord(string[index]) - ord('a')] = index
        else:
            answer = min(answer, cache[ord(string[index]) - ord('a')])
    
    return answer

s1 = "geeksforgeeks"
print(even_better_repeating(s1))  

s1 = "abbcc"
print(even_better_repeating(s1)) 

s1 = "abcddc"
print(even_better_repeating(s1)) 

s1 = "abcd"
print(even_better_repeating(s1)) 

0
1
2
-1
 
0
1
2
-1
 
0
1
2
inf


### Question-6 (Leftmost Non-Repeating Element):

- I/P : s1 = "geeksforgeeks"
- O/P : 5

    
- I/P : s1 = "abbcc"
- O/P : 0


- I/P : s1 = "abcd"
- O/P : 0

In [12]:
# O(n) solution!
def naive_non_repeating(string):
    cache = {}
    for char in string:
        if char not in cache:
            cache[char] = 0
        cache[char] += 1    
    
    for index in range(len(string)):
        if cache[string[index]]==1:
            return index 
    
    return -1    
        
s1 = "geeksforgeeks"
print(naive_non_repeating(s1))  

s1 = "abcabc"
print(naive_non_repeating(s1)) 

s1 = "abbcc"
print(naive_non_repeating(s1)) 

s1 = "abcddc"
print(naive_non_repeating(s1)) 

s1 = "abcd"
print(naive_non_repeating(s1)) 


print(" ")


# using a state machine!
# theta(n) solution, theta(CHAR) space 
def even_better_non_repeating(string):
    cache, answer = [-1]*256, float("inf")
    for index in range(len(string)):
        if cache[ord(string[index]) - ord('a')] == -1:
            cache[ord(string[index]) - ord('a')] = index
        else:
            cache[ord(string[index]) - ord('a')] = -2
            
    for number in cache:
        if number>=0:
            answer = min(answer, number)
        
    return answer

s1 = "geeksforgeeks"
print(even_better_non_repeating(s1))  

s1 = "abcabc"
print(even_better_non_repeating(s1)) 

s1 = "abbcc"
print(even_better_non_repeating(s1)) 

s1 = "abcddc"
print(even_better_non_repeating(s1)) 

s1 = "abcd"
print(even_better_non_repeating(s1)) 

5
-1
0
0
0
 
5
inf
0
0
0


### Question-7 (Reverse words in string):

- I/P : s1 = "welcome to gfg"
- O/P : "gfg to welcome"

    
- I/P : s1 = "i love coding"
- O/P : "coding love i"


- I/P : s1 = "abc"
- O/P : "abc"

In [13]:
def python_hack(string):
    answer = ' '.join(string.split(" ")[::-1])
    return answer

string = "welcome to gfg"
print(python_hack(string))

string = "i love coding"
print(python_hack(string))

string = "abc"
print(python_hack(string))

gfg to welcome
coding love i
abc


### Question-8 (Pattern match with given string):

- I/P : text = "AAAAA", pattern = "AAA" 
- O/P : 0, 1, 2
    
        
- I/P : text = "ABCABCD", pattern = "ABD" 
- O/P : NOT PRESENT
    
    
- I/P : text = "GEEKSFORGEEKS", pattern = "EKS" 
- O/P : 2, 10

In [17]:
def naive_pattern_searching(string, pattern):
    left, right = 0, 0 
    currString, answer = "", []
    while right<len(string):
        currString += string[right]
        
        if right-left+1<len(pattern):
            right += 1
        elif right-left+1==len(pattern):  
            if currString==pattern:
                answer.append(left)
            
            currString = currString[1:]
            left  += 1
            right += 1
    return answer

string, pattern = "AAAAA", "AAA" 
print(naive_pattern_searching(string, pattern)) 

string, pattern = "ABCABCD", "ABD" 
print(naive_pattern_searching(string, pattern)) 

string, pattern = "GEEKSFORGEEKS", "EKS" 
print(naive_pattern_searching(string, pattern)) 

string, pattern = "ABABABCD", "ABAB" 
print(naive_pattern_searching(string, pattern)) 

string, pattern = "ABCABCD", "ABCD" 
print(naive_pattern_searching(string, pattern)) 

[0, 1, 2]
[]
[2, 10]
[0, 2]
[3]


### Introduction to Pattern matching:

Assuming that $m$ is the pattern length and $n$ is the text length

- Naive algorithm: $\mathcal{O}((n-m+1) \cdot m)$


- Improved Naive for Distinct characters: $\mathcal{O}(n)$


- Rabin Karp: $\mathcal{O}((n-m+1) \cdot m)$ but better than naive algorithm on average


- KMP: $\mathcal{O}(n)$ (here we pre-process the pattern)


- Suffix Tree Data Structure: $\mathcal{O}(m)$ (here we pre-process the entire text)

In [None]:
def distinct_naive_pattern_searching(string, pattern):
    left, right = 0, 0 
    currString, answer = "", []
    while right<len(string):
        currString += string[right]
        
        if right-left+1<len(pattern):
            right += 1
        elif right-left+1==len(pattern):  
            if currString==pattern:
                answer.append(left)
            
            currString = currString[1:]
            left  += 1
            right += 1
    return answer

string, pattern = "AAAAA", "AAA" 
print(distinct_naive_pattern_searching(string, pattern)) 

string, pattern = "ABCABCD", "ABD" 
print(distinct_naive_pattern_searching(string, pattern)) 

string, pattern = "GEEKSFORGEEKS", "EKS" 
print(distinct_naive_pattern_searching(string, pattern)) 

string, pattern = "ABABABCD", "ABAB" 
print(distinct_naive_pattern_searching(string, pattern)) 

string, pattern = "ABCABCD", "ABCD" 
print(distinct_naive_pattern_searching(string, pattern)) 