<a href="https://colab.research.google.com/github/fbeilstein/algorithms/blob/master/strings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**String**

You may think of a string as an array of characters.

**Matching problem**

You are given a string of $n$ characters and a string of $m < n$ characters.
The $n$-string is called the *text*, and the $m$-string is called the *pattern*.
You are asked to answer one of the following questions:
* does the text contain the pattern?
* at which index of the text does the contained pattern start?

**Applications**

* text editors
* biology (DNA/RNA sequences)
* search engines

**Main algorithms**

* brute-force
* Boyer-Moore
* KMP
* Rabin-Karp

**Efficiency**

* Measured in terms of comparisons


In [3]:
#@title Visualization helper code

# CSS shared by every rendered table: collapsed borders, fixed-size centered
# cells with semi-transparent bold text, and the two classes ("border" /
# "noborder") that enclose_element assigns to each cell.
str_style = '''
<style>
table {
  border-collapse: collapse;
}

td {
  min-width:30px;
  height: 30px;
  position: relative; 
  text-align:center; 
  color: #00000080;
  font-size:20px;
  font-weight: bolder;
  padding: 19px;
}

.border {
  border: 2px solid #d6d6d6ff;
}

.noborder {
  border: 0px;
}
</style>
'''

def enclose_element(element, color, txt_color, border=True):
  """Render one table cell as an HTML ``<td>`` string.

  Args:
    element: Cell content. It is converted with ``str`` and inserted
      verbatim, so it may itself contain HTML markup (e.g. arrow markers).
    color: Value for the cell's ``bgcolor`` attribute.
    txt_color: CSS text color; when falsy no inline ``style`` is emitted.
    border: Selects the ``border`` (True) or ``noborder`` (False) CSS class.

  Returns:
    A tab-indented ``<td ...>...</td>`` string without a trailing newline.
  """
  css_class = 'border' if border else 'noborder'
  attributes = [f'bgcolor="{color}"', f'class="{css_class}"']
  if txt_color:
    attributes.append(f'style="color:{txt_color}"')
  # Joining with single spaces guarantees the attributes are separated;
  # previously class="..." and style="..." were emitted back to back.
  return '\t<td ' + ' '.join(attributes) + '>' + str(element) + '</td>'

def horizontal_tbl(array, colors, txt_colors, borders):
  """Build an HTML ``<table>`` with one ``<tr>`` per row of ``array``.

  ``array``, ``colors`` and ``txt_colors`` are parallel 2-D sequences (cell
  content, background color, text color); ``borders`` holds one flag per
  row. Rows and cells are paired with ``zip``, so iteration stops at the
  shortest of the sequences being paired.
  """
  pieces = ['<table>\n']
  for cells, bg_row, fg_row, bordered in zip(array, colors, txt_colors, borders):
    pieces.append('<tr>\n')
    for cell, bg, fg in zip(cells, bg_row, fg_row):
      pieces.append(enclose_element(cell, bg, fg, bordered) + '\n')
    pieces.append('</tr>\n')
  pieces.append('</table>\n')
  return ''.join(pieces)

def array_to_html(array, colors, txt_colors, borders):
  """Return the shared ``<style>`` block followed by the rendered table."""
  table_markup = horizontal_tbl(array, colors, txt_colors, borders)
  return str_style + table_markup

def visualize_array(array, colors, txt_colors, borders):
  """Display the table built by ``array_to_html`` in the notebook output.

  Args:
    array: Rows of cell contents.
    colors: Background colors, same layout as ``array``.
    txt_colors: Text colors, same layout as ``array``.
    borders: One flag per row selecting bordered or borderless cells.
  """
  # Imported here so that defining the helpers does not require IPython, and
  # so that ``display`` is resolved explicitly rather than through the name
  # the notebook injects. The unused google.colab import was dropped: it made
  # the helper fail outside Colab.
  from IPython.display import HTML, display
  display(HTML(array_to_html(array, colors, txt_colors, borders)))


###Brute Force

|Scenario||Best Case|Example||Worst Case|Example
--||--|--||--|--
No match||$O(n)$|"aaaaaaaa", "bcd"||$O(nm)$|"aaaaaaaa", "aab"|
Single occurrence||$O(m)$|"abcdef", "abc"||$O(nm)$|"aaaaaaaab", "aab"|
All occurrences||$O(n+m)$|"ccccabccc", "ab"||$O(nm)$|"abababab", "ab"|

In [8]:
text = "aaaaaa"
pattern = "ax"

def brute_match(text, pattern):
  """Return the index of the first occurrence of ``pattern`` in ``text``.

  Every alignment of the pattern against the text is tried from left to
  right, comparing character by character and abandoning an alignment at
  the first mismatch.

  Args:
    text: The string to search in.
    pattern: The string to search for.

  Returns:
    The smallest start index at which ``pattern`` occurs, or -1 when it does
    not occur (including when the pattern is longer than the text). An empty
    pattern matches at index 0.
  """
  # The last admissible start index is len(text) - len(pattern), so the range
  # must be one longer than that difference; without the +1 a match at the
  # very end of the text was never tried.
  for t_idx in range(len(text) - len(pattern) + 1):
    for p_idx in range(len(pattern)):
      if pattern[p_idx] != text[t_idx + p_idx]:
        break
    else:
      # The inner loop finished without a mismatch: full match at t_idx.
      return t_idx
  return -1

print(brute_match(text, pattern))

-1


In [6]:
text = "mythbusters"
pattern = "build"


def brute_match_draw(text, pattern):
  """Brute-force search that draws one table per character comparison.

  Each frame shows the text in the first row and the pattern, shifted to the
  current alignment, in the second row. The pair of cells being compared is
  colored green on a match and red on a mismatch.

  Args:
    text: The string to search in.
    pattern: The string to search for.

  Returns:
    The first start index at which ``pattern`` occurs in ``text``, or -1.
  """
  n, m = len(text), len(pattern)
  # +1 so that the alignment ending exactly at the end of the text is tried.
  for t_idx in range(n - m + 1):
    # Pattern row: shifted right by t_idx and padded to the text's width so
    # both rows always have the same number of cells.
    shifted = [""] * t_idx + list(pattern) + [""] * (n - m - t_idx)
    for p_idx in range(m):
      mismatch = pattern[p_idx] != text[t_idx + p_idx]
      arr = [list(text), shifted]
      # Build independent rows; [[...]] * 2 would alias one list twice.
      cols = [["#ffffff"] * n for _ in range(2)]
      tcols = [["#000000"] * n for _ in range(2)]
      # Highlight the compared cell in both rows explicitly (the aliased rows
      # previously produced this effect by accident).
      highlight = "#ff0000" if mismatch else "#00ff00"
      cols[0][t_idx + p_idx] = highlight
      cols[1][t_idx + p_idx] = highlight
      visualize_array(arr, cols, tcols, [True, True])
      if mismatch:
        break
    else:
      return t_idx
  return -1

0,1,2,3,4,5,6,7,8,9,10
m,y,t,h,b,u,s,t,e,r,s
b,u,i,l,d,,,,,,


0,1,2,3,4,5,6,7,8,9,10
m,y,t,h,b,u,s,t,e,r,s
,b,u,i,l,d,,,,,


0,1,2,3,4,5,6,7,8,9,10
m,y,t,h,b,u,s,t,e,r,s
,,b,u,i,l,d,,,,


0,1,2,3,4,5,6,7,8,9,10
m,y,t,h,b,u,s,t,e,r,s
,,,b,u,i,l,d,,,


0,1,2,3,4,5,6,7,8,9,10
m,y,t,h,b,u,s,t,e,r,s
,,,,b,u,i,l,d,,


0,1,2,3,4,5,6,7,8,9,10
m,y,t,h,b,u,s,t,e,r,s
,,,,b,u,i,l,d,,


0,1,2,3,4,5,6,7,8,9,10
m,y,t,h,b,u,s,t,e,r,s
,,,,b,u,i,l,d,,


0,1,2,3,4,5,6,7,8,9,10
m,y,t,h,b,u,s,t,e,r,s
,,,,,b,u,i,l,d,


-1


##Problems

## Reverse String

In [None]:
class Solution:
    def reverseString(self, s: List[str]) -> None:
        """Reverse the list of characters ``s`` in place; returns nothing."""
        s.reverse()

In [None]:
class Solution:
    def reverseString(self, s: List[str]) -> None:
        """Reverse ``s`` in place with two pointers moving towards the middle.

        Nothing is returned; the caller's list is modified.
        """
        left, right = 0, len(s) - 1
        while left < right:
            s[left], s[right] = s[right], s[left]
            left += 1
            right -= 1
        

## Reverse String II

In [None]:
class Solution:
    def reverseStr(self, s: str, k: int) -> str:
        """Reverse the first ``k`` characters of every ``2k``-character block.

        A trailing block shorter than ``k`` is reversed entirely; slicing
        clamps the window to the end of the string.
        """
        chars = list(s)
        for block_start in range(0, len(chars), 2 * k):
            block_end = block_start + k
            chars[block_start:block_end] = reversed(chars[block_start:block_end])
        return "".join(chars)

In [None]:
class Solution:
    def reverseStr(self, s: str, k: int) -> str:
        """Reverse the first ``k`` characters of every ``2k``-character block.

        Args:
            s: The input string.
            k: Number of characters to reverse at the start of each block.

        Returns:
            A new string in which, for each block of ``2k`` characters, the
            first ``k`` are reversed and the rest are kept. Slicing clamps
            short trailing blocks to the end of the string.
        """
        block = 2 * k
        # Assemble all pieces with a single join instead of growing the
        # result with += on every iteration.
        return "".join(
            s[start:start + k][::-1] + s[start + k:start + block]
            for start in range(0, len(s), block)
        )

In [None]:
class Solution:
    def reverseStr(self, s: str, k: int) -> str:
        p = 0
        q = min(k, len(s))
        s = list(s)
        while p < len(s):
            while p+1 < q:
                s[p], s[q-1] = s[q-1], s[p]
                p += 1
                q -= 1
            p += (3*k+1)//2
            q = min(p+k, len(s))
        return "".join(s)

In [None]:
s = "hello world"
k = 4

def show_ptrs(idx_1, idx_2):
  """Draw the current contents of the global ``s`` with two pointer arrows.

  Reads the globals ``s`` and ``k``. A green arrow is placed under index
  ``idx_1`` and a red arrow under index ``idx_2``.
  """
  # Row 0 holds the characters; row 1 is an empty row that carries the arrows.
  arr = [list(s), [""]*len(s)]
  arr[1][idx_1] += '<b style="color:green">↑</b>'
  arr[1][idx_2] += '<b style="color:red">↑</b>'
  # Alternating runs of k green and k white backgrounds, repeated often
  # enough to cover the row; the slice below trims it to the row length.
  c = (['#00aa00'] * k + ['#ffffff'] * k) * (len(arr[0])//2//k + 1)
  cols = [c[:len(arr[0])], ['#ffffff'] * len(arr[0])]
  # Only the character row is drawn with borders.
  visualize_array(arr, cols, 
                [['#000000'] * len(arr[0])] * 2, 
                [True, False])


# p is the left pointer; q is the inclusive right end of the current window.
p = 0
q = min(k, len(s))-1
# Rebind s to a list so characters can be swapped by index.
s = list(s)
while p < len(s):
  show_ptrs(p, q)
  # Reverse the window by swapping its ends and moving both pointers inward;
  # a frame is drawn after the swap and again after the pointers move.
  while p < q:
    s[p], s[q] = s[q], s[p]
    show_ptrs(p, q)
    p += 1
    q -= 1
    show_ptrs(p, q)
  # After a full window p has advanced k//2 steps, so this jump lands on the
  # start of the next 2k block; after a shorter final window it moves p past
  # the end and the outer loop stops.
  p += (3*k+1)//2
  q = min(p+k, len(s))-1

0,1,2,3,4,5,6,7,8,9,10
h,e,l,l,o,,w,o,r,l,d
↑,,,↑,,,,,,,


0,1,2,3,4,5,6,7,8,9,10
l,e,l,h,o,,w,o,r,l,d
↑,,,↑,,,,,,,


0,1,2,3,4,5,6,7,8,9,10
l,e,l,h,o,,w,o,r,l,d
,↑,↑,,,,,,,,


0,1,2,3,4,5,6,7,8,9,10
l,l,e,h,o,,w,o,r,l,d
,↑,↑,,,,,,,,


0,1,2,3,4,5,6,7,8,9,10
l,l,e,h,o,,w,o,r,l,d
,↑,↑,,,,,,,,


0,1,2,3,4,5,6,7,8,9,10
l,l,e,h,o,,w,o,r,l,d
,,,,,,,,↑,,↑


0,1,2,3,4,5,6,7,8,9,10
l,l,e,h,o,,w,o,d,l,r
,,,,,,,,↑,,↑


0,1,2,3,4,5,6,7,8,9,10
l,l,e,h,o,,w,o,d,l,r
,,,,,,,,,↑↑,


## Reverse Vowels of a String

In [None]:
class Solution:
    def reverseVowels(self, s: str) -> str:
        """Return ``s`` with its vowels in reverse order.

        Two pointers walk in from both ends; each skips non-vowels, and when
        both rest on a vowel (upper or lower case a, e, i, o, u) the two
        characters are exchanged.
        """
        vowels = ['a', 'e', 'i', 'o', 'u',
                  'A', 'E', 'I', 'O', 'U']
        chars = list(s)
        lo, hi = 0, len(chars) - 1
        while lo < hi:
            if chars[lo] not in vowels:
                lo += 1
            elif chars[hi] not in vowels:
                hi -= 1
            else:
                chars[lo], chars[hi] = chars[hi], chars[lo]
                lo += 1
                hi -= 1
        return "".join(chars)
        

In [None]:
s = "hello world"

def show_ptrs(idx_1, idx_2):
  """Draw the global ``s`` with vowels highlighted and two pointer arrows.

  Reads the globals ``s`` and ``vowels`` (the latter is assigned below,
  before the first call). A green arrow is placed under index ``idx_1`` and
  a red arrow under index ``idx_2``.
  """
  # Row 0 holds the characters; row 1 is an empty row that carries the arrows.
  arr = [list(s), [""]*len(s)]
  arr[1][idx_1] += '<b style="color:green">↑</b>'
  arr[1][idx_2] += '<b style="color:red">↑</b>'
  cols = [['#ffffff'] * len(arr[0]), ['#ffffff'] * len(arr[0])]
  # Give every vowel in the character row a green background.
  for idx,c in enumerate(s):
    if c in vowels:
      cols[0][idx] = "#00aa00"
  # Only the character row is drawn with borders.
  visualize_array(arr, cols, 
                [['#000000'] * len(arr[0])] * 2, 
                [True, False])
  
# Rebind s to a list so characters can be swapped by index.
s = list(s)
idx_l, idx_r = 0, len(s)-1
vowels = ['a', 'e', 'i', 'o', 'u',
          'A', 'E', 'I', 'O', 'U']
while idx_l < idx_r:
  # Both pointers rest on vowels: exchange them and step inward.
  if s[idx_l] in vowels and s[idx_r] in vowels:
    s[idx_l], s[idx_r] = s[idx_r], s[idx_l]
    idx_l, idx_r = idx_l+1, idx_r-1
  # Otherwise (or afterwards) advance whichever pointer is on a non-vowel.
  if not s[idx_l] in vowels:
    idx_l += 1
  if not s[idx_r] in vowels:
    idx_r -= 1
  # One frame per loop iteration, drawn after the pointers have moved.
  show_ptrs(idx_l, idx_r)

0,1,2,3,4,5,6,7,8,9,10
h,e,l,l,o,,w,o,r,l,d
,↑,,,,,,,,↑,


0,1,2,3,4,5,6,7,8,9,10
h,e,l,l,o,,w,o,r,l,d
,↑,,,,,,,↑,,


0,1,2,3,4,5,6,7,8,9,10
h,e,l,l,o,,w,o,r,l,d
,↑,,,,,,↑,,,


0,1,2,3,4,5,6,7,8,9,10
h,o,l,l,o,,w,e,r,l,d
,,,↑,,↑,,,,,


0,1,2,3,4,5,6,7,8,9,10
h,o,l,l,o,,w,e,r,l,d
,,,,↑↑,,,,,,


## Maximum Nesting Depth of the Parentheses

In [None]:
class Solution:
    def maxDepth(self, s: str) -> int:
        """Return the largest parenthesis nesting depth reached in ``s``.

        The running depth goes up on ``(`` and down on ``)``; every other
        character leaves it unchanged. The result is never below 0.
        """
        depth = 0
        deepest = 0
        for ch in s:
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
            if depth > deepest:
                deepest = depth
        return deepest
        

In [None]:
s = "h((e)l()l)o (wo(r)l)d"

def show_ptrs(idx_1):
  """Draw the global ``s`` with parentheses highlighted and one arrow.

  A green arrow is placed under index ``idx_1``.
  """
  # Row 0 holds the characters; row 1 is an empty row that carries the arrow.
  arr = [list(s), [""]*len(s)]
  arr[1][idx_1] += '<b style="color:green">↑</b>'
  cols = [['#ffffff'] * len(arr[0]), ['#ffffff'] * len(arr[0])]
  # Give every parenthesis in the character row a green background.
  for idx,c in enumerate(s):
    if c in "()":
      cols[0][idx] = "#00aa00"
  # Only the character row is drawn with borders.
  visualize_array(arr, cols, 
                [['#000000'] * len(arr[0])] * 2, 
                [True, False])

max_d, running_d = 0, 0
for idx, c in enumerate(s):
  # '(' raises the running depth, ')' lowers it, anything else adds 0.
  running_d += {'(':1, ')':-1}.get(c, 0)
  max_d = max(running_d, max_d)
  # Report the state after processing character idx, then draw the frame.
  print("Current depth:", running_d, "   max depth: ", max_d)
  show_ptrs(idx)

Current depth: 0    max depth:  0


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
↑,,,,,,,,,,,,,,,,,,,,


Current depth: 1    max depth:  1


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,↑,,,,,,,,,,,,,,,,,,,


Current depth: 2    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,↑,,,,,,,,,,,,,,,,,,


Current depth: 2    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,↑,,,,,,,,,,,,,,,,,


Current depth: 1    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,↑,,,,,,,,,,,,,,,,


Current depth: 1    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,↑,,,,,,,,,,,,,,,


Current depth: 2    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,↑,,,,,,,,,,,,,,


Current depth: 1    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,↑,,,,,,,,,,,,,


Current depth: 1    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,↑,,,,,,,,,,,,


Current depth: 0    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,,↑,,,,,,,,,,,


Current depth: 0    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,,,↑,,,,,,,,,,


Current depth: 0    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,,,,↑,,,,,,,,,


Current depth: 1    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,,,,,↑,,,,,,,,


Current depth: 1    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,,,,,,↑,,,,,,,


Current depth: 1    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,,,,,,,↑,,,,,,


Current depth: 2    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,,,,,,,,↑,,,,,


Current depth: 2    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,,,,,,,,,↑,,,,


Current depth: 1    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,,,,,,,,,,↑,,,


Current depth: 1    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,,,,,,,,,,,↑,,


Current depth: 0    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,,,,,,,,,,,,↑,


Current depth: 0    max depth:  2


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
h,(,(,e,),l,(,),l,),o,,(,w,o,(,r,),l,),d
,,,,,,,,,,,,,,,,,,,,↑


## Minimum Moves to Convert String

In [None]:
class Solution:
    def minimumMoves(self, s: str) -> int:
        """Count the moves made by a left-to-right greedy scan of ``s``.

        Each ``'X'`` encountered costs one move and skips three positions;
        each ``'O'`` skips one position. Any other character raises
        ``KeyError``.
        """
        stride = {'X': 3, 'O': 1}
        moves = 0
        pos = 0
        length = len(s)
        while pos < length:
            ch = s[pos]
            if ch == 'X':
                moves += 1
            pos += stride[ch]
        return moves

## 1-bit and 2-bit Characters

In [None]:
class Solution:
    def isOneBitCharacter(self, bits: List[int]) -> bool:
        """Tell whether the scan lands exactly on the last element of ``bits``.

        Starting at index 0, the position advances by ``1 + bits[pos]`` (one
        step after a 0, two steps after a 1) while it is before the last
        index.
        """
        last = len(bits) - 1
        pos = 0
        while pos < last:
            pos += bits[pos] + 1
        return pos == last