In [27]:
import sys
print("Python version", sys.version[:5])
import re

Python version 3.8.6


# Why to use raw string?
It's recommended to use raw string while working with Regex once the string special prefix *r* indicates to Python's interpreter to ignore special(escape) characters

# Checking
## Search
Scan through the `input_str`, looking for any location where the `pattern` matches.

`result = search(pattern, input_str)`
### Match
Determine if the `pattern` matches at the beginning of the `input_str`.

`result = match(pattern, input_str)`
### FullMatch
Determine if the `pattern` matches at the entire `input_str`.

`result = fullmatch(pattern, input_str)`
### FindAll
Find all substrings where the `pattern` matches, and returns them as a `list`.

`result = findall(pattern, input_str)`
### FindIter
Find all substrings where the `pattern` matches, and returns them as a `iterator`.

`result = finditer(pattern, input_str)`

## Examples:

In [28]:
regex = r"([a-zA-Z]+) (\d+)"
result = re.search(regex, "Today is October 22")
print(f"Substring match: [{result.start()}, {result.end()})")

Substring match: [9, 19)


In [29]:
print(result.groups())
print("Today is", result.group())
print("Month:", result.group(1))
print("Day:", result.group(2))

('October', '22')
Today is October 22
Month: October
Day: 22


In [30]:
regex = r"([a-zA-Z]+) \d+"
matches = re.findall(regex, """My mother was born in April 9.
                               I was born in February 23.
                               My daughter was born on September 1.""")

print(matches)
for match in matches:
    print(f"Month:", match)

['April', 'February', 'September']
Month: April
Month: February
Month: September


In [31]:
regex = r"([a-zA-Z]+) \d+"
matches = re.finditer(regex, 
"""My mother was born in April 9.
I was born in February 23.
My daughter was born in September 1.""")

print(matches)
for match in matches:
    print(match)
    print(f"Month:", match.group())

<callable_iterator object at 0x000002593DDA3550>
<re.Match object; span=(22, 29), match='April 9'>
Month: April 9
<re.Match object; span=(45, 56), match='February 23'>
Month: February 23
<re.Match object; span=(82, 93), match='September 1'>
Month: September 1


# Replacing

## Sub
Return the string obtained by replacing n(`count`) occurrences of `pattern` in `input_str` by the replacement repl. If `count` <= 0, all occurrences will be replaced.

`replacedString = sub(pattern, repl, input_str, count)`
## Examples:

In [32]:
text = "Yesterday I saw a cat fighting another cat"
regex = "cat"
re.sub(regex, "feline", text)

'Yesterday I saw a feline fighting another feline'

In [33]:
regex = r"([a-zA-Z]+) (\d+)"

order = re.sub(regex, r"\2th of \1", """My mother was born in April 9.
I was born in February 23.
My daughter was born in September 1.""")

print(order)

My mother was born in 9th of April.
I was born in 23th of February.
My daughter was born in 1th of September.


# Spliting
## Split
Return the list obtained by spliting by n(count) occurrences of pattern in input_str. If count <= 0, all occurrences will be splitted.

`result = re.split(pattern, input_str)`
## Examples:

In [34]:
re.split(r'\W+', 'Words, words, words.')

['Words', 'words', 'words', '']

In [35]:
re.split(r'\W+', 'Words, words, words.', 1)

['Words', 'words, words.']