# Regex Syntax

Python's regex syntax differs from that of other languages in a few places, such as how capture groups are referenced in replacements and how named capture groups are written.

# Basic Substitution

In [7]:
import re

# Two runs of word characters joined by a hyphen, each run in its own capture
# group. A raw string passes the backslashes to the regex engine untouched.
pattern = r'(\w+)-(\w+)'
text = 'abc-def ghi-jkl'

# In a replacement, Python refers to a capture group as \1 (not $1 as in some
# other languages); \g<1> is the explicit spelling of the same reference.
for replacement in (r'replaced:\1', r'replaced:\g<1>'):
    print(re.sub(pattern, replacement, text))

replaced:abc replaced:ghi
replaced:abc replaced:ghi


# Named Capture Groups

In [10]:
import re

# Python spells a named capture group as (?P<name>...)
pattern = r'(?P<name1>\w+)-(?P<name2>\w+)'
text = 'abc-def ghi-jkl'

# In the replacement, a named group is referenced as \g<name>
result = re.sub(pattern, r'replaced:\g<name1>', text)
print(result)

replaced:abc replaced:ghi


# Find All Matches

In [14]:
import re

# Raw string keeps the regex backslashes literal
pattern = r'(\w+)-(\w+)'
text = 'abc-def ghi-jkl'

# findall returns plain values, not match objects: with two or more capture
# groups each item is a tuple of the groups (one group gives that group's
# string, no groups gives the whole matched string).
group_tuples = re.findall(pattern, text)
for groups in group_tuples:
    print(groups)

# finditer yields match objects instead; group() takes a 1-based group
# number or a group name.
for found in re.finditer(pattern, text):
    first_group = found.group(1)
    print(first_group)

('abc', 'def')
('ghi', 'jkl')
abc
ghi


# Pre-Compiling

In [22]:
import re

# Compile once; the resulting pattern object can then be reused
pattern = re.compile(r'(\w+)-(\w+)')
text = 'abc-def ghi-jkl'

# A compiled pattern offers the same methods as the re module functions,
# minus the leading regex argument.
first_groups_only = pattern.sub(r'\1', text)
print(first_groups_only)

abc ghi


# Multiline

In [20]:
import re

# Sample input: three hyphenated pairs, one per line
text = """abc-def
ghi-jkl
mno-pqr"""

# ^ and $ anchor the pattern to the start and end of a line
pattern = r'^(\w+)-(\w+)$'

# re.MULTILINE lets ^ and $ match at every line boundary, not just at the
# start and end of the whole string. When pre-compiling, the flag can be
# passed to re.compile() instead.
for line_match in re.finditer(pattern, text, flags=re.MULTILINE):
    print(line_match.groups())

('abc', 'def')
('ghi', 'jkl')
('mno', 'pqr')


# Dotall

`re.DOTALL` is a flag that makes `.` match newlines too (by default, `.` matches any character except a newline).

# Combining Modifiers

Combine flags with the bitwise OR operator: `re.DOTALL | re.MULTILINE`