In [1]:
text = 'yeah, but no, but yeah, but no, but yeah'

# Four literal-text checks that need no regular expression, printed one per
# line in the same order as listed here.
print(
    text == 'yeah',           # whole-string equality          -> False
    text.startswith('yeah'),  # literal prefix test            -> True
    text.endswith('no'),      # literal suffix test            -> False
    text.find('no'),          # index of the first occurrence  -> 10
    sep='\n',
)

False
True
False
10


In [2]:
import re

text1 = '11/27/2012'
text2 = 'Nov 27, 2012'

# re.match() tries the pattern at the beginning of the string. Each \d+
# consumes a run of one or more digits, so only a digits/digits/digits
# prefix is accepted.
print('yes' if re.match(r'\d+/\d+/\d+', text1) else 'no')  # yes
print('yes' if re.match(r'\d+/\d+/\d+', text2) else 'no')  # no

yes
no


In [3]:
# Precompiling Regular Expressions: build the pattern object once, then
# reuse it for every string that has to be checked.
datepat = re.compile(r'\d+/\d+/\d+')

print('yes' if datepat.match(text1) else 'no')  # yes
print('yes' if datepat.match(text2) else 'no')  # no

yes
no


In [4]:
# Finding All Matches: findall() scans the whole string and returns every
# non-overlapping hit as a list.
text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'
all_dates = datepat.findall(text)
print(all_dates)  # ['11/27/2012', '3/13/2013']

['11/27/2012', '3/13/2013']


In [5]:
# Capture Groups: each parenthesised part of the pattern is stored separately
datepat = re.compile(r'(\d+)/(\d+)/(\d+)')

m = datepat.match('11/27/2012')

# group(0) is the entire match; group(1)..group(3) are the captured pieces:
# '11/27/2012', '11', '27', '2012'
for group_index in range(4):
    print(m.group(group_index))
print(m.groups())  # ('11', '27', '2012')

# The tuple of groups unpacks directly into separate names
month, day, year = m.groups()

# With groups in the pattern, findall() yields one tuple per match
date_tuples = datepat.findall(text)
print(date_tuples)  # [('11', '27', '2012'), ('3', '13', '2013')]

# Re-order each tuple into year-month-day form:
#   2012-11-27
#   2013-3-13
for month, day, year in date_tuples:
    print(f'{year}-{month}-{day}')

11/27/2012
11
27
2012
('11', '27', '2012')
[('11', '27', '2012'), ('3', '13', '2013')]
2012-11-27
2013-3-13


In [6]:
# Iterative Matching: finditer() yields match objects one at a time instead
# of building the whole result list up front.
date_matches = datepat.finditer(text)
for m in date_matches:
    # Prints ('11', '27', '2012') and then ('3', '13', '2013')
    print(m.groups())

('11', '27', '2012')
('3', '13', '2013')


In [7]:
# Exact match with end-marker: the trailing $ makes match() reject strings
# that have extra text after the date.
# NOTE: $ also matches just before a single trailing newline; fullmatch()
# or \Z is stricter when that distinction matters.
datepat = re.compile(r'(\d+)/(\d+)/(\d+)$')
print(
    datepat.match('11/27/2012abcdef'),  # None
    datepat.match('11/27/2012'),        # Match object
    sep='\n',
)

# Module-level function: pass the pattern string straight to re.findall()
# without compiling it first.
module_level_hits = re.findall(r'(\d+)/(\d+)/(\d+)', text)
print(module_level_hits)  # [('11', '27', '2012'), ('3', '13', '2013')]

None
<re.Match object; span=(0, 10), match='11/27/2012'>
[('11', '27', '2012'), ('3', '13', '2013')]
