In [1]:
import re

In [2]:
# Compile a pattern for ddd-ddd-dddd, then look for its first occurrence in the text.
phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mo = phoneNumRegex.search('My number is 415-555-4242.')

# group(0) is the entire matched text.
print('Phone number found: ' + mo.group(0))

Phone number found: 415-555-4242


In [3]:
phoneNumRegex.search('Cell: 415-555-9999 Work: 212-555-0000').group()

'415-555-9999'

In [4]:
phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000')

['415-555-9999', '212-555-0000']

In [5]:
# Three capture groups, one per hyphen-separated run of digits.
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)')

# With groups in the pattern, findall() returns one tuple of group strings per match.
phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000')

[('415', '555', '9999'), ('212', '555', '0000')]

# group with ( )

In [6]:
# Two capture groups: the first three digits, then the remaining 'ddd-dddd' part.
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')

# Keep the Match object so its individual groups can be inspected.
mo = phoneNumRegex.search('My number is 415-555-4242.')

In [7]:
mo.group(1)

'415'

In [8]:
mo.group(2)

'555-4242'

In [9]:
mo.group(0)

'415-555-4242'

In [10]:
mo.group()

'415-555-4242'

In [11]:
mo.groups()

('415', '555-4242')

In [12]:
areaCode, mainNumber = mo.groups()

In [13]:
areaCode

'415'

In [14]:
mainNumber

'555-4242'

In [15]:
# \( and \) match literal parentheses; the unescaped ( ) still delimit capture groups.
phoneNumRegex = re.compile(r'(\(\d\d\d\)) (\d\d\d-\d\d\d\d)')

In [16]:
mo = phoneNumRegex.search('My phone number is (415) 555-4242.')

In [17]:
mo.group(1)

'(415)'

In [18]:
mo.group(2)

'555-4242'

# one of many |

In [26]:
# The pipe matches either alternative; when both occur in the text,
# search() returns whichever appears first.
heroRegex = re.compile(r'AA|BB')

heroRegex.search('AA and BB.').group()  # 'AA' occurs first in this text

'AA'

In [28]:
heroRegex.search('BB and AA.').group()  #or

'BB'

In [29]:
# A shared prefix followed by a group of alternatives.
batRegex = re.compile(r'AA(BB|CC|DD|EE)')

mo = batRegex.search('AACC')

mo.group() # whole match: the prefix plus whichever alternative matched

'AACC'

In [30]:
mo.group(1) 

'CC'

# zero or one ? 

In [44]:
# (BB)? makes the group optional: it may appear zero times or once.
batRegex = re.compile(r'AA(BB)?CC')

batRegex.search('AACC').group()   # zero occurrences of the optional group

'AACC'

In [47]:
batRegex.search('AABBCC').group()  #one

'AABBCC'

In [48]:
# The leading (\d\d\d-)? group makes the first three digits and their hyphen optional.
phoneRegex = re.compile(r'(\d\d\d-)?\d\d\d-\d\d\d\d')

phoneRegex.search('My number is 415-555-4242').group()  # optional group present

'415-555-4242'

In [49]:
phoneRegex.search('My number is 555-4242').group()  #zero

'555-4242'

# zero or more *

In [35]:
# (BB)* allows the group to repeat any number of times, including none.
batRegex = re.compile(r'AA(BB)*CC')

batRegex.search('AACC').group()  # zero repetitions

'AACC'

In [38]:
batRegex.search('AABBCC').group()  #one

'AABBCC'

In [41]:
batRegex.search('AABBBBBBCC').group()  #more

'AABBBBBBCC'

# one or more +

In [50]:
# (BB)+ requires the group at least once.
batRegex = re.compile(r'AA(BB)+CC')

batRegex.search('AABBCC').group()  # exactly one repetition

'AABBCC'

In [51]:
batRegex.search('AABBBBCC').group() #more

'AABBBBCC'

In [52]:
# The text contains no 'BB', so the pattern does not match and search() returns None.
mo3 = batRegex.search('AACC')
# Identity comparison is the idiomatic way to test for None.
mo3 is None

True

# repetitions { }

In [56]:
# {3} requires the preceding group exactly three times in a row.
haRegex = re.compile(r'(Aa){3}')

haRegex.search('AaAaAa').group()

'AaAaAa'

In [57]:
# search() returns None when the pattern is not found; test for that by identity.
mo2 = haRegex.search('Aa')
mo2 is None

True

# nongreedy ?

In [59]:
greedyHaRegex = re.compile(r'(Aa){3,5}')  # between 3 and 5 repetitions

greedyHaRegex.search('AaAaAaAaAa').group()  # greedy by default: takes the longest allowed run

'AaAaAaAaAa'

In [61]:
nongreedyHaRegex = re.compile(r'(Aa){3,5}?')  # trailing ? makes the match nongreedy (shortest allowed run)

nongreedyHaRegex.search('AaAaAaAaAa').group()

'AaAaAa'

# shorthand code

In [29]:
# The pattern is built from shorthand character classes:
#   \d+  one or more numeric digits
#   \s   a single whitespace character
#   \w+  one or more letter/digit/underscore characters

xmasRegex = re.compile(r'\d+\s\w+')

In [30]:
xmasRegex.findall('12 drummers, 11 pipers, 10 lords, 9 ladies, 8 maids, 7 swans, 6 geese, 5 rings, 4 birds, 3 hens, 2 doves, 1 partridge')

['12 drummers',
 '11 pipers',
 '10 lords',
 '9 ladies',
 '8 maids',
 '7 swans',
 '6 geese',
 '5 rings',
 '4 birds',
 '3 hens',
 '2 doves',
 '1 partridge']

# character class

In [62]:
# A character class matches any single character listed between the brackets.
# NOTE: despite the name, this class matches the letters a-e in either case, not the vowels.
vowelRegex = re.compile(r'[abcdeABCDE]')

In [64]:
vowelRegex.findall('RoboCop eats baby food. BABY FOOD.')

['b', 'C', 'e', 'a', 'b', 'a', 'b', 'd', 'B', 'A', 'B', 'D']

In [65]:
# A negative character class (leading ^ inside the brackets) matches every
# character that is NOT listed in the class.
# NOTE: despite the name, this matches anything except a-e/A-E, so vowels such as 'o' still match.

consonantRegex = re.compile(r'[^abcdeABCDE]')

In [66]:
consonantRegex.findall('RoboCop')

['R', 'o', 'o', 'o', 'p']

# caret ^ and dollar sign $

In [68]:
# ^ anchors the match to the start of the string.
# NOTE: despite the name, the pattern checks for a leading 'A', not 'Hello'.
beginsWithHello = re.compile(r'^A')

beginsWithHello.search('ABC')

<re.Match object; span=(0, 1), match='A'>

In [69]:
# search() returns None when nothing matches; test for that by identity.
beginsWithHello.search('BCD') is None

True

In [70]:
# $ anchors the match to the end of the string.
endsWithNumber = re.compile(r'\d$')

endsWithNumber.search('42')

<re.Match object; span=(1, 2), match='2'>

In [71]:
# search() returns None when nothing matches; test for that by identity.
endsWithNumber.search('two.') is None

True

In [72]:
# ^ and $ together require the digits to run from the start of the string to its end
# (note: $ also matches just before a single trailing newline).
wholeStringIsNum = re.compile(r'^\d+$')

wholeStringIsNum.search('1234567890')

<re.Match object; span=(0, 10), match='1234567890'>

In [73]:
# search() returns None when nothing matches; test for that by identity.
wholeStringIsNum.search('12345xyz67890') is None

True

In [74]:
# search() returns None when nothing matches; test for that by identity.
wholeStringIsNum.search('12 34567890') is None

True

# any character except newline .

In [75]:
# . matches any single character except a newline

atRegex = re.compile(r'.at')

# 'flat' yields 'lat' because the dot consumes just one character before 'at'.
atRegex.findall('cat in the hat sat on the flat mat.')

['cat', 'hat', 'sat', 'lat', 'mat']

# anything greedy .*

In [76]:
# Each (.*) captures a run of any characters other than newline.
nameRegex = re.compile(r'First Name: (.*) Last Name: (.*)')

mo = nameRegex.search('First Name: Al Last Name: Sweigart')

mo.group(1)

'Al'

In [77]:
mo.group(2)

'Sweigart'

In [78]:
# Greedy .* extends as far as possible, so the match runs to the LAST '>' in the text.
greedyRegex = re.compile(r'<.*>')

mo = greedyRegex.search('<To serve man> for dinner.>')

mo.group()

'<To serve man> for dinner.>'

# anything nongreedy .*?

In [79]:
# Nongreedy .*? takes as little as possible, so the match stops at the FIRST '>'.
nongreedyRegex = re.compile(r'<.*?>')

nongreedyRegex.search('<To serve man> for dinner.>').group()

'<To serve man>'

# including newline re.DOTALL

In [80]:
# Without re.DOTALL the dot does not cross a newline, so only the first line is matched.
noNewlineRegex = re.compile(r'.*')

noNewlineRegex.search('Serve the public trust. \nProtect the innocent. \nUphold the law.').group()

'Serve the public trust. '

In [81]:
# re.DOTALL lets the dot match newline characters too, so .* spans every line.
newlineRegex = re.compile(r'.*', flags=re.DOTALL)

newlineRegex.search('Serve the public trust.\nProtect the innocent. \nUphold the law.').group()

'Serve the public trust.\nProtect the innocent. \nUphold the law.'

# case-insensitive re.IGNORECASE

In [82]:
# re.IGNORECASE (short alias: re.I) makes letter matching case-insensitive.
robocop = re.compile(r'robocop', flags=re.IGNORECASE)

In [83]:
robocop.search('RoboCop is part man, part machine, all cop.').group()

'RoboCop'

In [84]:
robocop.search('ROBOCOP protects the innocent.').group()

'ROBOCOP'

In [85]:
robocop.search('Why does your programming book talk about robocop so much?').group()

'robocop'

# substitute

In [94]:
namesRegex = re.compile(r'Jupyter \w+')

In [96]:
namesRegex.sub('XXX', 'Jupyter Notebook.')

'XXX.'

In [42]:
# The single capture group keeps only the first character of the name;
# \w* consumes the rest of the name without capturing it.
agentNamesRegex = re.compile(r'Agent (\w)\w*')

In [44]:
agentNamesRegex.sub(r'\1****', 'Agent Alice told Agent Carol that Agent Eve knew Agent Bob was a double agent.')

'A**** told C**** that E**** knew B**** was a double agent.'

# re.VERBOSE

In [97]:
# Verbose-mode pattern: whitespace and #-comments inside the pattern are ignored,
# so each component can sit on its own annotated line.
# The extension marker uses an escaped, optional dot (ext\.?) so that only
# "ext", "ext." or "x" are accepted; an unescaped dot would match any character.
phoneRegex = re.compile(r'''(
(\d{3}|\(\d{3}\))?             # area code, optionally in parentheses
(\s|-|\.)?                     # separator
\d{3}                          # first 3 digits
(\s|-|\.)                      # separator
\d{4}                          # last 4 digits
(\s*(ext\.?|x)\s*\d{2,5})?     # extension: ext, ext. or x followed by 2-5 digits
)''', re.VERBOSE)


In [99]:
# Several flags are combined with the bitwise OR operator.
someRegexValue = re.compile('foo', flags=re.IGNORECASE | re.DOTALL)

In [100]:
# Three flags combined with bitwise OR: case-insensitive, dot matches newline, verbose syntax.
someRegexValue = re.compile('foo', flags=re.IGNORECASE | re.DOTALL | re.VERBOSE)