In [2]:
import re

### \d for digit

In [3]:
# Plain pattern: three digits, three digits, four digits, separated by dashes.
phone_number = re.compile(r"\d\d\d-\d\d\d-\d\d\d\d")
mo = phone_number.search("My num 415-555-4242")
# search() gives back None when there is no match, so branch before calling group().
if mo:
    message = "Phone number found: " + mo.group()
else:
    message = "No phone number"
print(message)

# Same shape, but the parentheses capture the area code and the local number separately.
phone_number_group = re.compile(r"(\d\d\d)-(\d\d\d-\d\d\d\d)")
mo2 = phone_number_group.search("My num 2 415-545-4342")
area_and_local = mo2.groups()
print(area_and_local)

Phone number found: 415-555-4242
('415', '545-4342')


### | for "or"

In [4]:
# With alternation, search() reports whichever alternative occurs first in the text.
heroRegex = re.compile(r"Batman|Robin")
mo = heroRegex.search("Batman and Robin")
mo2 = heroRegex.search("Robin and Batman")  # findall() would list every occurrence instead
for hero_match in (mo, mo2):
    print(hero_match.group())

# A shared prefix can be factored out; group 1 holds the alternative that matched.
batRegex = re.compile(r"Bat(man|mobile|copter|bat)")
mo3 = batRegex.search("Batmobile to the rescue")
whole_match, suffix = mo3.group(0, 1)
print(whole_match, suffix)


Batman
Robin
Batmobile mobile


### ? for optional


In [5]:
# "(wo)?" makes the group optional: it may be absent or appear exactly once.
batRegex = re.compile(r"Bat(wo)?man")
mo, mo2 = (
    batRegex.search(sentence)
    for sentence in ("Adventures of Batman", "Gameplay with Batwoman")
)
print(mo.group(), mo2.group(), sep="\n")

Batman
Batwoman


### Matching Zero or More with the Star

In [6]:
# "(wo)*" accepts any number of repetitions of the group, including none at all.
batRegex = re.compile(r"Bat(wo)*man")
mo = batRegex.search("adventures of Batman")
mo2 = batRegex.search("Adventures of Batwoman")
mo3 = batRegex.search("Adventures of Batwowoman")
# One print call, one matched text per line.
print(mo.group(), mo2.group(), mo3.group(), sep="\n")

Batman
Batwoman
Batwowoman


### Matching One or More with the Plus

In [7]:
# "(wo)+" requires the group at least once, so a text without "wo" cannot match.
batRegex = re.compile(r"Bat(wo)+man")
mo = batRegex.search("animated series of Batwoman")
print(mo.group())
mo2 = batRegex.search("batman animated series")
# search() returns None when nothing matches; test for it with the identity
# operator ("is None") rather than "== None", as PEP 8 recommends for singletons.
print(mo2 is None)

Batwoman
True


### Matching Specific Repetitions with Braces

In [8]:
# {3} demands exactly three repetitions of the group.
three_ha_text = "asfasdHaHaHabcgdsgs"
haregex = re.compile(r"(Ha){3}")
mo = haregex.search(three_ha_text)
exactly_three = mo.group()
print(exactly_three)

# {3,5} accepts three to five repetitions; the name haregex is rebound to this pattern.
four_ha_text = "asfasdHaHaHaHabcgdsgs"
haregex = re.compile(r"(Ha){3,5}")
mo2 = haregex.search(four_ha_text)
three_to_five = mo2.group()
print(three_to_five)

HaHaHa
HaHaHaHa


### Greedy and Non-greedy Matching

In [9]:
laughter = 'HaHaHaHaHa'

# By default a quantifier is greedy: it takes as many repetitions as it can (five here).
greedyHaRegex = re.compile(r'(Ha){3,5}')
mo1 = greedyHaRegex.search(laughter)

# A trailing "?" makes the quantifier lazy: it settles for the minimum (three).
nongreedyHaRegex = re.compile(r'(Ha){3,5}?')
mo2 = nongreedyHaRegex.search(laughter)

print(mo1.group(), mo2.group(), sep="\n")

HaHaHaHaHa
HaHaHa


### The findall() Method


In [10]:
contacts = "Cell: 415-555-9999 Work: 212-555-0000"
number_pair = "415-555-9999 and 212-555-0000"

# No groups: search() stops at the first hit, findall() lists every full match as a string.
phoneNumRegex = re.compile(r"\d\d\d-\d\d\d-\d\d\d\d")
mo = phoneNumRegex.search(contacts)
print(mo.group())
print(phoneNumRegex.findall(contacts))

# With groups, findall() yields one tuple of group texts per match.
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)')
grouped_hits = phoneNumRegex.findall(number_pair)
print(grouped_hits)

# An extra outer group puts the whole number first in each tuple.
phoneNumRegex = re.compile(r'((\d\d\d)-(\d\d\d)-(\d\d\d\d))')
nested_hits = phoneNumRegex.findall(number_pair)
print(nested_hits)

415-555-9999
['415-555-9999', '212-555-0000']
[('415', '555', '9999'), ('212', '555', '0000')]
[('415-555-9999', '415', '555', '9999'), ('212-555-0000', '212', '555', '0000')]


### Character Classes

In [11]:
# \d+ = one or more digits, \s = one whitespace character, \w+ = one or more word characters.
xmasRegex = re.compile(r"\d+\s\w+")
gifts = xmasRegex.findall("12 drummers, 11 pipers, 10 lords, 9 ladies")
print(gifts)

['12 drummers', '11 pipers', '10 lords', '9 ladies']


### Making Your Own Character Classes

In [12]:
slogan = "RoboCop eats baby food. BABY FOOD."

# Square brackets define a custom class: here, any vowel in either case.
vowelRegex = re.compile(r"[aeiouAEIOU]")
vowels_found = vowelRegex.findall(slogan)
print(vowels_found)

# A leading ^ inside the brackets negates the class. Note that the name vowelRegex
# is rebound here and from now on matches every character that is NOT a vowel.
vowelRegex = re.compile(r"[^aeiouAEIOU]")
non_vowels_found = vowelRegex.findall(slogan)
print(non_vowels_found)

['o', 'o', 'o', 'e', 'a', 'a', 'o', 'o', 'A', 'O', 'O']
['R', 'b', 'C', 'p', ' ', 't', 's', ' ', 'b', 'b', 'y', ' ', 'f', 'd', '.', ' ', 'B', 'B', 'Y', ' ', 'F', 'D', '.']


### The Caret and Dollar Sign Characters

In [13]:
# ^ anchors the pattern to the very start of the searched text.
begins_regex = re.compile(r"^Hello")
for greeting in ("Hello there", "he said hello"):
    # Prints the Match object on success and None on failure.
    print(begins_regex.search(greeting))

<re.Match object; span=(0, 5), match='Hello'>
None


In [14]:
# $ anchors the pattern to the end of the text (or just before a final newline).
endsnum = re.compile(r"\d+$")
for sentence in ("Your num is 42", "42 is the number"):
    # Prints the Match object on success and None on failure.
    print(endsnum.search(sentence))


<re.Match object; span=(12, 14), match='42'>
None


In [15]:
# ^ and $ together force the whole text to consist of digits only.
# Caveat: $ also matches just before a single trailing newline, so "123\n" would
# still be accepted by search(); Pattern.fullmatch() is the strict alternative.
wholeIsNum = re.compile(r"^\d+$")
print(wholeIsNum.search('1234567890'))
# search() returns None on failure; compare with "is None" (identity) rather than
# "== None", as PEP 8 recommends for singletons.
print(wholeIsNum.search('12345xyz67890') is None)
print(wholeIsNum.search('12 34567890') is None)

<re.Match object; span=(0, 10), match='1234567890'>
True
True


### The Wildcard Character

In [16]:
# The dot stands for exactly one character (any except a newline), which is why
# "flat" only contributes "lat".
atRegex = re.compile(r".at")
rhyme = "The cat in the hat sat on the flat mat"
atRegex.findall(rhyme)

['cat', 'hat', 'sat', 'lat', 'mat']

### Matching Everything with Dot-Star

In [17]:
# Each (.*) captures whatever text sits after its fixed label.
nameRegex = re.compile(r"First Name: (.*) Last Name: (.*)")
mo = nameRegex.search("First Name: Emircan Last Name: Yılmaz")
first_name, last_name = mo.groups()
first_name

'Emircan'

In [18]:
headline = '<To serve man> for dinner.>'

# Lazy .*? stops at the first closing bracket it can reach.
nongreedyRegex = re.compile(r'<.*?>')
mo = nongreedyRegex.search(headline)
shortest = mo.group()
print(shortest)

# Greedy .* runs on to the last closing bracket. The name mo is reused for this match.
greedyRegex = re.compile(r'<.*>')
mo = greedyRegex.search(headline)
longest = mo.group()
print(longest)


<To serve man>
<To serve man> for dinner.>


### Matching Newlines with the Dot Character

In [19]:
# Without flags the dot does not cross a newline, so the match ends at the line break.
noNewLineRegex = re.compile(r".*")
directives = "Serve the public trust. \nProtect the innocent."
first_line_match = noNewLineRegex.search(directives)
first_line_match.group()

'Serve the public trust. '

In [20]:
# re.DOTALL lets the dot match newlines as well, so .* spans both lines.
newLineRegex = re.compile(r".*", flags=re.DOTALL)
directives = "Serve the public trust. \nProtect the innocent."
whole_text_match = newLineRegex.search(directives)
whole_text_match.group()

'Serve the public trust. \nProtect the innocent.'

### Case-Insensitive Matching

In [21]:
# re.IGNORECASE (short alias: re.I) makes letters match regardless of case.
robocop = re.compile(r"robocop", flags=re.IGNORECASE)
capitalized_match = robocop.search("Robocop is part man part machine all cop")
capitalized_match.group()

'Robocop'

In [22]:
# Reuses the robocop pattern object; "robot cop" does not match, the upper-case word does.
upper_match = robocop.search("robot cop ROBOCOP")
upper_match.group()

'ROBOCOP'

### Substituting Strings with the sub() Method

In [23]:
# sub(replacement, text) returns the text with every match swapped for the replacement.
namesRegex = re.compile(r"Agent \w+")
report = "Agent Alice gave the secret documents to Agent Bob."
namesRegex.sub("CENSORED", report)

'CENSORED gave the secret documents to CENSORED.'

In [24]:
# Group 1 captures only the first letter of the agent's name.
agentNamesRegex = re.compile(r"Agent (\w)\w*")
# In the replacement string, \1, \2, \3, ... insert the text of group 1, 2, 3, ...
masked_template = r"\1****"
agentNamesRegex.sub(masked_template, "Agent Alice gave the secret documents to Agent Bob.")

'A**** gave the secret documents to B****.'