# Finding Patterns of Text with Regex

In [2]:
import re


# re.compile() turns the raw-string pattern into a reusable Regex object.
phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d')

# search() returns a Match object for the first place the pattern occurs;
# calling group() on that object gives back the text that actually matched.
matchObject = phoneNumRegex.search('My number is 452-155-122')
print('Phone number found is: ', matchObject.group(), sep='')

Phone number found is: 452-155-122


In [4]:
import re

# Each pair of parentheses in the pattern creates a numbered group.
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d)')
matchObject = phoneNumRegex.search('My number is 452-155-122')

# group(1) and group(2) return the text captured by the first and second
# group; group(0) (same as group() with no argument) is the whole match.
print('\n'.join([
    'Phone number group 1 is: ' + matchObject.group(1),
    'Phone number group 2 is: ' + matchObject.group(2),
    'Phone number found is: ' + matchObject.group(0),
]))

Phone number group 1 is: 452
Phone number group 2 is: 155-122
Phone number found is: 452-155-122


In [8]:
import re

phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d)')
matchObject = phoneNumRegex.search('My number is 452-155-122')

# groups() -- note the plural -- retrieves every group at once as a tuple,
# which is why it can be unpacked with multiple assignment.
areaCode, mainNumber = matchObject.groups()
print(areaCode, mainNumber, sep='\n')

452
155-122


In [9]:
import re

# A backslash escapes the ( and ) characters: inside the raw string passed to
# re.compile(), \( and \) match literal parenthesis characters instead of
# opening or closing a group.
phoneNumRegex = re.compile(r'(\(\d\d\d\))-(\d\d\d-\d\d\d)')
matchObject = phoneNumRegex.search('My number is (452)-155-122')

# group(1, 2) returns both captured groups together as a tuple.
areaCode, mainNumber = matchObject.group(1, 2)
print(areaCode + '\n' + mainNumber)

(452)
155-122


## Matching Multiple Groups with the Pipe

In [10]:
# The pipe character | matches either alternative: Batman or Tina Fey.
heroRegex = re.compile(r'Batman|Tina Fey')

# When both alternatives occur in the searched string, the Match object holds
# the first occurrence of matching text.
mo1 = heroRegex.search('Batman Tina Fey')
print(mo1.group(0))

Batman


In [2]:
import re


# The parentheses group the alternatives, so the shared 'Bat' prefix is
# written only once and is followed by one of man, mobile, copter or bat.
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
mo3 = batRegex.search('Batmobile and Batman lost a wheel')

# group(0) is the full match; group(1) is just the alternative that matched
# inside the parentheses.
print(mo3.group(0), mo3.group(1), sep='\n')

Batmobile
mobile


## Optional Matching with the Question Mark

In [5]:
import re

# The ? marks the preceding (wo) group as optional: it may appear zero times
# or once.
batRegex = re.compile(r'Bat(wo)?man')
mo1 = batRegex.search('The adventures of Batman')    # text without 'wo'
mo2 = batRegex.search('Th Adventures of Batwoman')   # text with 'wo'
print(mo1.group(), mo2.group(), sep='\n')

Batman
Batwoman


## Matching Zero or More with the Star

In [7]:
import re
# The * means "match zero or more": the (wo) group may be absent or repeated
# any number of times.
batRegex = re.compile(r'Bat(wo)*man')
mo1 = batRegex.search('Hello Batwowowowowowoman')  # 'wo' repeated
mo2 = batRegex.search('Hello Batman')              # 'wo' absent
print(mo1.group(), mo2.group(), sep='\n')

Batwowowowowowoman
Batman


## Matching One or More with the Plus

In [13]:
import re
# The + means "match one or more": at least one 'wo' is required.
batRegex = re.compile(r'Bat(wo)+man')
mo1 = batRegex.search('Hello Batwowowowowowoman')
mo2 = batRegex.search('Hello Batman')

# 'Batman' contains no 'wo', so the second search finds nothing and mo2 is
# None; it is printed directly rather than calling group() on it.
print(mo1.group(), mo2, sep='\n')

Batwowowowowowoman
None


## Matching Specific Repetitions with Curly Brackets

In [14]:
import re

# {3} requires exactly three repetitions of the (Ha) group. Range forms are
# also allowed: {,3} (up to three), {3,} (three or more), {3,5} (three to five).
haRegex = re.compile(r'(Ha){3}')
mo1 = haRegex.search('HaHaHa')
print(mo1.group(0))

HaHaHa


# Greedy and Nongreedy Matching