# Chapter 7

## Pattern Matching with Regular Expressions


__Problem.__ Write a function that checks whether a string is a telephone number of the form `ddd-ddd-dddd` (returns True or False).

In [2]:
def isPhoneNumber(text):
    """Return True if ``text`` has the exact form ddd-ddd-dddd, else False.

    The value must be exactly 12 characters long, with a hyphen at
    indexes 3 and 7 and a decimal character (as judged by
    ``str.isdecimal``) at every other index.
    """
    if len(text) != 12:
        return False

    hyphen_positions = (3, 7)
    # Walk the characters left to right; each index is either a separator
    # slot or a digit slot.
    for position, char in enumerate(text):
        if position in hyphen_positions:
            if char != '-':
                return False
        elif not char.isdecimal():
            return False
    return True

In [3]:
# Positive case: a string in the ddd-ddd-dddd format should be accepted.
print('415-555-4242 is a phone number:')
print(isPhoneNumber('415-555-4242'))

415-555-4242 is a phone number:
True


In [4]:
# Negative case: ordinary text should be rejected.
print('Moshi moshi is a phone number:')
print(isPhoneNumber('Moshi moshi'))

Moshi moshi is a phone number:
False



__Problem.__ Find all telephone numbers in a text.

In [5]:
message = 'Call me at 415-555-1011 tomorrow. 415-555-9999 is my office.'
# Slide a 12-character window over the message. Stop at the last index where
# a full window still fits: the shorter chunks at the tail can never pass the
# length check in isPhoneNumber, so testing them is wasted work.
# (If the message is shorter than 12 characters the range is empty.)
for i in range(len(message) - 11):
    chunk = message[i:i+12]
    if isPhoneNumber(chunk):
        print('The phone number was found: ' + chunk)
print('Done')

The phone number was found: 415-555-1011
The phone number was found: 415-555-9999
Done




### Regexes

In [6]:
import re

# Compile a regex that matches a phone number of the form ddd-ddd-dddd.
phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')

# search() scans the text and returns a Match object for the first match,
# or None if the pattern is not found anywhere.
matchObject = phoneNumRegex.search(message)
# group() with no arguments returns the whole matched text.
print(matchObject.group())

415-555-1011


In [7]:
# Parentheses create numbered groups: group 1 captures the first three
# digits, group 2 captures the remaining ddd-dddd part.
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
matchObject = phoneNumRegex.search(message)
print(matchObject.group(1))
print(matchObject.group(2))

415
555-1011


In [8]:
# groups() returns all captured groups at once, as a tuple.
print(matchObject.groups())

('415', '555-1011')


In [9]:
# The tuple returned by groups() can be unpacked into separate variables.
areaCode, mainNumber = matchObject.groups()
print(areaCode + '\n' + mainNumber)

415
555-1011


In [10]:
# To match literal parentheses, escape them as \( and \); the unescaped
# parentheses still delimit the capture groups.
phoneNumRegex = re.compile(r'(\(\d\d\d\)) (\d\d\d-\d\d\d\d)')
mo = phoneNumRegex.search('My phone number is (415) 555-4242.')
print(mo.groups())

('(415)', '555-4242')


In [11]:
# The pipe character | means "either/or": the pattern matches 'Batman' or
# 'Tina Fey'. When both occur in the text, search() returns whichever
# appears first in the text.
heroRegex = re.compile (r'Batman|Tina Fey')
mo1 = heroRegex.search('Batman and Tina Fey.')
mo1.group()

'Batman'

In [12]:
# Here 'Tina Fey' occurs earlier in the text, so it is the match returned.
mo2 = heroRegex.search('Tina Fey and Batman.')
mo2.group()

'Tina Fey'

In [13]:
# A shared prefix can be written once: 'Bat' followed by any one of the
# alternatives listed inside the group.
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
mo = batRegex.search('Batmobile lost a wheel')
mo.group()

'Batmobile'

In [14]:
# The ? quantifier makes the preceding group optional: (wo)? matches
# zero or one occurrence of 'wo'.

batRegex = re.compile(r'Bat(wo)?man')
mo1 = batRegex.search('The Adventures of Batman')
mo1.group()

'Batman'

In [15]:
# The same pattern also matches when the optional 'wo' is present.
mo2 = batRegex.search('The Adventures of Batwoman')
mo2.group()

'Batwoman'

In [16]:
# Make the area code optional: the leading (\d\d\d-)? group may be
# present or absent.

phoneRegex = re.compile(r'(\d\d\d-)?\d\d\d-\d\d\d\d')
mo = phoneRegex.search('My number is 415-555-4242')
mo.group()

'415-555-4242'

In [17]:
# Without an area code, only the ddd-dddd part is matched.
mo = phoneRegex.search('My number is 555-4242')
mo.group()

'555-4242'

In [18]:
# The * quantifier matches the preceding group zero or more times.

batRegex = re.compile(r'Bat(wo)*man')
mo = batRegex.search('The Adventures of Batman')
mo.group()

'Batman'

In [19]:
# Several consecutive repetitions of 'wo' are matched as well.
mo = batRegex.search('The Adventures of Batwowowowoman')
mo.group()

'Batwowowowoman'

In [21]:
# The + quantifier matches the preceding group one or more times, so at
# least one 'wo' is required.

batRegex = re.compile(r'Bat(wo)+man')
# 'Batman' contains no 'wo', so search() finds nothing and returns None.
mo = batRegex.search('The Adventures of Batman')
# Compare against None with `is`, the idiomatic identity test for singletons.
mo is None

True

In [22]:
# One occurrence of 'wo' is enough for the pattern to match.
mo = batRegex.search('The Adventures of Batwoman')
mo.group()

'Batwoman'

In [None]:
# A number in braces repeats the preceding group exactly that many times:
# (Ha){3} matches 'HaHaHa'.

haRegex = re.compile(r'(Ha){3}')
mo = haRegex.search('HaHaHa')
mo.group()

In [24]:
# A single 'Ha' is fewer than the required repetitions, so search() returns None.
mo = haRegex.search('Ha')
# Compare against None with `is`, the idiomatic identity test for singletons.
mo is None

True

In [25]:
# Greedy vs. non-greedy matching.
# {3,5} allows three to five repetitions. By default the quantifier is
# greedy and takes the longest match available.

greedyHaRegex = re.compile(r'(Ha){3,5}')
mo = greedyHaRegex.search('HaHaHaHaHa')
mo.group()

'HaHaHaHaHa'

In [26]:
# A ? after the closing brace makes the quantifier non-greedy: it takes
# the shortest match that satisfies the pattern (three repetitions here).
nongreedyHaRegex = re.compile(r'(Ha){3,5}?')
mo = nongreedyHaRegex.search('HaHaHaHaHa')
mo.group()

'HaHaHa'