In [3]:
# re.match
# Signature: re.match(pattern, string, flags=0)
# The pattern is tried at the start of the string only; the result is a match
# object, or None when the pattern does not match there.

import re

line = "Cats are smarter than dogs"

# Group 1 greedily takes everything before " are "; group 2 lazily takes the
# single word that follows it. re.I makes the literal "are" case-insensitive.
matchObj = re.match(r'(.*) are (.*?) .*', line, flags=re.M | re.I)

if matchObj is None:
    print("No match!!")
else:
    # group(0) is the complete match; groups() returns the captures in order.
    first, second = matchObj.groups()
    print("matchObj.group(0) : ", matchObj.group(0))
    print("matchObj.group(1) : ", first)
    print("matchObj.group(2) : ", second)

matchObj.group(0) :  Cats are smarter than dogs
matchObj.group(1) :  Cats
matchObj.group(2) :  smarter


In [9]:
# re.search
# Signature: re.search(pattern, string, flags=0)
# Scans the whole string and reports the first location where the pattern
# matches; the result is a match object, or None when nothing matches.

import re

line = "Cats are smarter than dogs"

searchObj = re.search(r'(.*) are (.*?) .*', line, re.M | re.I)

if not searchObj:
    print("Nothing found!!")
else:
    # group(1, 2) returns both captured groups at once as a tuple.
    first, second = searchObj.group(1, 2)
    print("searchObj.group() : ", searchObj.group())
    print("searchObj.group(1) : ", first)
    print("searchObj.group(2) : ", second)

searchObj.group() :  Cats are smarter than dogs
searchObj.group(1) :  Cats
searchObj.group(2) :  smarter


In [10]:
#Python offers two different primitive operations based on regular expressions: 
#match checks for a match only at the beginning of the string, 
#while search checks for a match anywhere in the string

In [6]:
import re

line = "Cats are smarter than dogs"

# re.match only tries the pattern at the very start of the string, and the
# string starts with "Cats", so this yields None.
matchObj = re.match(r'dogs', line, re.M | re.I)
if matchObj is None:
    print("No match!!")
else:
    print("match --> matchObj.group() : ", matchObj.group())

# re.search scans forward through the string, so it finds "dogs" at the end.
searchObj = re.search(r'dogs', line, re.M | re.I)
if searchObj is None:
    print("Nothing found!!")
else:
    print("search --> searchObj.group() : ", searchObj.group())

No match!!
search --> searchObj.group() :  dogs


In [13]:
# Search and replace
# Syntax: re.sub(pattern, repl, string, count=0, flags=0)
#   count=0 (the default) replaces every occurrence of the pattern.

import re

phone = "2004-959-559 # This is Phone Number"

# Delete the Python-style comment: everything from '#' to the end of the string.
# The space that precedes '#' is not part of the pattern, so it stays behind as
# trailing whitespace in the result.
num = re.sub(r'#.*$', "", phone)
print("Phone Num : ", num)

# Remove anything other than digits. This starts again from the original
# `phone` string; \D matches every non-digit, so the dashes, the spaces and
# the comment text are all removed.
num = re.sub(r'\D', "", phone)
print("Phone Num : ", num)

Phone Num :  2004-959-559 
Phone Num :  2004959559


### Regex example: extracting the month and day from a date string

In [14]:
 import re
# Lets use a regular expression to match a date string. Ignore
# the output since we are just testing if the regex matches.
regex = r"([a-zA-Z]+) (\d+)"
if re.search(regex, "June 24"):
    # Indeed, the expression "([a-zA-Z]+) (\d+)" matches the date string
    
    # If we want, we can use the MatchObject's start() and end() methods 
    # to retrieve where the pattern matches in the input string, and the 
    # group() method to get all the matches and captured groups.
    match = re.search(regex, "June 24")
    
    # This will print [0, 7), since it matches at the beginning and end of the 
    # string
    print ("Match at index %s, %s" % (match.start(), match.end()))
    
    # The groups contain the matched values.  In particular:
    #    match.group(0) always returns the fully matched string
    #    match.group(1) match.group(2), ... will return the capture
    #            groups in order from left to right in the input string
    #    match.group() is equivalent to match.group(0)
    
    # So this will print "June 24"
    print ("Full match: %s" % (match.group(0)))
    # So this will print "June"
    print ("Month: %s" % (match.group(1)))
    # So this will print "24"
    print ("Day: %s" % (match.group(2)))
else:
    # If re.search() does not match, then None is returned
    print ("The regex pattern does not match. :(")


Match at index 0, 7
Full match: June 24
Month: June
Day: 24


### Regex example using findall and finditer


In [16]:
#matchList = re.findall(pattern, input_str, flags=0)    # returns a list of matches
#matchIter = re.finditer(pattern, input_str, flags=0)   # returns an iterator of match objects

In [15]:
import re

# All three examples below scan the same input text.
dates = "June 24, August 9, Dec 12"

# 1) No capture groups: findall returns each full match as a string.
regex = r"[a-zA-Z]+ \d+"
matches = re.findall(regex, dates)
for match in matches:
    # Prints, one per line:
    #   Full match: June 24
    #   Full match: August 9
    #   Full match: Dec 12
    print("Full match: %s" % match)

# 2) One capture group around the month: findall now returns only the text
#    captured by that group.
regex = r"([a-zA-Z]+) \d+"
matches = re.findall(regex, dates)
for match in matches:
    # Prints, one per line:
    #   Match month: June
    #   Match month: August
    #   Match month: Dec
    print("Match month: %s" % match)

# 3) When the position of each match is needed, finditer yields match objects
#    instead of strings.
regex = r"([a-zA-Z]+) \d+"
matches = re.finditer(regex, dates)
for match in matches:
    # span() is the (start, end) pair of the match in the input, so this prints:
    #   Match at index: 0, 7
    #   Match at index: 9, 17
    #   Match at index: 19, 25
    print("Match at index: %s, %s" % match.span())

Full match: June 24
Full match: August 9
Full match: Dec 12
Match month: June
Match month: August
Match month: Dec
Match at index: 0, 7
Match at index: 9, 17
Match at index: 19, 25


### Regex example for finding & replacing the strings

In [18]:
#replacedString = re.sub(pattern, replacement_pattern, input_str, count=0, flags=0)

In [17]:
import re

# Swap the order of day and month in every date of a string. The replacement
# template refers back to the captured groups (\1 = month, \2 = day), which is
# why it is written as a raw string just like the pattern.
regex = r"([a-zA-Z]+) (\d+)"
template = r"\2 of \1"
reordered = re.sub(regex, template, "June 24, August 9, Dec 12")

# Prints:
#   24 of June, 9 of August, 12 of Dec
print(reordered)

24 of June, 9 of August, 12 of Dec


### One more case study to make things clearer

In [19]:
import re

# Compile the pattern once; the compiled object exposes search(), findall()
# and sub() as methods, so the pattern does not have to be passed each time.
regex = re.compile(r"(\w+) World")

result = regex.search("Hello World is the easiest")
if result:
    # This will print:
    #   0 11
    # for the start and end of the match
    print(result.start(), result.end())

# This will print:
#   Hello
#   Bonjour
# for each of the captured groups that matched.
# The loop variable has its own name so that `result` keeps referring to the
# match object found above instead of being overwritten with a string.
for greeting in regex.findall("Hello World, Bonjour World"):
    print(greeting)

# This will substitute "World" with "Earth" (keeping the captured word via \1)
# and print:
#   Hello Earth
print(regex.sub(r"\1 Earth", "Hello World"))

0 11
Hello
Bonjour
Hello Earth
