In [1]:
import re

In [2]:
# Pattern for "<month name> <day number>", e.g. "June 24".
regex = (
    r"([A-Za-z]+)"  # group 1: the month name, one or more ASCII letters
    r" "            # a single literal space between month and day
    r"(\d+)"        # group 2: the day, one or more digits
)

In [7]:
# Search once and keep the Match object (None when nothing matches) instead
# of running the same search twice: once for the test, once for the result.
match = re.search(regex, "June 24")
if match is not None:
    # start()/end() give the span of the whole match within the input string.
    print("Match at index %s, %s" %(match.start(), match.end()))

    # group() with no argument is the whole match; group(1) and group(2) are
    # the first and second parenthesised groups of the pattern.
    print("Full match : %s "%(match.group()))
    print("Month match : %s "%(match.group(1)))
    print("Date match : %s "%(match.group(2)))
else:
    print("The regex pattern doesn't match.")

Match at index 0, 7
Full match : June 24 
Month match : June 
Date match : 24 


In [8]:
# Example 2

In [9]:
# Letters, one space, digits - written without any capture groups.
regex = r"[A-Za-z]+ \d+"

In [10]:
# findall returns a list with one entry per non-overlapping match in the text.
matches = re.findall(regex, "June 24, August 9, Dec 21")

In [12]:
# Print each entry of the findall result on its own line.
for match in matches:
    print("Full match : ",match)

Full match :  June 24
Full match :  August 9
Full match :  Dec 21


In [13]:
# To capture only the month of each date, wrap the letters part of the
# pattern in a capture group and leave the digits outside it.

regex = r"([A-Za-z]+) \d+"

In [14]:
# When the pattern has exactly one capture group (as the pattern assigned in
# the previous cell does), findall returns the text of that group for each
# match rather than the whole matched substring.
matches = re.findall(regex, "June 24, August 9, Dec 21")
for match in matches:
    # NOTE(review): the label says "Full match", but what is printed here is
    # only the captured month (see the output recorded below this cell).
    print("Full match : ",match)

Full match :  June
Full match :  August
Full match :  Dec


In [15]:
# If we need the exact position of each match within the text.
# The pattern is letters (captured), one space, then digits.
regex = r"([A-Za-z]+) \d+"

In [16]:
# finditer yields one Match object per match; start()/end() give its span.
# NOTE(review): the last date is written "Dec21" (no space), so the pattern
# does not match it and only two spans are printed. If all three dates were
# meant to match, this is probably a typo for "Dec 21" - confirm.
matches = re.finditer(regex, "June 24, August 9, Dec21")
for match in matches:
    print("Match at index : %s, %s "%(match.start(), match.end()))

Match at index : 0, 7 
Match at index : 9, 17 


In [17]:
# Example 3

In [18]:
# Let's swap the order of the month and the day in a date string.
# The replacement text refers to the captured groups through
# back-references (\1 and \2), so it is written as a raw string too.
regex = r"([a-zA-Z]+) (\d+)"

# Every "<month> <day>" in the input becomes "<day> of <month>", giving:
#   24 of June, 9 of August, 12 of Dec
reordered = re.sub(regex, r"\2 of \1", "June 24, August 9, Dec 12")
print(reordered)

24 of June, 9 of August, 12 of Dec


In [19]:
# Example 4

In [20]:
# Compile the pattern once so the same object can search, list and substitute.
regex = re.compile(r"(\w+) World")

# search() reports the first match only; its start and end are printed as:
#   0 11
result = regex.search("Hello World is the easiest")
if result is not None:
    print(*result.span())

# findall() on a one-group pattern yields the captured word of every match:
#   Hello
#   Bonjour
for result in regex.findall("Hello World, Bonjour World"):
    print(result)

# sub() rewrites each match; \1 puts the captured word back, so "World"
# is replaced by "Earth":
#   Hello Earth
print(regex.sub(r"\1 Earth", "Hello World"))

0 11
Hello
Bonjour
Hello Earth


In [21]:
# https://regexone.com/references/python

In [22]:
# From geeks for geeks

In [23]:
# A sample text string in which the regular expression
# is searched.
string = """Hello my Number is 123456789 and 
             my friend's number is 987654321"""

# A sample regular expression that matches a run of one or more digits.
# Written as a raw string: in a normal string literal "\d" is an invalid
# escape sequence, which newer Python versions warn about.
regex = r'\d+'

# findall returns every non-overlapping run of digits, in order of appearance.
match = re.findall(regex, string)
print(match)

['123456789', '987654321']


In [24]:
# Let's use a regular expression to match a date string
# in the form of a month name followed by a day number.
regex = r"([a-zA-Z]+) (\d+)"

match = re.search(regex, "I was born on June 24")

# re.search returns None when nothing matches. None is a singleton, so it is
# tested with "is not" rather than "!=".
if match is not None:

    # We reach here when the expression "([a-zA-Z]+) (\d+)"
    # matches the date string.

    # This will print "Match at index 14, 21", since the match starts
    # at index 14 and ends just before index 21.
    print("Match at index % s, % s" % (match.start(), match.end()))

    # We use the group() method to get the full match and the
    # captured groups. The groups contain the matched values.
    # In particular:
    # match.group(0) always returns the fully matched string
    # match.group(1), match.group(2), ... return the capture
    # groups in order from left to right in the pattern
    # match.group() is equivalent to match.group(0)

    # So this will print "Full match: June 24"
    print("Full match: % s" % (match.group(0)))

    # So this will print "Month: June"
    print("Month: % s" % (match.group(1)))

    # So this will print "Day: 24"
    print("Day: % s" % (match.group(2)))

else:
    print("The regex pattern does not match.")

Match at index 14, 21
Full match: June 24
Month: June
Day: 24


In [25]:
# There are 14 metacharacters in total; they are discussed as they come up in the functions:

# \   Used to drop the special meaning of the character
#     following it (discussed below)
# []  Represent a character class
# ^   Matches the beginning
# $   Matches the end
# .   Matches any character except newline
# ?   Matches zero or one occurrence.
# |   Means OR (matches either of the expressions
#     separated by it).
# *   Any number of occurrences (including 0 occurrences)
# +   One or more occurrences
# {}  Indicate number of occurrences of a preceding RE 
#     to match.
# ()  Enclose a group of REs