# Regular Expression in Python with Examples | Set 1
A regular expression (regex) is a special sequence of characters that defines a search pattern used to find a string or set of strings. 
It can detect the presence or absence of a text by matching with a particular pattern, and also can split a pattern into one or more sub-patterns. 
Python provides a re module that supports the use of regex in Python. 
Its primary function is re.search(), which takes a regular expression and a string and returns either the first match or None if there is no match.

In [1]:
import re

# Sample sentence to scan. re.search() returns a match object for the
# first occurrence of the pattern in the string.
txt = "It can detect the presence or absence of a text by matching with a particular pattern"
data = re.search("text", txt)

# span() gives the (start, end) offsets of the match in a single call.
start_index, end_index = data.span()
print("Start Index", start_index)
print("End Index", end_index)

Start Index 43
End Index 47


In [2]:

import re
 
s = 'geeks.forgeeks'

# First pattern: a bare '.' is a wildcard, so the search succeeds on the
# very first character of the string.
# Second pattern: '\.' (dot escaped with a backslash) matches only a
# literal dot.
for pattern in (r'.', r'\.'):
    match = re.search(pattern, s)
    print(match)

<re.Match object; span=(0, 1), match='g'>
<re.Match object; span=(5, 6), match='.'>


# Regex Module in Python
Python has a module named re that is used for regular expressions in Python. We can import this module by using the import statement.
Example: Importing re module in Python 

# re.findall()
Return all non-overlapping matches of pattern in string, as a list of strings. The string is scanned left-to-right, and matches are returned in the order found.

In [5]:
import re

# Text containing the two phone numbers we want to extract.
txt = """Hello my Number is 03077837001 and  my friend's number is 03117374417"""

# Raw string: the backslash in \d must reach the regex engine untouched.
# In a normal string literal "\d" is an invalid escape sequence that
# recent Python versions warn about.
regex = r"\d+"

# findall() returns every non-overlapping run of digits, in order.
match = re.findall(regex, txt)
print(match)

['03077837001', '03117374417']


# re.compile() 
Regular expressions are compiled into pattern objects, which have methods for various operations such as searching for pattern matches or performing string substitutions. 

In [9]:
import re
 
# compile() turns the pattern text into a reusable pattern object.
# The character class [b-z] matches any single lowercase letter
# from 'b' through 'z'. It does not match 'a', uppercase letters,
# spaces or punctuation.
p = re.compile(r'[b-z]')

# findall() scans the string left to right and returns every
# match as a list of strings.
print(p.findall("Aye, said Mr. Gibenson Stark"))

['y', 'e', 's', 'i', 'd', 'r', 'i', 'b', 'e', 'n', 's', 'o', 'n', 't', 'r', 'k']


In [18]:
import re

# r'\d' matches a single digit, so every digit is returned separately.
# Raw strings keep the backslash intact for the regex engine.
t = re.compile(r'\d')
print(t.findall("I went to him at 11 A.M. on 4th July 1886"))
print("\n")
print("..."*20)
print("\n")
# r'\d+' matches a run of one or more digits, so whole numbers are returned.
t_1 = re.compile(r'\d+')
print(t_1.findall("I went to him at 11 A.M. on 4th July 1886"))


['1', '1', '4', '1', '8', '8', '6']


............................................................


['11', '4', '1886']


In [19]:
import re
 
# \w matches a single word character: a letter, a digit or an
# underscore (equivalent to [a-zA-Z0-9_] for ASCII text).
p = re.compile(r'\w')
print(p.findall("He said * in some_lang."))

# \w+ matches a run of one or more word characters.
p = re.compile(r'\w+')
print(p.findall("I went to him at 11 A.M., he "
                "said *** in some_language."))

# \W matches any single character that is NOT a word character.
p = re.compile(r'\W')
print(p.findall("he said *** in some_language."))

['H', 'e', 's', 'a', 'i', 'd', 'i', 'n', 's', 'o', 'm', 'e', '_', 'l', 'a', 'n', 'g']
['I', 'went', 'to', 'him', 'at', '11', 'A', 'M', 'he', 'said', 'in', 'some_language']
[' ', ' ', '*', '*', '*', ' ', ' ', '.']


In [20]:
import re
 
# 'ab*' matches an 'a' followed by zero or more 'b' characters,
# so a lone 'a' is also a match.
pattern_text = 'ab*'
p = re.compile(pattern_text)
found = p.findall("ababbaabbb")
print(found)

['ab', 'abb', 'a', 'abbb']


# re.split() 
Split the string at each occurrence of a character or pattern. The pieces of the string that lie between the matches are returned as the elements of the resulting list. 

In [21]:
from re import split
 
# r'\W+' matches one or more consecutive non-word characters, so the
# string is split at every run of commas, apostrophes and whitespace.
# Raw strings keep the backslash intact for the regex engine.
print(split(r'\W+', 'Words, words , Words'))
print(split(r'\W+', "Word's words Words"))

# Here ':', ' ' and ',' are not word characters, so they are
# the points where splitting occurs.
print(split(r'\W+', 'On 12th Jan 2016, at 11:02 AM'))

# r'\d+' matches one or more consecutive digits, so splitting
# occurs only at '12', '2016', '11' and '02'.
print(split(r'\d+', 'On 12th Jan 2016, at 11:02 AM'))

['Words', 'words', 'Words']
['Word', 's', 'words', 'Words']
['On', '12th', 'Jan', '2016', 'at', '11', '02', 'AM']
['On ', 'th Jan ', ', at ', ':', ' AM']


# Regular Expressions in Python – Set 2 (Search, Match and Find All)
Searching an occurrence of pattern 

re.search() : This method either returns None (if the pattern doesn’t match), or a re.Match object that contains information about the matching part of the string. This method stops after the first match, so it is better suited to testing a regular expression than to extracting all occurrences from the data.

In [1]:
# A Python program to demonstrate working of re.search(). 
import re 
   
# Let's use a regular expression to match a date string
# in the form of a month name followed by a day number.
regex = r"([a-zA-Z]+) (\d+)"

# re.search() scans the string and returns the first match, or None.
match = re.search(regex, "I was born on June 24")

# Compare with None by identity, not equality.
if match is not None:

    # We reach here when the expression "([a-zA-Z]+) (\d+)"
    # matches the date string.

    # This prints "14, 21": the match starts at index 14 and
    # ends just before index 21.
    print(f"Match at index {match.start()}, {match.end()}")

    # group() gives access to the match and its captured groups:
    #   match.group(0) is the fully matched string
    #   (match.group() is equivalent to match.group(0));
    #   match.group(1), match.group(2), ... are the capture groups,
    #   numbered from left to right.

    # So this will print "June 24"
    print(f"Full match: {match.group(0)}")

    # So this will print "June"
    print(f"Month: {match.group(1)}")

    # So this will print "24"
    print(f"Day: {match.group(2)}")

else:
    print("The regex pattern does not match.")

Match at index 14, 21
Full match: June 24
Month: June
Day: 24


# Matching a Pattern with Text 
re.match() : This function attempts to match the pattern at the beginning of the string only (use re.fullmatch() to require that the whole string matches). The re.match function returns a match object on success, None on failure. 

In [2]:
# A Python program to demonstrate working
# of re.match().
import re
    
# A sample function that uses regular expressions
# to find the month and day of a date.
def findMonthAndDate(string):
    """Print the month and day found at the start of *string*.

    The pattern is a run of letters, one space, then a run of digits.
    re.match() only matches at the beginning of the string, so text
    that has the date further in is reported as invalid.

    Args:
        string: Text expected to begin with "<month name> <day number>".

    Returns:
        None. The result is printed: either the matched text, month and
        day, or "Not a valid date" when the string does not start with
        the pattern.
    """
    regex = r"([a-zA-Z]+) (\d+)"
    match = re.match(regex, string)

    # Compare with None by identity, not equality.
    if match is None:
        print("Not a valid date")
        return

    print("Given Data: %s" % (match.group()))
    print("Month: %s" % (match.group(1)))
    print("Day: %s" % (match.group(2)))
    
        
# Driver code: the first string starts with a date and is accepted;
# the second has the date at the end, so matching from the start fails.
findMonthAndDate("Jun 24")
print()
findMonthAndDate("I was born on June 24")

Given Data: Jun 24
Month: Jun
Day: 24

Not a valid date


# Finding all occurrences of a pattern 

re.findall() : Return all non-overlapping matches of pattern in string, as a list of strings. The string is scanned left-to-right, and matches are returned in the order found (Source : Python Docs). 

In [3]:
# A Python program to demonstrate working of
# findall()
import re
   
# A sample text string in which the regular expression
# is searched.
string = """Hello my Number is 123456789 and
             my friend's number is 987654321"""

# A sample regular expression to find runs of digits.
# Raw string: the backslash in \d must reach the regex engine untouched.
regex = r'\d+'

# findall() returns every non-overlapping match, in order.
match = re.findall(regex, string)
print(match)
   
# This example is contributed by Ayush Saluja.

['123456789', '987654321']


In [4]:
email = "my email addresss is > aleemraza661@gmail.com"

# Gather every e-mail-like token in the text. The set drops duplicates,
# and re.I lets the lowercase ranges match uppercase letters too.
email_pattern = r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+"
new_emails = {found.group() for found in re.finditer(email_pattern, email, re.I)}
print(new_emails)


{'aleemraza661@gmail.com'}


# Verbose in Python Regex
re.VERBOSE : This flag allows you to write regular expressions that look nicer and are more readable by allowing you to visually separate logical sections of the pattern and add comments.
Whitespace within the pattern is ignored, except when in a character class, or when preceded by an unescaped backslash, or within tokens like *?, (?: or (?P<...>. When a line contains a # that is not in a character class and is not preceded by an unescaped backslash, all characters from the leftmost such # through the end of the line are ignored.

In [None]:
# Python3 program to show the Implementation of VERBOSE in RegEX
import re
  
def validate_email(email):
    """Print whether *email* is a valid address and, if valid, its parts.

    The address must consist of a local part, a single @ sign, a domain
    name, a dot and a top-level domain of 2 to 6 letters. Matching is
    case-insensitive.

    Args:
        email: The address to check.

    Returns:
        None. The verdict, and the three captured parts for a valid
        address, are printed.
    """
    # re.VERBOSE lets the pattern be laid out over several commented
    # lines; re.IGNORECASE lets the lowercase ranges accept uppercase.
    pattern = re.compile(r"""
                           ^([a-z0-9_\.-]+)                 # local Part
                           @                             # single @ sign
                            ([0-9a-z\.-]+)                 # Domain name
                           \.                             # single Dot .
                            ([a-z]{2,6})$                 # Top level Domain     
                           """, re.VERBOSE | re.IGNORECASE)

    # fullmatch() returns a match object only when the whole string
    # matches the pattern, otherwise None.
    found = pattern.fullmatch(email)

    # Guard clause: no match means the address is invalid.
    if not found:
        print("{} is Invalid".format(email))
        return

    print("{} is Valid. Details are as follow:".format(email))

    # Group 1: local part, group 2: domain name, group 3: top-level domain.
    local_part, domain, top_level = found.group(1), found.group(2), found.group(3)
    print("Local:{}".format(local_part))
    print("Domain:{}".format(domain))
    print("Top Level domain:{}".format(top_level))
    print()
  
# Driver code: one well-formed address followed by two malformed ones.
for address in (
    "expectopatronum@gmail.com",
    "avadakedavra@yahoo.com@",
    "Crucio@.com",
):
    validate_email(address)

In [5]:
import re

def check_eamil(email):
    """Report on stdout whether *email* is well formed.

    A valid address is a local part, an @ sign, a domain name, a dot and
    a top-level domain of 2 to 6 letters, compared case-insensitively.
    For a valid address the three parts are printed as well.

    Args:
        email: The address to check.

    Returns:
        None; all results are printed.
    """
    email_regex = re.compile(r"""
                           ^([a-z0-9_\.-]+)                 # local Part
                           @                             # single @ sign
                            ([0-9a-z\.-]+)                 # Domain name
                           \.                             # single Dot .
                            ([a-z]{2,6})$                 # Top level Domain     
                           """, re.VERBOSE | re.IGNORECASE)

    # fullmatch() yields None unless the entire string fits the pattern.
    matched = email_regex.fullmatch(email)
    if matched is None:
        print("{} is Invald".format(email))
        return

    # The pattern has exactly three capture groups.
    local, domain_name, tld = matched.groups()
    print("{} is Valid. Details are as follow:".format(email))
    print("Local {}".format(local))
    print("Domain Name {}".format(domain_name))
    print("Top Level domain:{}".format(tld))
    print()


# Try the checker on a sample address.
check_eamil("aleemraza661@gmail.com")

aleemraza661@gmail.com is Valid. Details are as follow:
Local aleemraza661
Domain Name gmail
Top Level domain:com



# Password validation in Python
Let’s take a password as a combination of alphanumeric characters along with special characters, and check whether the password is valid or not with the help of few conditions.

Conditions for a valid password are:

Should have at least one number.
Should have at least one uppercase and one lowercase character.
Should have at least one special symbol.
Should be between 6 to 20 characters long.

In [12]:
def password_check(password):
    """Check *password* against the password rules and print each failure.

    Rules: 6 to 20 characters long, at least one digit, at least one
    uppercase letter, at least one lowercase letter, and at least one of
    the symbols $ @ # %.

    Every rule is checked, so all violations are reported, not only the
    first one.

    Args:
        password: The candidate password string.

    Returns:
        True if every rule is satisfied, False otherwise.
    """
    # Symbols accepted as the "special symbol" of the password.
    s_word = ('$', '@', '#', '%')

    val = True

    if len(password) < 6:
        print("The password is less then 6")
        val = False
    if len(password) > 20:
        print("the password is greater then the 20")
        val = False
    if not any(char.isdigit() for char in password):
        print("Password should have at least one numeral")
        val = False
    if not any(char.isupper() for char in password):
        print('Password should have at least one uppercase letter')
        val = False
    if not any(char.islower() for char in password):
        print('Password should have at least one lowercase letter')
        val = False
    if not any(char in s_word for char in password):
        # The message lists every accepted symbol, including '%'.
        print('Password should have at least one of the symbols $@#%')
        val = False

    # Always return a real boolean: False on failure rather than an
    # implicit None.
    return val

Password is valid


In [14]:
# Ask for a password and report the verdict of password_check().
user_input = input("Enter the password")
verdict = "Password is valid" if password_check(user_input) else "Invalid Password !!"
print(verdict)

Enter the passwordAleeem@111
Password is valid


In [15]:
# importing re library
import re
  
def main(passwd='Geek12@'):
    """Validate *passwd* with a single regular expression and print the verdict.

    The pattern requires at least one lowercase letter, one uppercase
    letter, one digit and one of the symbols @$!%*#?&, with 6 to 20
    characters in total, all drawn from letters, digits and those symbols.

    Args:
        passwd: Password to validate. Defaults to the sample 'Geek12@'.

    Returns:
        None. "Password is valid." or "Password invalid !!" is printed.
    """
    # Raw string so that \d reaches the regex engine untouched.
    # Each (?=...) is a lookahead that asserts one requirement without
    # consuming characters; the final class with {6,20} enforces the
    # allowed characters and the length.
    reg = r"^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*#?&])[A-Za-z\d@$!#%*?&]{6,20}$"

    # compiling regex
    pat = re.compile(reg)

    # searching regex
    mat = pat.search(passwd)

    # validating conditions
    if mat:
        print("Password is valid.")
    else:
        print("Password invalid !!")


# Driver Code
if __name__ == '__main__':
    main()

Password is valid.
