A RegEx, or Regular Expression, is a sequence of characters that forms a search pattern.

RegEx can be used to check if a string contains the specified search pattern.

# [ ]	A set of characters	"[a-m]"

In [1]:
import re

txt = "The rain in Spain"

# A bracketed range such as [a-m] matches one lower-case letter from "a" through "m".
# findall() gathers every such character in order of appearance.
x = re.findall(pattern="[a-m]", string=txt)
print(x)


['h', 'e', 'a', 'i', 'i', 'a', 'i']


# \	Signals a special sequence (can also be used to escape special characters)	"\d"

In [2]:
import re

txt = "That will be 59 dollars"

# Find all digit characters.
# The pattern is a raw string so the backslash reaches the regex engine untouched;
# a plain "\d" is an invalid string escape and triggers a warning on recent Pythons.
x = re.findall(r"\d", txt)
print(x)


['5', '9']


# .	Any character (except newline character)

In [3]:
import re

txt = "hello world"

# Each "." stands for exactly one arbitrary character (newline excluded), so the
# pattern reads: "he", then any two characters, then "o".
x = re.compile("he..o").findall(txt)
print(x)


['hello']


# ^	Starts with	"^hello"

In [6]:
import re

txt = "hello world"

# "^" anchors the pattern to the start of the string, so only a leading "hello" matches.
x = re.findall(pattern="^hello", string=txt)
print(x)

# A non-empty result list is truthy, which means the anchored pattern matched.
print("Yes, the string starts with 'hello'" if x else "No match")

['hello']
Yes, the string starts with 'hello'


# $	Ends with	"world$"

In [7]:
import re

txt = "hello world"

# "$" anchors the pattern to the end of the string, so only a trailing "world" matches.
x = re.findall(pattern="world$", string=txt)

# An empty list (no match) is falsy, so the fallback message is printed in that case.
print("Yes, the string ends with 'world'" if x else "No match")


Yes, the string ends with 'world'


# *	Zero or more occurrences	"aix*"

In [10]:
import re

txt = "The rain in Spain falls mainly in the plain!"

# "x*" allows zero or more "x" characters, so a bare "ai" already satisfies the pattern.
x = re.compile("aix*").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

['ai', 'ai', 'ai', 'ai']
Yes, there is at least one match!


# +	One or more occurrences	"aix+"

In [11]:
import re

txt = "The rain in Spain falls mainly in the plain!"

# "x+" demands at least one "x" after "ai"; the text has none, so the result is empty.
x = re.compile("aix+").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")


[]
No match


# {}	Exactly the specified number of occurrences	"al{2}"	


In [12]:
import re

txt = "The rain in Spain falls mainly in the plain!"

# "l{2}" requires exactly two consecutive "l" characters right after the "a".
x = re.findall(pattern="al{2}", string=txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

['all']
Yes, there is at least one match!


# |	Either or	"falls|stays"

In [13]:
import re

txt = "The rain in Spain falls mainly in the plain!"

# "|" is alternation: the pattern matches wherever either "falls" or "stays" occurs.
x = re.findall(pattern="falls|stays", string=txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

['falls']
Yes, there is at least one match!


# \A	Returns a match if the specified characters are at the beginning of the string	"\AThe"

In [15]:
import re

txt = "the rain in Spain"

# Check if the string starts with "The".
# \A anchors the match to the very beginning of the string. The pattern is a raw
# string because "\A" is not a valid escape in an ordinary string literal.
# Matching is case-sensitive and txt begins with a lower-case "the", so nothing matches here.
x = re.findall(r"\AThe", txt)

print(x)

if x:
  print("Yes, there is a match!")
else:
  print("No match")


[]
No match


# \b	Returns a match where the specified characters are at the beginning or at the end of a word
(the "r" in the beginning is making sure that the string is being treated as a "raw string")	r"\bain"
r"ain\b"

In [20]:
import re

txt = "The rain in Spain"

# \b marks a word boundary. Placed after "ain" it requires the word to end there.
x = re.findall(pattern=r"ain\b", string=txt)
print(x)
print("Yes, there is at least one match at end!" if x else "No match")

# Placed before "ain" it requires a word to begin there; no word in txt starts with "ain".
y = re.findall(pattern=r"\bain", string=txt)
print(y)
print("Yes, there is at least one match at beginning!" if y else "No match at beginning")

['ain', 'ain']
Yes, there is at least one match at end!
[]
No match at beginning


# \B	Returns a match where the specified characters are present, but NOT at the beginning (or at the end) of a word
(the "r" in the beginning is making sure that the string is being treated as a "raw string")	r"\Bain"
r"ain\B"

In [21]:
import re

txt = "The rain in Spain"

# \B is the opposite of \b: "ain" must be preceded by another word character,
# i.e. it may not sit at the start of a word.
x = re.compile(r"\Bain").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")


['ain', 'ain']
Yes, there is at least one match!


# A capital letter denotes the negation of the corresponding lower-case sequence (e.g. \D is the opposite of \d)

# \d	Returns a match where the string contains digits (numbers from 0-9)	"\d"

In [24]:
import re

txt = "The rain 45 times in Spain"

# Check if the string contains any digits (numbers from 0-9).
# Raw string: keeps the backslash intact for the regex engine and avoids the
# invalid-escape warning that a plain "\d" literal produces.
x = re.findall(r"\d", txt)

print(x)

if x:
  print("Yes, there is at least one match!")
else:
  print("No match")


['4', '5']
Yes, there is at least one match!


# \s	Returns a match where the string contains a white space character	"\s"

In [25]:
import re

txt = "The rain in Spain"

# Return a match at every white-space character.
# Raw string: "\s" is not a valid escape in an ordinary string literal.
x = re.findall(r"\s", txt)

print(x)

if x:
  print("Yes, there is at least one match!")
else:
  print("No match")


[' ', ' ', ' ']
Yes, there is at least one match!


# \w	Returns a match where the string contains any word characters (letters a-z and A-Z, digits 0-9, and the underscore _ character)	"\w"

In [26]:
import re

txt = "The rain in Spain"

# Return a match at every word character: letters, digits, and the underscore.
# (For str patterns \w is Unicode-aware, so it is not limited to ASCII a-z / A-Z.)
# Raw string: "\w" is not a valid escape in an ordinary string literal.
x = re.findall(r"\w", txt)

print(x)

if x:
  print("Yes, there is at least one match!")
else:
  print("No match")

['T', 'h', 'e', 'r', 'a', 'i', 'n', 'i', 'n', 'S', 'p', 'a', 'i', 'n']
Yes, there is at least one match!


# \Z	Returns a match if the specified characters are at the end of the string	"Spain\Z"

In [27]:
import re

txt = "The rain in Spain"

# Check if the string ends with "Spain".
# \Z anchors the match to the very end of the string. Raw string: "\Z" is not a
# valid escape in an ordinary string literal.
x = re.findall(r"Spain\Z", txt)

print(x)

if x:
  print("Yes, there is a match!")
else:
  print("No match")

['Spain']
Yes, there is a match!


# [arn]	Returns a match where one of the specified characters (a, r, or n) are present

In [28]:
import re

txt = "The rain in Spain"

# A set without a range lists its members explicitly: any single "a", "r" or "n" matches.
x = re.findall(pattern="[arn]", string=txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

['r', 'a', 'n', 'n', 'a', 'n']
Yes, there is at least one match!


# [a-n]	Returns a match for any lower case character, alphabetically between a and n

In [29]:
import re

txt = "The rain in Spain"

# The range [a-n] covers lower-case letters only, so "T" and "S" are skipped.
x = re.compile("[a-n]").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")


['h', 'e', 'a', 'i', 'n', 'i', 'n', 'a', 'i', 'n']
Yes, there is at least one match!


# [^arn]	Returns a match for any character EXCEPT a, r, and n	


In [30]:
import re

txt = "The rain in Spain"

# A leading "^" inside the brackets negates the set: match anything that is NOT
# "a", "r" or "n" (spaces included).
x = re.findall(pattern="[^arn]", string=txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")


['T', 'h', 'e', ' ', 'i', ' ', 'i', ' ', 'S', 'p', 'i']
Yes, there is at least one match!


# [0123]	Returns a match where any of the specified digits (0, 1, 2, or 3) are present

In [31]:
import re

txt = "The rain in Spain"

# Look for any of the digits 0, 1, 2 or 3; txt contains no digits, so nothing is found.
x = re.findall(pattern="[0123]", string=txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")


[]
No match


# [0-9]	Returns a match for any digit between 0 and 9

In [32]:
import re

txt = "8 times before 11:45 AM"

# [0-9] matches one digit at a time, so "11" and "45" each contribute two entries.
x = re.compile("[0-9]").findall(txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

['8', '1', '1', '4', '5']
Yes, there is at least one match!


# [0-5][0-9]	Returns a match for any two-digit number from 00 to 59

In [34]:
import re

txt = "8 times before 11:45 AM"

# Two sets back to back: a first digit 0-5 followed by any digit, i.e. "00" through "59".
# The lone "8" has no second digit and therefore does not match.
x = re.findall(pattern="[0-5][0-9]", string=txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")

['11', '45']
Yes, there is at least one match!


# [a-zA-Z]	Returns a match for any character alphabetically between a and z, lower case OR upper case

In [35]:
import re

txt = "8 times before 11:45 AM"

# Two ranges in one set: any letter a-z or A-Z matches; digits, spaces and ":" do not.
x = re.findall(pattern="[a-zA-Z]", string=txt)
print(x)

print("Yes, there is at least one match!" if x else "No match")


['t', 'i', 'm', 'e', 's', 'b', 'e', 'f', 'o', 'r', 'e', 'A', 'M']
Yes, there is at least one match!


# The findall() function returns a list containing all matches.

In [36]:
import re

txt = "The rain in Spain"
# findall() returns every non-overlapping match as a list, in the order found.
x = re.compile("ai").findall(txt)
print(x)

['ai', 'ai']


In [37]:
import re

txt = "The rain in Spain"
# When nothing matches, findall() returns an empty list rather than None.
x = re.compile("Portugal").findall(txt)
print(x)

[]


# The search() function returns a Match object for only the first occurrence (or None if there is no match)

In [38]:
import re

txt = "The rain in Spain"

# search() scans for the first location where the pattern matches and returns a
# Match object, or None when there is no match at all.
# Raw string: "\s" is not a valid escape in an ordinary string literal.
x = re.search(r"\s", txt)

# Guard against None so that a text without white-space does not raise AttributeError.
if x is not None:
  print("The first white-space character is located in position:", x.start())
else:
  print("No match")

The first white-space character is located in position: 3


# The split() function returns a list where the string has been split at each match

In [40]:
import re

txt = "The rain in the Spain"
# split() cuts the string at every match of the pattern; a match at the very end
# leaves a trailing empty string in the result.
x = re.split(pattern="in", string=txt)
print(x)

['The ra', ' ', ' the Spa', '']


# We can control the maximum number of splits by specifying the maxsplit parameter

In [41]:
import re

txt = "The rain in Spain"
# Split at white-space, but perform at most one split; the remainder is kept whole.
# Raw string: "\s" is not a valid escape in an ordinary string literal.
# maxsplit is passed by keyword: passing it positionally is deprecated in newer Pythons
# and the keyword makes the meaning of the bare number obvious.
x = re.split(r"\s", txt, maxsplit=1)
print(x)

['The', 'rain in Spain']


# The sub() function replaces the matches with the text of your choice

In [42]:
import re

txt = "The rain in Spain"
# Replace every white-space character with "9".
# Raw string: "\s" is not a valid escape in an ordinary string literal.
x = re.sub(r"\s", "9", txt)
print(x)

The9rain9in9Spain


# control the number of replacements by specifying the count parameter

In [43]:
import re

txt = "The rain in Spain"
# Replace only the first two white-space characters with "9".
# Raw string: "\s" is not a valid escape in an ordinary string literal.
# count is passed by keyword: passing it positionally is deprecated in newer Pythons
# and is easy to confuse with the flags argument.
x = re.sub(r"\s", "9", txt, count=2)
print(x)

The9rain9in Spain


# A Match Object is an object containing information about the search and the result.

In [44]:
import re

txt = "The rain in Spain"
# search() yields a Match object describing the first hit (its span and matched text).
x = re.search(pattern="ai", string=txt)
print(x)  # prints the Match object's repr, not the matched text

<re.Match object; span=(5, 7), match='ai'>
