In [2]:
# Import Regular Expression
import re

#### **RegEx Functions**

**`findall()`**- returns a list containing all matches

In [2]:
# findall() scans the text left to right and collects every non-overlapping
# occurrence of the pattern as a list of strings.
text = "returing all containing all matchings"
reg = re.compile("ing").findall(text)
print(reg)

['ing', 'ing', 'ing']


In [3]:
# Only one word ("containing") holds the sequence "ain", so the list has one item.
text = "returing all containing all matchings"
reg = re.findall(pattern="ain", string=text)
print(reg)

['ain']


In [5]:
# When nothing matches, findall() returns an empty list rather than None.
text = "returing all the containing of all matchings"
reg = re.compile("dev").findall(text)
print(reg)

[]


**`search()`**- returns a match object if there is a match. If there is more than one match, only the first occurrence of the match will be returned.

In [15]:
text = "returing all the containing of all matchings"
# search() stops at the first hit and returns a match object;
# start()/end() give the slice boundaries of that hit within the text.
reg = re.compile("ing").search(text)
print(f"The first occurrence of 'ing' in position {reg.start()}-{reg.end()}")

The first occurrence of 'ing' in position 5-8


In [17]:
# With no match anywhere in the text, search() returns None.
text = "returing all the containing of all matchings"
reg = re.compile("aap").search(text)
print(reg)

None


**`split()`**- returns a list where the string has been split at each match

In [19]:
text = "returing all the containing of all matchings"
# r"\s" matches one white-space character. The raw-string prefix keeps the
# backslash for the regex engine; a plain "\s" is an invalid Python escape
# sequence and triggers a warning on recent Python versions.
reg = re.split(r"\s", text)      # Split at each white-space character
print(reg)

['returing', 'all', 'the', 'containing', 'of', 'all', 'matchings']


In [22]:
# We can also limit the number of splits by specifying the `maxsplit` parameter.
# It is passed by keyword: passing it positionally is deprecated in newer Python versions.
text = "returing all the containing of all matchings"
reg = re.split(r"\s", text, maxsplit=1)      # Split only at the first white-space character
print(reg)

['returing', 'all the containing of all matchings']


**`sub()`**- replaces the matches with the text of your choice

In [24]:
text = "returing all the containing of all matchings"
# Raw string so that "\s" reaches the regex engine unchanged.
reg = re.sub(r"\s", "-", text)       # Replace every white-space character with "-" (dash)
print(reg)

returing-all-the-containing-of-all-matchings


#### **RegEx Special Sequences**
*A special sequence is a `\` followed by a character (a letter of the English alphabet) that gives the sequence a special meaning.*

In [6]:
# `\A` - Returns a match if the specified characters are at the beginning of the string. (r"\Axxx")
# The pattern is a raw string so the backslash is passed to the regex engine as-is.
text = "returing all the containing of all matchings"
reg = re.sub(r"\Aret", "_", text)       # Only "ret" at the very start of the string is replaced
print(reg)

_uring all the containing of all matchings


In [8]:
# `\b` - Matches at a word boundary, i.e. where the specified characters sit at the
# beginning or at the end of a word. (r"\bxxx" or r"xxx\b")
# The leading 'r' makes sure the pattern is treated as a raw string.
text = "returing all the containing of all matchings"
reg = re.compile(r"\bret").sub("_", text)       # "ret" at the beginning of a word
print(reg)

_uring all the containing of all matchings


In [10]:
text = "returing all the containing of all matchings"
# A trailing \b anchors the match to the end of a word.
reg = re.compile(r"ngs\b").sub("_", text)       # "ngs" at the end of a word
print(reg)

returing all the containing of all matchi_


In [12]:
# '\B' - The opposite of '\b': matches where the specified characters are present,
# but NOT at the beginning (r"\Bxxx") or NOT at the end (r"xxx\B") of a word.
text = "returing all the containing of all matchings"
reg = re.compile(r"\Bing").sub("_", text)       # "ing" that does not start a word
print(reg)

retur_ all the contain_ of all match_s


In [14]:
text = "returing all the containing of all matchings"
# A trailing \B requires the match NOT to end at a word boundary, so only the
# "ing" inside "matchings" qualifies.
reg = re.compile(r"ing\B").sub("_", text)       # "ing" that does not end a word
print(reg)

returing all the containing of all match_s


In [15]:
# '\d' - Matches any digit character (numbers from 0-9). (r"\d")
# Raw string: "\d" in a plain string literal is an invalid Python escape sequence.
text = "Excel will change numbers like 0784367998 to 784367998."
reg = re.sub(r"\d", "_", text)       # Every digit is replaced
print(reg)

Excel will change numbers like __________ to _________.


In [16]:
# '\D' - Matches any character that is NOT a digit. (r"\D")
# Raw string: "\D" in a plain string literal is an invalid Python escape sequence.
text = "Excel will change numbers like 0784367998 to 784367998."
reg = re.sub(r"\D", "_", text)       # Every non-digit character is replaced
print(reg)

_______________________________0784367998____784367998_


In [17]:
# '\s' - Matches any white-space character. (r"\s")
# Raw string: "\s" in a plain string literal is an invalid Python escape sequence.
text = "Excel will change numbers like 0784367998 to 784367998."
reg = re.sub(r"\s", "_", text)       # Every white-space character is replaced
print(reg)

Excel_will_change_numbers_like_0784367998_to_784367998.


In [18]:
# '\S' - Matches any character that is NOT a white-space character. (r"\S")
# Raw string: "\S" in a plain string literal is an invalid Python escape sequence.
text = "Excel will change numbers like 0784367998 to 784367998."
reg = re.sub(r"\S", "_", text)       # Every non-white-space character is replaced
print(reg)

_____ ____ ______ _______ ____ __________ __ __________


In [19]:
# '\w' - Matches any word character (letters, digits from 0-9, and the underscore _ character). (r"\w")
# Raw string: "\w" in a plain string literal is an invalid Python escape sequence.
text = "Excel will change numbers like 0784367998 to 784367998."
reg = re.sub(r"\w", "_", text)       # Every word character is replaced; spaces and "." remain
print(reg)

_____ ____ ______ _______ ____ __________ __ _________.


In [20]:
# '\W' - Matches any character that is NOT a word character. (r"\W")
# Raw string: "\W" in a plain string literal is an invalid Python escape sequence.
text = "Excel will change numbers like 0784367998 to 784367998."
reg = re.sub(r"\W", "_", text)       # Spaces and the final "." are replaced
print(reg)

Excel_will_change_numbers_like_0784367998_to_784367998_


In [22]:
# '\Z' - Returns a match if the specified characters are at the end of the string. (r"xxx\Z")
# The pattern is a raw string, and the full stop is escaped ("\.") so that it matches
# a literal "." instead of acting as the "any character" metacharacter.
text = "Excel will change numbers like 0784367998 to 784367998."
reg = re.sub(r"998\.\Z", "_", text)       # Only "998." at the very end of the string is replaced
print(reg)

Excel will change numbers like 0784367998 to 784367_


#### **RegEx Metacharacters**
###### **Metacharacters are characters with a special meaning.**

In [37]:
# [] - A set of characters, e.g. "[a-m]"; a "-" between two characters forms a range.
text = "returing all the containing of all matchings"
reg = re.compile("[r-z]").sub("_", text)    # Every letter from r to z, in alphabetic order
print(reg)

_e___ing all _he con_aining of all ma_ching_


In [38]:
# [] - A set of characters; ranges also work for digits, e.g. "[7-9]".
text = "Excel will change numbers like 078436 7998 to 784367998."
reg = re.compile("[7-9]").sub("_", text)    # Every digit from 7 to 9
print(reg)

Excel will change numbers like 0__436 ____ to __436____.


In [90]:
# [] - A set of characters, here a collection of punctuation marks and symbols.
# Inside a set most metacharacters lose their special meaning, but:
#   * "-" between two characters forms a range, so it is listed first to be taken literally;
#   * "[" and "]" are escaped so they are treated as ordinary members of the set.
# The backslash itself is not a member of the set, so it stays in the result.
# The pattern is a raw string, and the backslash in the sample text is written as "\\",
# which avoids invalid escape sequences in the string literals.
text = "Excel will change a-lot-of numbers like 0784367998 to 784367998[], where meta_characters includes: '(+a=b-c_d)z\"e(){f*g&h}{}^i%j$k#l@m[!n~o`p,q.r]\\/s?t;u:v'w/x|y."
reg = re.sub(r"[-+=,.;:`~'\"(){}\[\]*&^%$#@!?/|_]", " ", text)
print(reg)

Excel will change a lot of numbers like 0784367998 to 784367998    where meta characters includes     a b c d z e   f g h    i j k l m  n o p q r \ s t u v w x y 
