**EXAMPLE 1: Simple Regular Expressions**

In [1]:
import re

In [2]:
# A plain pattern such as r'datascience' (written /datascience/ in textbook notation)
# matches any string that contains that exact substring, so a grep with that pattern
# would return the line 'I really want to be study datascience'.
# A pattern can be a single character (e.g. /!/) or a run of characters (e.g. /ugly/).

# Three sample sentences, one per literal pattern searched in the next cell.
s1, s2, s3 = (
    "I really want to be study datascience",
    "The virus was exceptionally ugly",
    "Computers are amazing!",
)

In [3]:

# Look up each literal pattern in its sample sentence and print the text that
# was matched. re.search() scans the whole string for the first occurrence.
for _regex, _sentence in ((r'datascience', s1), (r'ugly', s2), (r'!', s3)):
    matching = re.search(_regex, _sentence)
    print(matching.group())

datascience
ugly
!


**EXAMPLE 2: Disjunction of Characters**

In [4]:
# Square brackets express a disjunction of characters: exactly one character
# out of the listed set is matched. Patterns are case-sensitive, so [dD] is
# used to accept both 'Datascience' and 'datascience'.

s1 = "Datascience and datascience"
s2 = "The dog is on the sand"
s3 = "The 10% of students like programming"

# (pattern, sentence) pairs; the first matching character/substring is printed.
for _regex, _sentence in (
    (r'[dD]atascience', s1),
    (r'[abc]', s2),
    (r'[0123456789]', s3),
):
    matching = re.search(_regex, _sentence)
    print(matching.group())

Datascience
a
1


**EXAMPLE 3: Range of Characters**

In [5]:
# A dash inside brackets denotes a range of characters:
#   r'[A-Z]'  one upper-case letter  e.g. in "I enrolled in the NLP course"
#   r'[a-z]'  one lower-case letter  e.g. in "Learning about language is great"
#   r'[0-9]'  one digit              e.g. in "Only 12 lessons of the NLP"
s1 = "enrolled in the NLP course"
s2 = "learning about language is great"
s3 = "only 12 lessons of the NLP"

# Print the first character of each sentence that falls inside the range.
for _regex, _sentence in ((r'[A-Z]', s1), (r'[a-z]', s2), (r'[0-9]', s3)):
    matching = re.search(_regex, _sentence)
    print(matching.group())

N
l
1


In [19]:
# A caret placed FIRST inside brackets negates the set; anywhere else it is
# just a caret character:
#   r'[^A-Z]'  not an upper-case letter   "Enrolled In tHe NLP course"
#   r'[^Ss]'   neither 'S' nor 's'        "Spring is sweet and safe"
#   r'[^.]'    not a period               "our professor Rosso"
#   r'[e^]'    either 'e' or '^'          "Look up ^ now"
#   r'a\^b'    the literal text 'a^b'     "look up a^b now"
#              (outside brackets the caret is an anchor, hence the backslash)

s1 = "Enrolled In tHe NLP course "
s2 = "Spring is sweet and safe"
s3 = ".our professor Rosso."
s4 = "Look up ^ now "
s5 = "Look up a^b now"

# Print the first match of every pattern in its sentence.
for _regex, _sentence in (
    (r'[^A-Z]', s1),
    (r'[^Ss]', s2),
    (r'[^.]', s3),
    (r'[e^]', s4),
    (r'a\^b', s5),
):
    matching = re.search(_regex, _sentence)
    print(matching.group())

n
p
o
^
a^b


**EXAMPLE 4: Using Period (.) and ?**

In [7]:
# '?' makes the preceding character optional (zero or one occurrence), and
# '.' stands for any single character (other than a newline by default).
s1 = "The progamers are bussy"
s2 = "True colours in the sky"
s3 = "The show begin at 8pm"

# 'm?' tolerates a missing second 'm', 'u?' accepts both spellings of
# colo(u)rs, and 'beg.n' accepts any character between 'beg' and 'n'.
for _regex, _sentence in (
    (r'progamm?ers', s1),
    (r'colou?rs', s2),
    (r'beg.n', s3),
):
    matching = re.search(_regex, _sentence)
    print(matching.group())

progamers
colours
begin


**EXAMPLE 5: Using Kleene Star (*) and Kleene Plus (+)**

In [30]:
# '*' (Kleene star) repeats the preceding item zero or more times;
# '+' (Kleene plus) repeats it one or more times.
s1 = "baa! baaaa! baaaa! ... "
s2 = "aa aaaaa aaaaaaa ... "
s3 = "ab aab baab, babba ... "
s4 = "0 01 12 359 99999 ... "

# re.search() reports only the first (leftmost) match, so each line printed
# is the first run found in the sentence. Note that r'[ab]*' is also able to
# match the empty string; here the sentence happens to start with 'ab'.
for _regex, _sentence in (
    (r'baa*!', s1),
    (r'aa*', s2),
    (r'[ab]*', s3),
    (r'[0-9]+', s4),
):
    matching = re.search(_regex, _sentence)
    print(matching.group())

baa!
aa
ab
0


**EXAMPLE 6: Anchors**

In [21]:
# Anchors match positions rather than characters:
#   ^   start of the string          $   end of the string
#   \b  a word boundary              \B  a position that is NOT a word boundary
s1 = "Python is easy"
s2 = "Programming is useful."
s3 = "Python is useful."
s4 = "Students have the Test"
s5 = "Students have another Test"

# These four patterns are all expected to match their sentence.
# '\.' is a literal period (an unescaped '.' would match any character).
for _regex, _sentence in (
    (r'^Python is', s1),
    (r'useful\.$', s2),
    (r'^Python is useful\.$', s3),
    (r'\bthe\b', s4),
):
    matching = re.search(_regex, _sentence)
    print(matching.group())

# s5 only contains 'the' inside the word 'another', so the whole-word pattern
# finds nothing and re.search() returns None; guard before calling .group().
matching = re.search(r'\bthe\b', s5)
print(matching.group() if matching else "No match found")

# With \B on both sides the 'the' embedded in 'another' is accepted.
matching = re.search(r'\Bthe\B', s5)
print(matching.group())

Python is
useful.
Python is useful.
the
No match found
the


**EXAMPLE 7: Groups**

In [None]:
# '|' is alternation: r'cat|dog' matches either word.
for _sentence in ("the cat is on the table", "the dog is on the table"):
    matching = re.search(r'cat|dog', _sentence)
    print(matching.group())

# Parentheses limit the scope of each alternation, so the pattern reads
# "<cat or dog> is <black or white>"; group() with no argument returns the
# whole matched text.
for _sentence in ("the dog is black", "the cat is white"):
    matching = re.search(r'(cat|dog) is (black|white)', _sentence)
    print(matching.group())

In [12]:
# E-mail-like pattern with four capturing groups, numbered by the position of
# their opening parenthesis:
#   1: user name   2: full domain   3: one "label." piece   4: final suffix
# The pattern is a raw string so that \w and \. reach the regex engine
# untouched; in a normal string literal they are invalid escape sequences and
# Python emits a warning for them.
pattern = r"(\w+)@((\w+\.)+(com|org|net|edu|es))"
matching = re.search(pattern, "student@upvnet.upv.es")
total_matches = len(matching.groups())
print("The number of matched groups is:", total_matches)

# Group 3 sits inside a '+' repetition, so it keeps only the text of its last
# iteration ("upv."), not every label that was consumed.
for _index in range(1, total_matches + 1):
    print(f"group {_index}", matching.group(_index))

The number of matched groups is: 4
group 1 student
group 2 upvnet.upv.es
group 3 upv.
group 4 es


**EXAMPLE 8: Named Groups**

In [13]:
# Named groups: (?P<name>...) captures exactly like a numbered group but can
# also be retrieved by name, which keeps the lookups below readable.
# The pattern is a raw string so that \w and \. are passed to the regex engine
# as written instead of being parsed as (invalid) string escape sequences.
pattern = r"(?P<user>\w+)@(?P<fulldomain>(?P<middledomain>\w+\.)+(?P<lastdomain>com|org|net|edu|es))"
matching = re.search(pattern, "student@upvnet.upv.es")
total_matches = len(matching.groups())
print("The number of matched groups is:", total_matches)

# 'middledomain' is repeated by '+', so it holds only its last iteration.
for _name in ("user", "fulldomain", "middledomain", "lastdomain"):
    print(f"group {_name}", matching.group(_name))

The number of matched groups is: 4
group user student
group fulldomain upvnet.upv.es
group middledomain upv.
group lastdomain es


**EXAMPLE 9: Non-Capturing Groups**

In [23]:
# Non-capturing group: (?:...) groups a sub-pattern (here so that '+' can
# repeat "label.") without creating a capture, so the match exposes three
# groups instead of four.
# The pattern is a raw string so that \w and \. are passed to the regex engine
# as written instead of being parsed as (invalid) string escape sequences.
pattern = r"(?P<user>\w+)@(?P<fulldomain>(?:\w+\.)+(?P<lastdomain>com|org|net|edu|es))"
matching = re.search(pattern, "student@upvnet.upv.es")
total_matches = len(matching.groups())
print("The number of matched groups is:", total_matches)

# There is no 'middledomain' group in this pattern: the repeated part is
# non-capturing, so asking for matching.group("middledomain") would fail.
for _name in ("user", "fulldomain", "lastdomain"):
    print(f"group {_name}", matching.group(_name))

The number of matched groups is: 3
group user student
group fulldomain upvnet.upv.es
group lastdomain es


**EXAMPLE 10: Back References**

In [27]:
# A back reference matches the same text that an earlier group captured, so
# both patterns below detect a word that is immediately repeated:
#   (?P=word)  refers back to the named group 'word'
#   \1         refers back to the first numbered group
_repeated = "hello  hello"
for pattern, _group_key in (
    (r"(?P<word>\w+)\s+(?P=word)", "word"),  # named-group form
    (r"(\w+)\s+(\1)", 1),                    # numbered-group form
):
    matching = re.search(pattern, _repeated)
    print(matching.group(_group_key))

hello
hello


**EXAMPLE 11: Methods 'search' and 'match'**

In [16]:
# Same pattern and sentence, two different entry points:
#   re.search() scans the whole string and finds the repeated word further in;
#   re.match() only tries at the very beginning of the string, so it returns
#   None here because the sentence does not start with a repeated word.
pattern = r"(?P<word>\w+)\s+(?P=word)"
_sentence = "Only saying hello  hello"

for _finder in (re.search, re.match):
    matching = _finder(pattern, _sentence)
    print(matching)

<re.Match object; span=(12, 24), match='hello  hello'>
None


**EXAMPLE 12: Methods and Flags in Compiled Regular Expressions**

In [17]:
# Three-line sample: 'Hello' opens the first line and 'HELLO' opens the third.
text = """Hello World!
This is a test.
HELLO again."""

# Flags are combined with '|'. IGNORECASE (re.I) makes 'hello' match any
# casing; MULTILINE (re.M) lets '^' match at the start of every line instead
# of only at the start of the string.
pattern = re.compile(r"^hello", flags=re.I | re.M)

# NOTE(review): the next three results are stored but never displayed here.
match_result = pattern.match(text)      # match(): tries only at the start of the string
search_result = pattern.search(text)    # search(): first occurrence anywhere
findall_result = pattern.findall(text)  # findall(): list of every matched substring

# finditer(): lazily yields one match object per occurrence, which gives
# access to positions as well as the matched text.
finditer_result = pattern.finditer(text)
for match in finditer_result:
    print(f"finditer(): Found '{match.group()}' at position {match.start()}")

# DOTALL (re.S) lets '.' match newlines too, so '.*' can span several lines.
dotall_pattern = re.compile(r"Hello.*again", flags=re.S)
dotall_match = dotall_pattern.search(text)
if dotall_match is not None:
    print(f"DOTALL flag: Found '{dotall_match.group()}' across newlines.")

# VERBOSE (re.X) ignores whitespace in the pattern and allows '#' comments,
# so a regular expression can be laid out and annotated over several lines.
verbose_pattern = re.compile(r"""
    ^       # start of the line
    hello   # match 'hello'
""", flags=re.I | re.X | re.M)

verbose_search = verbose_pattern.search(text)
if verbose_search is not None:
    print(f"VERBOSE flag: Found '{verbose_search.group()}' with comments.")

finditer(): Found 'Hello' at position 0
finditer(): Found 'HELLO' at position 29
DOTALL flag: Found 'Hello World!
This is a test.
HELLO again' across newlines.
VERBOSE flag: Found 'Hello' with comments.


**EXAMPLE 13: Modifying Sequence with Regular Expressions**

In [18]:
# split(): cut the string at every match of the pattern (here, each comma).
pattern = re.compile(r",")
result = pattern.split("apple,banana,orange")
print(result)

# One compiled pattern for "a run of digits" serves both substitutions below.
pattern = re.compile(r"\d+")
_digits_text = "hello 123 world 456"

# sub(): replace every match and return the new string.
result = pattern.sub("number", _digits_text)
print(result)

# subn(): same replacement, but returns (new_string, number_of_replacements).
result, count = pattern.subn("number", _digits_text)
print(result, count, sep="\n")

['apple', 'banana', 'orange']
hello number world number
hello number world number
2
