In [1]:
import re

# re is a built-in Python module for working with regular expressions (regex). Regular expressions are patterns used to match, search, extract, and manipulate strings.

###  What is re?
`re` stands for "regular expression". The module provides powerful tools to:

- Search for specific patterns in text
- Replace text using patterns
- Extract information (e.g., phone numbers, emails)
- Validate formats (e.g., dates, passwords)

### Common Uses of re:

| Task | Example |
| --- | --- |
| Search for a pattern | Check if a string contains a date or keyword |
| Extract data | Extract all email addresses from a document |
| Replace parts of text | Replace all whitespace with a dash |
| Split by pattern | Split text on punctuation or multiple spaces |
| Validate input format | Check if a password is strong or an email is valid |

## Most Common re Functions:

| Function | Purpose |
| --- | --- |
| `re.search()` | Search for a pattern anywhere in the string (returns the first match object) |
| `re.findall()` | Return all matches as a list |
| `re.sub()` | Replace matched patterns with something else |
| `re.match()` | Match a pattern only at the beginning of the string |
| `re.split()` | Split a string based on a pattern |
| `re.compile()` | Pre-compile a regex pattern for reuse |

In [6]:
import re

# One digit-run pattern shared by both demonstrations below.
number_pattern = re.compile(r'\d+')

# search() stops at the first run of digits and hands back a Match object.
result = number_pattern.search("My age is 35")
print(result.group())  # prints: 35

# findall() collects every run of digits into a list of strings.
result = number_pattern.findall("My age is 35 and my friend's age is 25")
print(result)  # prints: ['35', '25']



35
['35', '25']


In [9]:
# Each entry is (pattern, text); re.search returns the first match found anywhere in the text.
search_cases = [
    (r'cat', "The cat sat on the mat"),             # literal word inside a sentence
    (r'\b[A-Z][a-z]+\b', "Welcome John!"),          # capital letter + lowercase letters, as a whole word
    (r'apple', "banana apple orange"),              # literal in the middle of the text
    (r'[\w.-]+@[\w.-]+', "Email: user@mail.com"),   # simple e-mail shape: name@host
]
r1, r2, r3, r4 = [re.search(pattern, text) for pattern, text in search_cases]

# Show only the matched substrings.
[found.group() for found in (r1, r2, r3, r4)]

['cat', 'Welcome', 'apple', 'user@mail.com']

In [10]:
# Evaluating the Match object itself (without .group()) displays its repr: the span and the matched text.
r2

<re.Match object; span=(0, 7), match='Welcome'>

In [11]:
# re.findall returns every non-overlapping match as a list of strings.

# Example 1: every run of digits
r5 = re.findall(r'\d+', "Age: 30, Roll: 45, Code: 99")        # ➜ ['30', '45', '99']

# Example 2: every lowercase vowel, in order of appearance
r6 = re.findall(r'[aeiou]', "education")                      # ➜ ['e', 'u', 'a', 'i', 'o']

# Example 3: every word of exactly four word characters. '-' is not a word
# character, so \b also sits between "four" and "-letter" and 'four' matches too.
r7 = re.findall(r'\b\w{4}\b', "This line has many four-letter words")  # ➜ ['This', 'line', 'many', 'four']

# Example 4: a capital letter followed by one or more word characters. The
# titles qualify as well; only their trailing '.' is left out of the match.
r8 = re.findall(r'[A-Z]\w+', "Mr. John and Mrs. Smith")       # ➜ ['Mr', 'John', 'Mrs', 'Smith']

# Example 5: '@' followed by word characters (handles / mentions)
r9 = re.findall(r'@\w+', "Follow us @twitter @chatgpt")       # ➜ ['@twitter', '@chatgpt']
[r5, r6, r7, r8, r9]


[['30', '45', '99'],
 ['e', 'u', 'a', 'i', 'o'],
 ['This', 'line', 'many', 'four'],
 ['Mr', 'John', 'Mrs', 'Smith'],
 ['@twitter', '@chatgpt']]

In [7]:
import re

# re.sub(pattern, replacement, text) returns a copy of text with every match replaced.

# Example 1: each run of digits is replaced by a single space
r10 = re.sub(r'\d+', ' ', "Roll 23, Age 45")                  # ➜ 'Roll  , Age  '

# Example 2: each run of whitespace collapses to one dash
r11 = re.sub(r'\s+', '-', "This   is   text")                 # ➜ 'This-is-text'

# Example 3: plain literal replacement
r12 = re.sub(r'apple', 'banana', "apple pie is good")         # ➜ 'banana pie is good'

# Example 4: drop everything that is neither a word character nor whitespace
r13 = re.sub(r'[^\w\s]', '', "Hello! Are you #1?")            # ➜ 'Hello Are you 1'

# Example 5: remove the titles together with their dot; the space that followed
# each title is not part of the match, so it stays in the result
r14 = re.sub(r'(Mr|Mrs|Ms)\.', '', "Mr. John, Ms. Smith")     # ➜ ' John,  Smith'
r14



' John,  Smith'

In [15]:
# re.match succeeds only when the pattern fits at the very start of the text.

# Literal prefix
r15 = re.compile(r'Hello').match("Hello World!")             # group ➜ 'Hello'

# Leading run of digits
r16 = re.compile(r'\d+').match("2023 is the year")           # group ➜ '2023'

# Leading capitalised word
r17 = re.compile(r'[A-Z]\w+').match("Title Case sentence")   # group ➜ 'Title'

# '.*' takes everything up to the end of this single-line text
r18 = re.compile(r'.*').match("This is a full line")         # group ➜ 'This is a full line'

# Another literal prefix
r19 = re.compile(r'Python').match("Python is great")         # group ➜ 'Python'

[hit.group() for hit in (r15, r16, r17, r18, r19)]

['Hello', '2023', 'Title', 'This is a full line', 'Python']

In [17]:
import re

# Remove every character that is neither a word character (\w) nor whitespace (\s).
text = "Hello@# World!! 2025."
non_word_or_space = re.compile(r'[^\w\s]')
cleaned = non_word_or_space.sub('', text)
cleaned  # displays: 'Hello World 2025'


'Hello World 2025'

In [19]:
# Normalise whitespace: squeeze each run of spaces/tabs/newlines into one space,
# then trim whatever remains at the two ends.
text = "This   is\n a          sentence\twith  irregular spacing."
collapsed = re.sub(r'\s+', ' ', text)
cleaned = collapsed.strip()
cleaned  # displays: 'This is a sentence with irregular spacing.'

'This is a sentence with irregular spacing.'

In [20]:
# Strip markup: the lazy '.*?' stops at the first '>', so each tag is removed on its own.
text = "<div>Hello <b>World</b></div>"
tag_pattern = re.compile(r'<.*?>')
cleaned = tag_pattern.sub('', text)
cleaned  # displays: 'Hello World'

'Hello World'

In [22]:
# Pull out every run of digits; the date is split at its dashes into separate pieces.
text = "Total amount: Rs 5000 on 12-04-2025"
digits = [number.group() for number in re.finditer(r'\d+', text)]
digits

['5000', '12', '04', '2025']

In [27]:
# Remove e-mail-shaped tokens (name@host.suffix). Only the addresses are matched,
# so the blanks that surrounded them remain in the result.
text = "Contact us at test@example.com or support@site.org  abc@t.main dc-text"
email_pattern = r'\b[\w.-]+@[\w.-]+\.\w+\b'
cleaned = re.sub(email_pattern, '', text)
cleaned


'Contact us at  or    dc-text'

In [28]:
# Remove the author byline and the "Updated: ... <4-digit year>" stamp; the separator between them is kept.
text = "By Raman Devgan - Updated: 07 Nov 2024"
byline_or_stamp = re.compile(r'(By Raman Devgan|Updated: .*?\d{4})')
cleaned = byline_or_stamp.sub('', text)
cleaned

' - '

In [29]:
# Keep only ASCII letters and whitespace; digits and symbols are dropped, which
# leaves double spaces where a whole token disappeared.
text = "Data123 with $% unwanted 456 symbols"
not_letter_or_space = re.compile(r'[^a-zA-Z\s]')
cleaned = not_letter_or_space.sub('', text)
cleaned  # displays: 'Data with  unwanted  symbols'


'Data with  unwanted  symbols'