# Regular Expressions
A RegEx (regular expression) is a sequence of characters that describes a textual pattern.
In Python, the `re` module provides a set of functions to perform RegEx operations.

In [2]:
# Sample sentence to search; it contains a single 10-digit number.
text = 'My primary phone number is 9999988888'

In [3]:
import re

In [4]:
# Compile a pattern of ten consecutive digits (each \d matches one digit).
mobile_no_regex = re.compile(r'\d\d\d\d\d\d\d\d\d\d')

In [5]:
# re.compile() returns a compiled pattern object (re.Pattern).
type(mobile_no_regex)

re.Pattern

### Search
- `search()` returns `None` if the pattern is not found anywhere in the string.
- If the pattern is found, it returns a `Match` object for the first occurrence.

In [6]:
# Scan `text` for the first location where the compiled pattern matches.
mobile_number = mobile_no_regex.search(text)

In [7]:
# A successful search() yields a match object (re.Match).
type(mobile_number)

re.Match

In [8]:
# group() with no argument returns the whole matched substring.
mobile_number.group()

'9999988888'

In [9]:
# span() returns the (start, end) indices of the match within `text`.
mobile_number.span()

(27, 37)

In [10]:
# Index of the first matched character.
mobile_number.start()

27

In [11]:
# Index just past the last matched character (the end index is exclusive).
mobile_number.end()

37

In [12]:
# The whole compile -> search -> group workflow in a single cell.
text = 'My primary phone number is 9999988888'

mobile_no_regex = re.compile(r'\d\d\d\d\d\d\d\d\d\d')         # Compile the pattern: ten digits in a row
mobile_number = mobile_no_regex.search(text)                  # Find the first match in text
mobile_number.group()                                         # Matched substring

'9999988888'

In [12]:
# Practice: extract a four-digit roll number from a sentence.
text = 'My roll number is 1001'
roll_no_regex = re.compile(r'\d\d\d\d')   # four consecutive digits
roll_no = roll_no_regex.search(text)
roll_no.group()

'1001'

In [15]:
# The sentence holds three roll numbers, but search() reports only one.
text = 'My roll number is 1001, his roll number is 1002, her roll number is 1003'
roll_no_regex = re.compile(r'\d\d\d\d')
roll_no = roll_no_regex.search(text)                   # search() stops at the first match
roll_no.group()

'1001'

### Grouping

In [18]:
# Match a dd-dd-dddd date as one unit; no part of it is captured separately.
text2 = 'The date is 18-05-2023'
date_regex = re.compile(r'\d\d-\d\d-\d\d\d\d')          # without grouping
date = date_regex.search(text2)
date.group()

'18-05-2023'

In [19]:
# Parentheses create numbered groups (1, 2, 3) that can be read individually
# from the match; group() with no argument still returns the full match.
text2 = 'The date is 18-05-2023'
date_regex = re.compile(r'(\d\d)-(\d\d)-(\d\d\d\d)')    # with grouping
date = date_regex.search(text2)
date.group()

'18-05-2023'

In [20]:
# group(0) is the entire match, the same as group().
date.group(0)

'18-05-2023'

In [21]:
# group(1) is the text captured by the first pair of parentheses.
date.group(1)

'18'

In [22]:
# group(2) is the text captured by the second pair of parentheses.
date.group(2)

'05'

In [23]:
# group(3) is the text captured by the third pair of parentheses.
date.group(3)

'2023'

In [24]:
# Extracting only the year: just the last four digits are wrapped in
# parentheses, so the pattern has a single group.
text2 = 'The date is 18-05-2023'
date_regex = re.compile(r'\d\d-\d\d-(\d\d\d\d)')    # one group: the four-digit year
date = date_regex.search(text2)
date.group()                                        # still the full match

'18-05-2023'

In [21]:
# With the year-only pattern, group(1) is the captured four-digit year.
date.group(1)

'2023'

In [25]:
# Practice: split a landline number into its STD code and the full number.
text3 = 'My telephone number is 0141-245678'

# Only the four digits before the hyphen are captured (group 1).
phone_regex = re.compile(r'(\d\d\d\d)-\d\d\d\d\d\d')
phone_no = phone_regex.search(text3)

std_code = phone_no.group(1)       # captured prefix
full_number = phone_no.group(0)    # whole match, same as group()
print('STD Code : ', std_code)
print('Number   : ', full_number)

STD Code :  0141
Number   :  0141-245678


In [27]:
# re.search(pattern, string) can also be called directly on the module,
# without compiling the pattern first.
text4 = 'Good morning, we are going to learn regular expressions today, 123'

match1 = re.search(r'\d\d\d', text4)    # three consecutive digits
match2 = re.search('regular', text4)    # a plain literal word

print(match1.group())
print(match2.group())

123
regular


In [28]:
# Same ten-digit lookup as before, using the module-level re.search().
text = 'My primary phone number is 9999988888'
phone_no = re.search(r'\d\d\d\d\d\d\d\d\d\d', text)
phone_no.group()

'9999988888'

## findall

In [29]:
# findall() returns every non-overlapping match as a list of strings,
# unlike search(), which stops at the first one.
text5 = 'My primary phone number is 9999999999. My alternative number is 8888888888'
mobile_number_regex = re.compile(r'\d\d\d\d\d\d\d\d\d\d')
mobile_numbers = mobile_number_regex.findall(text5)
mobile_numbers

['9999999999', '8888888888']

In [30]:
# With a single match, findall() still returns a list (of one element).
text5 = 'My primary phone number is 9999999999.'
mobile_number_regex = re.compile(r'\d\d\d\d\d\d\d\d\d\d')
mobile_numbers = mobile_number_regex.findall(text5)
mobile_numbers

['9999999999']

In [31]:
# Only five digits are present, so the ten-digit pattern cannot match;
# findall() then returns an empty list rather than None.
text5 = 'My primary phone number is 99999......'
mobile_number_regex = re.compile(r'\d\d\d\d\d\d\d\d\d\d')
mobile_numbers = mobile_number_regex.findall(text5)
mobile_numbers

[]

In [33]:
text6 = """Interested in Jio? Talk to us on	1860-893-3333
For recharge plans, data balance, validity, recharge confirmation & offers	1991
For Queries	199
For Complaints	198
For Other numbers	1800-889-9999
Tele-verification to activate both HD voice & data services	1977
Tele-verification to activate data services only	1800-890-1977
For support on International Roaming
(accessible only when roaming abroad)	+917018899999 (charges applicable)
Device Care Helpline (JioPhone, LYF Mobile & JioFi)

Our experts are available for your assistance on all Days, from 9am to 9pm

Helpline	1800-890-9999
Jio Enterprise Mobility & Business Solutions

Our experts are available for your assistance 24x7 (Monday - Sunday)

Enterprise Mobility Services	1800-889-9333
Enterprise Connectivity Services & Business Solutions	1800-889-9444
New Business Connection	1800-889-9555
Care Helpline for JioFiber Customers

Our experts are available for your assistance 24X7 (Monday – Sunday)

Helpline	1800-896-9999
Online shopping

For any online shopping related assistance, reach out to our experts between 9 am to 9 pm (Monday – Sunday)

Helpline	1800-893-3399"""

In [38]:
# Collect every number in text6 written as dddd-ddd-dddd.
# No groups in the pattern, so findall() returns the full matches.
phone_no_regex = re.compile(r'\d\d\d\d-\d\d\d-\d\d\d\d')
phone_no = phone_no_regex.findall(text6)
phone_no

['1860-893-3333',
 '1800-889-9999',
 '1800-890-1977',
 '1800-890-9999',
 '1800-889-9333',
 '1800-889-9444',
 '1800-889-9555',
 '1800-896-9999',
 '1800-893-3399']

In [35]:
# When the pattern has several groups, findall() returns one tuple per match
# holding the captured groups instead of the full matched text.
number_regex = re.compile(r'(\d\d\d\d)-(\d\d\d)-(\d\d\d\d)') # three groups
numbers = number_regex.findall(text6)
numbers

[('1860', '893', '3333'),
 ('1800', '889', '9999'),
 ('1800', '890', '1977'),
 ('1800', '890', '9999'),
 ('1800', '889', '9333'),
 ('1800', '889', '9444'),
 ('1800', '889', '9555'),
 ('1800', '896', '9999'),
 ('1800', '893', '3399')]

In [36]:
# When the pattern has exactly one group, findall() returns a flat list of
# that group's text (here the last four digits of each number).
number_regex = re.compile(r'\d\d\d\d-\d\d\d-(\d\d\d\d)') # one group
numbers = number_regex.findall(text6)
numbers

['3333', '9999', '1977', '9999', '9333', '9444', '9555', '9999', '3399']

In [39]:
# "|" is alternation: match either 'man' or 'woman'.
# Matching proceeds left to right through the string, so at the 'w' of
# 'woman' the second alternative succeeds before 'man' could be found inside it.
pattern = re.compile(r'man|woman')
result = pattern.findall('this is a woman, this a man, this is a woman, this a man')
result

['woman', 'man', 'woman', 'man']

In [40]:
text8 = """Jio Mobility users can click Here to open the support channel on WhatsApp. Alternatively, you can send "Hi" to (+91)7000770007 to get started.
 
JioFiber users can click Here to open the support channel on WhatsApp. Alternatively, you can send "Hi" to (+91)7000570005 to get started,7000770008,7000570008 """

In [43]:
# Two alternatives: a ten-digit number prefixed by a "(+dd)" country code,
# or a bare ten-digit number. "(", "+" and ")" are escaped because they are
# regex metacharacters.
num_pattern = re.compile(r'\(\+\d\d\)\d\d\d\d\d\d\d\d\d\d|\d\d\d\d\d\d\d\d\d\d')
num_pattern.findall(text8)

['(+91)7000770007', '(+91)7000570005', '7000770008', '7000570008']

# Identifiers
<table ><tr><th>Character</th><th>Description</th><th>Example Pattern Code</th><th >Example Match</th></tr>

<tr ><td><span >\d</span></td><td>A digit</td><td>file_\d\d</td><td>file_25</td></tr>

<tr ><td><span >\w</span></td><td>Alphanumeric or underscore</td><td>\w-\w\w\w</td><td>A-b_1</td></tr>



<tr ><td><span >\s</span></td><td>White space</td><td>a\sb\sc</td><td>a b c</td></tr>



<tr ><td><span >\D</span></td><td>A non digit</td><td>\D\D\D</td><td>ABC</td></tr>

<tr ><td><span >\W</span></td><td>Non-alphanumeric</td><td>\W\W\W\W\W</td><td>*-+=)</td></tr>

<tr ><td><span >\S</span></td><td>Non-whitespace</td><td>\S\S\S\S</td><td>Yoyo</td></tr></table>

# Quantifiers
<table ><tr><th>Character</th><th>Description</th><th>Example Pattern Code</th><th >Example Match</th></tr>

<tr ><td><span >+</span></td><td>Occurs one or more times</td><td>Version \w-\w+</td><td>Version A-b1_1</td></tr>

<tr ><td><span >{3}</span></td><td>Occurs exactly 3 times</td><td>\D{3}</td><td>abc</td></tr>



<tr ><td><span >{2,4}</span></td><td>Occurs 2 to 4 times</td><td>\d{2,4}</td><td>123</td></tr>



<tr ><td><span >{3,}</span></td><td>Occurs 3 or more</td><td>\w{3,}</td><td>anycharacters</td></tr>

<tr ><td><span >*</span></td><td>Occurs zero or more times</td><td>A*B*C*</td><td>AAACC</td></tr>

<tr ><td><span >?</span></td><td>Once or none</td><td>plurals?</td><td>plural</td></tr></table>

In [44]:
# \d{10} is the quantifier form of writing \d ten times in a row.
text = 'my mobile number is 9999999999'
mobile_no_regex = re.compile(r'\d{10}')
mobile_no = mobile_no_regex.findall(text)
mobile_no

['9999999999']

In [46]:
# Quantifier form of the country-code pattern: "(+dd)" followed by ten digits,
# or ten digits on their own.
num_pattern = re.compile(r'\(\+\d{2}\)\d{10}|\d{10}')
num_pattern.findall(text8)

['(+91)7000770007', '(+91)7000570005', '7000770008', '7000570008']

In [48]:
# [abc] matches any one of a, b or c; with + it matches runs made up only of
# those letters. Words containing other letters (ggdhdh, xyz) produce nothing.
re.findall(r'[abc]+','aabb ggdhdh abbabba baaba caa xyz')

['aabb', 'abbabba', 'baaba', 'caa']

In [52]:
# Extract a Gmail address whose local part is made of word characters.
text9 = 'my email is aaaa_4567bb@gmail.com'
# \w already covers letters, digits and the underscore, so a character class
# with an extra "_" is unnecessary. The dot is escaped so that it matches a
# literal "." rather than any character (otherwise "name@gmailXcom" matches).
email_regex = re.compile(r'\w+@gmail\.com')
email_regex.findall(text9)

['aaaa_4567bb@gmail.com']

In [55]:
# Same extraction, spelling the allowed local-part characters as explicit ranges.
text9 = 'my email is abcd_4567bb@gmail.com'
# The class lists exactly the characters allowed before the "@": letters,
# digits, "." and "_" (inside [...] a dot is literal). Putting \S in the class
# would accept every non-space character and make the ranges pointless.
# The "@" is required explicitly and the dot in "gmail.com" is escaped, so
# strings such as "http://notgmail.com" are not reported as addresses.
email_regex = re.compile(r'[A-Za-z0-9._]+@gmail\.com')
email_regex.findall(text9)

['abcd_4567bb@gmail.com']

In [63]:
# Grab the run of letters, digits and whitespace that ends in "RAM".
# The comma is not in the character class, so the match cannot extend across
# the comma-separated fields.
text10 = '8GB RAM, 128GB ROM, SD805 Processor'
ram_regex = re.compile(r'[A-Za-z0-9\s]+RAM')
ram_regex.findall(text10)

['8GB RAM']

In [62]:
# Capture only the part in front of "RAM" by wrapping it in a group.
text10 = '8GB RAM, 128GB ROM, SD805 Processor'
ram_regex = re.compile(r'([A-Za-z0-9\s]+)RAM')

captured_parts = ram_regex.findall(text10)   # one group -> list of captured strings
ram = captured_parts[0].strip()              # drop the space that preceded "RAM"
ram

'8GB'

In [78]:
# Two-stage extraction: isolate the RAM phrase, then read the number before "GB".
text10 = '8GB RAM, 128GB ROM, SD805 Processor'

# Both patterns are compiled up front; neither depends on the other's result.
ram_regex = re.compile(r'[A-Za-z0-9\s]+RAM')
ram_size_regex = re.compile(r'(\d+)GB')

ram_phrases = ram_regex.findall(text10)
ram = ram_phrases[0]                          # first phrase ending in "RAM"
size_digits = ram_size_regex.findall(ram)     # digit runs directly before "GB"
ram_size = float(size_digits[0])
ram_size

8.0

In [73]:
# Same two-stage extraction, this time for the SSD capacity.
text10 = '16GB RAM, 128GB SSD, 500GB HDD, ryzen 7000 Processor'

ssd_regex = re.compile(r'[A-Za-z0-9\s]+SSD')
ssd_size_regex = re.compile(r'(\d+)GB')

ssd_phrases = ssd_regex.findall(text10)
# The character class includes \s, so the phrase keeps the space that follows
# the preceding comma (it is printed with a leading blank).
ssd = ssd_phrases[0]
print(ssd)

size_digits = ssd_size_regex.findall(ssd)     # digit runs directly before "GB"
ssd_size = float(size_digits[0])
ssd_size

 128GB SSD


128.0