### finditer

In [17]:
import re

# Sample text: two lowercase "abc" runs followed by one uppercase "ABC".
test_string = r"123abc456789abc123ABC"

# finditer() returns an iterator that yields one re.Match object per
# non-overlapping occurrence, scanning left to right (case-sensitive here).
pattern = re.compile(r"abc")
matches = re.finditer(pattern, test_string)

for match in matches:
    span = match.span()  # (start, end) offsets of the whole match
    print(match)
    print(span, span[0], span[1], match[0])

<re.Match object; span=(3, 6), match='abc'>
(3, 6) 3 6 abc
<re.Match object; span=(12, 15), match='abc'>
(12, 15) 12 15 abc


### Findall

In [7]:
import re

# Same sample text as the finditer example.
test_string = r"123abc456789abc123ABC"

# findall() collects the matched substrings themselves (plain str objects,
# not re.Match objects) into a list.
pattern = re.compile(r"abc")
matches = re.findall(pattern, test_string)

# Print one matched substring per line.
for match in matches:
    print(match)

abc
abc


### Match

In [15]:
### match() succeeds only if the pattern occurs at the very beginning of the string; otherwise it returns None
import re

# The text begins with "123", so a pattern anchored at position 0 cannot hit.
test_string = r"123abc456789abc123ABC"

pattern = re.compile(r"abc")
# match() only tries the pattern at the start of the string.
match = re.match(pattern, test_string)

print(match)

None


### Search

In [20]:
### search() scans the whole string and returns the first match (or None if there is none)
import re

# Same sample text; the first "abc" sits at offsets 3..6.
test_string = r"123abc456789abc123ABC"

pattern = re.compile(r"abc")
# search() walks through the string and stops at the first occurrence.
match = re.search(pattern, test_string)

print(match)

<re.Match object; span=(3, 6), match='abc'>


### Meta Character

In [25]:
import re

test_string = r"123abc456789abc123ABC"

# "^" anchors the pattern to the start of the string; the text starts with
# "123", so the result is an empty list.
print(list(re.finditer(r"^abc", test_string)))
# "$" anchors the pattern to the end of the string, where the uppercase "ABC" is.
print(list(re.finditer(r"ABC$", test_string)))


[]
[<re.Match object; span=(18, 21), match='ABC'>]


### Character Sets

In [37]:
import re

test_string = r"helLo 123_"

# A bracketed set matches exactly one character out of the listed ones.
# None of these patterns contain groups, so findall() returns the matched text.
print(re.findall(r"[elo]", test_string))        # one of the characters e, l, o
print(re.findall(r"[12]", test_string))         # one of the characters 1, 2
print(re.findall(r"[a-z]", test_string))        # any lowercase ASCII letter
print(re.findall(r"[A-Z]", test_string))        # any uppercase ASCII letter
print(re.findall(r"[a-zA-Z0-9]", test_string))  # any ASCII letter or digit
print(re.findall(r"[0-9]", test_string))        # any digit 0-9


['e', 'l', 'o']
['1', '2']
['h', 'e', 'l', 'o']
['L']
['h', 'e', 'l', 'L', 'o', '1', '2', '3']
['1', '2', '3']


### Special Sequences

In [None]:
import re

test_string = r"hello 123_ heyho hohey"

# Each entry is one backslash sequence (or a pattern built on one); the loop
# below prints the list of matched substrings for every entry, in this order.
special_sequences = (
    r"\d",     # a digit
    r"\D",     # anything that is not a digit
    r"\s",     # a whitespace character
    r"\S",     # anything that is not whitespace
    r"\w",     # a word character: letter, digit or underscore
    r"\W",     # a non-word character (in this text: only the spaces)
    r"\bho",   # "ho" at a word boundary, i.e. starting a word (the "ho" of "hohey")
    r"\Bhey",  # "hey" NOT at a word boundary, i.e. inside a word (the "hey" of "hohey")
)

for sequence in special_sequences:
    print([hit.group() for hit in re.finditer(sequence, test_string)])


### Quantifiers

In [38]:
import re

test_string = r"hello_123"

# Quantifiers control how many times the preceding item may repeat.
# "*" also matches zero digits, so it yields an empty string at every
# position where no digit starts (including the very end of the text).
print(re.findall(r"\d*", test_string))    # zero or more digits
print(re.findall(r"\d+", test_string))    # one or more digits
print(re.findall(r"_?\d+", test_string))  # optional underscore (zero or one), then digits
print(re.findall(r"\d{3}", test_string))  # exactly three digits


['', '', '', '', '', '', '123', '']
['123']
['_123']
['123']


### Practice 

In [46]:
import re

# Mixed sample lines: dates in four different layouts plus some filler words.
dates = """
helo
01.04.2020
13.05.2020
01-04-2020
19-11-2024
hi
09-01-2021
hode
2009-01-21
2019-12-31
2029/12/07
lop
2019/10/25
"""

# One pattern per date layout; the results are printed in this order.
date_layouts = (
    r"\d{2}\.\d{2}\.\d{4}",  # DD.MM.YYYY (the dot is escaped to match literally)
    r"\d{2}-\d{2}-\d{4}",    # DD-MM-YYYY
    r"\d{4}-\d{2}-\d{2}",    # YYYY-MM-DD
    r"\d{4}/\d{2}/\d{2}",    # YYYY/MM/DD
)

for layout in date_layouts:
    print([found.group() for found in re.finditer(layout, dates)])


['01.04.2020', '13.05.2020']
['01-04-2020', '19-11-2024', '09-01-2021']
['2009-01-21', '2019-12-31']
['2029/12/07', '2019/10/25']


### Grouping

In [16]:
import re

# Sample text with honorific + name lines and e-mail addresses.
# NOTE: the variable keeps its original name `dates`, although this text
# holds names and e-mail addresses rather than dates.
dates = """
Mr Saud
hello
120kcxe02
Mrs Saud
Mr. Bilawal
Ms Ahmed
12-23-2020
Mr. Umer
abdullahalijahangir1234@gmail.com
abdullah12ali34_@tags.org
Bitf19a015@pucit.pk
"""

# Group 1 is the honorific (Mr / Mrs / Ms); it is followed by an optional
# dot, one whitespace character and the name.
title_pattern = re.compile(r"(Mr|Mrs|Ms)\.?\s\w+")

# Groups: 1 = local part, 2 = domain name, 3 = top-level domain.
# The "+" quantifier sits INSIDE each group so the group captures the whole
# run of characters; writing "([...])+" would leave only the last character
# in the group. Exactly one literal dot separates domain and TLD.
email_pattern = re.compile(r"([a-zA-Z0-9_-]+)@([a-zA-Z0-9_-]+)\.([a-zA-Z0-9_-]+)")

# group() with no argument returns the entire match, not an individual group.
print([i.group() for i in title_pattern.finditer(dates)])
print([i.group() for i in email_pattern.finditer(dates)])


['Mr Saud', 'Mrs Saud', 'Mr. Bilawal', 'Ms Ahmed', 'Mr. Umer']
['abdullahalijahangir1234@gmail.com', 'abdullah12ali34_@tags.org', 'Bitf19a015@pucit.pk']
