In [48]:
import re

# Part 1 - re.search, re.findall, re.finditer

In [2]:
text = "The agent's phone number is 988-600-4888. Call soon!"

In [4]:
"phone" in text

True

In [5]:
pattern = "phone"

In [6]:
re.search(pattern, text)

<re.Match object; span=(12, 17), match='phone'>

In [7]:
pattern = "Not in text"

In [8]:
# "Not in text" does not occur in `text`, so re.search returns None;
# the notebook shows nothing for a None result, hence no output below.
re.search(pattern, text)

In [9]:
pattern = "phone"

In [10]:
match = re.search(pattern, text)

In [12]:
match.span()

(12, 17)

In [13]:
match.start()

12

In [14]:
match.end()

17

In [15]:
text = "my phone once, my phone twice"

In [16]:
match = re.search(pattern, text)

In [17]:
match.span()

(3, 8)

In [18]:
matches = re.findall('phone', text)

In [19]:
matches

['phone', 'phone']

In [20]:
len(matches)

2

In [21]:
# re.finditer yields match objects one at a time through an iterator instead of
# building a list of strings like re.findall. Note that `matches` is rebound
# here: it previously held the list returned by re.findall.
matches = re.finditer('phone', text)

In [24]:
matches

<callable_iterator at 0x289ea8d2e20>

In [51]:
# Walk over every occurrence of 'phone' and show where it is and what matched.
# NOTE(review): the recorded output lists a single match, yet the `text`
# assigned in In [15] contains 'phone' twice (re.findall above returned two
# items). The execution count (51) suggests this cell was last run after `text`
# had been reassigned further down -- re-run the notebook top to bottom to
# refresh the output.
for match in re.finditer('phone', text):
    print(match.span())   # (start, end) indices of this occurrence
    print(match.group())  # the matched substring itself

(3, 8)
phone


# Part 2 - re.compile

In [29]:
text = "My phone number is 988-600-4888"

In [30]:
phone = re.search(r'\d\d\d-\d\d\d-\d\d\d\d', text)

In [32]:
phone.span()

(19, 31)

In [33]:
phone.group()

'988-600-4888'

In [35]:
text = "My phone number is 988-600-4888"

In [36]:
# {n} repeats the preceding token exactly n times, so this is the compact form
# of the \d\d\d-\d\d\d-\d\d\d\d pattern used earlier: 3 digits, 3 digits, 4 digits.
phone = re.search(r'\d{3}-\d{3}-\d{4}', text)

In [37]:
phone

<re.Match object; span=(19, 31), match='988-600-4888'>

In [38]:
phone.span()

(19, 31)

In [39]:
phone.group()

'988-600-4888'

In [40]:
# Compile the phone-number pattern once so it can be reused.
# Each pair of parentheses is a capture group; after a match, group(1), group(2)
# and group(3) return the three digit blocks, while group() / group(0) return
# the whole match.
phone_pattern = re.compile(r'(\d{3})-(\d{3})-(\d{4})')

In [41]:
# Search with the compiled pattern's own method: this is the idiomatic way to
# use a re.compile() result and gives the same match object as
# re.search(phone_pattern, text).
results = phone_pattern.search(text)

In [42]:
results.group()

'988-600-4888'

In [44]:
results.group(0)

'988-600-4888'

In [45]:
results.group(1)

'988'

In [46]:
results.group(2)

'600'

In [47]:
results.group(3)

'4888'

# Part 3 - Additional Regex Syntax 

In [73]:
# '|' is alternation: the pattern matches either 'cat' or 'dog'.
# re.search returns the first match scanning left to right, so the result here
# is 'dog' even though 'cat' also appears later in the string.
# (The variable is still called `cat` because the following cells refer to it.)
cat = re.search('cat|dog', 'The dog is here cat')

In [74]:
cat

<re.Match object; span=(4, 7), match='dog'>

In [75]:
cat.span()

(4, 7)

In [76]:
cat.group()

'dog'

In [77]:
re.findall(r'at', 'The cat in the hat sat there.')

['at', 'at', 'at']

In [78]:
# '.' is the wildcard: it matches any single character (except a newline by
# default), so '.at' means "any one character followed by 'at'".
re.findall(r'.at', 'The cat in the hat sat there.')

['cat', 'hat', 'sat']

In [79]:
# Three wildcards require exactly three characters of any kind before 'at';
# spaces count as characters, which is why 'e cat' and 'e hat' are returned.
re.findall(r'...at', 'The cat in the hat went splat.')

['e cat', 'e hat', 'splat']

In [91]:
# \D matches any single character that is NOT a digit (the opposite of \d),
# so '\Dat' is one non-digit character followed by 'at'.
re.findall(r'\Dat', 'The cat in the hat went splat.')

['cat', 'hat', 'lat']

In [92]:
# '^' anchors the match to the start of the string; '^\d' therefore matches a
# digit only when it is the very first character.
re.findall(r'^\d', '2 is a number')

['2']

In [151]:
# '$' anchors the match to the end of the string; '\d$' therefore matches a
# digit only when it is the last character.
re.findall(r'\d$', 'The number is 3')

['3']

In [101]:
phrase = 'There are 3 numbers 34 inside 5 this sentence'

In [135]:
# Inside square brackets a leading '^' negates the character class, so
# [^\d]+ matches runs of one or more characters that are not digits.
pattern = r'[^\d]+'

In [136]:
re.findall(pattern, phrase)

['There are ', ' numbers ', ' inside ', ' this sentence']

In [141]:
# Runs of one or more digits. The earlier form r'[\d$]+' put '$' inside a
# character class, where it is a literal dollar sign rather than an end-of-string
# anchor, so it would also have matched '$' characters.
pattern = r'\d+'

In [142]:
re.findall(pattern, phrase)

['3', '34', '5']

In [143]:
test_phrase = "This is a string! But it has punctuation. How can we remove it?"

In [147]:
re.findall(r'[^!.?]+', test_phrase)

['This is a string', ' But it has punctuation', ' How can we remove it']

In [161]:
re.findall(r'[!.?]+', test_phrase)

['!', '.', '?']

In [162]:
re.findall(r'[^!.? ]+', test_phrase) # space added after ? to remove the spaces in output

['This',
 'is',
 'a',
 'string',
 'But',
 'it',
 'has',
 'punctuation',
 'How',
 'can',
 'we',
 'remove',
 'it']

In [154]:
clean = re.findall(r'[^!.? ]+', test_phrase)

In [159]:
' '.join(clean)

'This is a string But it has punctuation How can we remove it'

In [163]:
text = "Find hyphenated ad-verb words in this long-ish sentence"

In [170]:
pattern = r'[\w]+-[\w]+'

In [171]:
re.findall(pattern, text)

['ad-verb', 'long-ish']

In [268]:
text = "The Orchard Retreat & Spa:4.6|WelcomHotel Pine N Peak Pahalgam - Member ITC Hotel Group:4.1|The Orchard Retreat & Spa:4.6"

In [217]:
# A colon, then optional leading digits, an optional dot and at least one
# digit -- i.e. the ":4.6"-style rating suffix in `text`, colon included.
# NOTE(review): no later cell shown here refers to `pattern1` by name; the
# ratings loop further down repeats the same literal instead.
pattern1 = r'[:][0-9]*[.]?[0-9]+'

In [251]:
# Exactly one digit, a literal dot, exactly one digit (e.g. '4.6').
# NOTE(review): a value with more digits would only be matched partially --
# fine for the ratings in `text`, confirm before reusing on other data.
pattern2 = r'[0-9][.][0-9]'

In [266]:
re.findall(pattern2, text)

['4.6', '4.1', '4.6']

In [267]:
len(re.findall(pattern2, text))

3

In [263]:
for match in re.finditer(pattern2, text):
    print(match)

<re.Match object; span=(26, 29), match='4.6'>
<re.Match object; span=(88, 91), match='4.1'>
<re.Match object; span=(118, 121), match='4.6'>


In [308]:
# Extract every ":<rating>" token from `text`, drop the colon and print the
# rating as a float. `i` is deliberately left bound to the last rating because
# the following cells inspect it.
for rating_token in re.findall(r'[:][0-9]*[.]?[0-9]+', text):
    i = float(rating_token.replace(':', ''))
    print(i)
    

4.6
4.1
4.6


In [309]:
i

4.6

In [312]:
type(i)

float