#### Regular expressions: a method designed for searching patterns in a string/text
- Regex for short
- ^ and $ describe the position
- '\W' and "[a-t]" describe a set of characters
- +, *, and {9} specify a quantifier (number of character occurrences)
#### Some patterns
- Minne ------ Minnesota or Tom Minneso
- ^Minne ------ Minnesota [specifies the location, here in the beginning]
- i$ (meaning the string ends with i) ------- Resham Firiri
- \d matches a numerical digit
- \d\d matches two consecutive digits
- \s matches a white space
- \w matches a lowercase or uppercase letter, a digit, or an underscore
- ^\w\w\w\w\s would match the leading 'Mike ' in Mike Salamanca (four word characters followed by a whitespace)
- [aeiou]{2} would look for two consecutive vowels
- ^\w{7}$ would match a 7-character string like 'Rascals'
- \w{7} would look for any string with 7 consecutive word characters 

In [2]:
import re

In [4]:
# Sample names for the examples below: five two-word names plus the
# single-word name 'Rock'.
names = [
    'Ram Budathoki',
    'Alexander Dumas',
    'Drew Barrymore',
    'Inigo Montaya',
    'Jack Dawson',
    'Rock',
]

In [5]:
# Find names made of exactly two words (first and last name) separated by
# a single space.
# The pattern is a raw string so that \w is passed to the regex engine as-is
# instead of being parsed as an (invalid) string escape sequence.
regex = r'^\w+ \w+$'
for name in names:
    result = re.search(regex, name)
    if result:
        print(name)

Ram Budathoki
Alexander Dumas
Drew Barrymore
Inigo Montaya
Jack Dawson


In [22]:
# What if there were multiple spaces between the two names?
# This list has names with two and three spaces, plus 'Jack!' whose '!' is
# not a word character.
namess = [
    'Ram  Budathoki',
    'Alexander Dumas',
    'Drew   Barrymore',
    'Inigo Montaya',
    'Jack!',
    'Rock',
]
# Raw string: keeps \w from being parsed as an invalid string escape.
# The literal single space in the pattern only matches exactly one space,
# so names separated by several spaces are not printed.
regex = r'^\w+ \w+$'
for name in namess:
    result = re.search(regex, name)
    if result:
        print(name)

Alexander Dumas
Inigo Montaya


In [14]:
# Solution: \s+ accepts one or more whitespace characters between the two
# words, so names separated by several spaces match as well.
# Raw string: keeps \w and \s from being parsed as invalid string escapes.
regex = r'^\w+\s+\w+$'
for name in namess:
    result = re.search(regex, name)
    if result:
        print(name)

Ram  Budathoki
Alexander Dumas
Drew   Barrymore
Inigo Montaya


In [8]:
# So far we have only printed the name when the search succeeded.
# re.search actually returns a Match object (or None when nothing matches);
# print it to see the span and the matched text.
# Raw string: keeps \w and \s from being parsed as invalid string escapes.
regex = r'^\w+\s+\w+$'
for name in namess:
    result = re.search(regex, name)
    if result:
        print(result)

<re.Match object; span=(0, 14), match='Ram  Budathoki'>
<re.Match object; span=(0, 15), match='Alexander Dumas'>
<re.Match object; span=(0, 16), match='Drew   Barrymore'>
<re.Match object; span=(0, 13), match='Inigo Montaya'>
<re.Match object; span=(0, 11), match='Jack Dawson'>


In [12]:
# Search for a capital 'A' followed by zero or more word characters and
# inspect the resulting Match object.
# NOTE: the pattern is not anchored, so re.search reports the first 'A'
# found anywhere in the name, not only one at the very start.
# Raw string: keeps \w from being parsed as an invalid string escape.
regex = r'A\w*'
for name in namess:
    match = re.search(regex, name)
    if match:
        print(name)
        print(match.start())   # index where the match begins
        print(match.end())     # index just past the last matched character
        print(match.span())    # (start, end) as a tuple
        print(match.group())   # the matched text itself

Alexander Dumas
0
9
(0, 9)
Alexander


In [18]:
# Capture the first and last name separately using numbered groups:
# group(1) is the first parenthesised part, group(2) the second.
# Raw string: keeps \w and \s from being parsed as invalid string escapes.
regex = r'(^\w+)\s+(\w+)$'
for name in namess:
    match = re.search(regex, name)
    if match:
        print(name)
        print(match.group(1))
        print(match.group(2))

Ram  Budathoki
Ram
Budathoki
Alexander Dumas
Alexander
Dumas
Drew   Barrymore
Drew
Barrymore
Inigo Montaya
Inigo
Montaya


In [20]:
# Same capture as before, but with named groups: (?P<fn>...) is the first
# name and (?P<ln>...) the last name. Only the first name is printed here.
# Raw string: keeps \w and \s from being parsed as invalid string escapes.
regex = r'(?P<fn>^\w+)\s+(?P<ln>\w+)$'
for name in namess:
    match = re.search(regex, name)
    if match:
        print(match.group('fn'))

Ram
Alexander
Drew
Inigo


In [23]:
# Did you notice 'Jack!' was never printed by the earlier patterns?
# '!' is not a word character, so \w cannot match it. This character class
# accepts only ASCII letters and '!' for the whole string.
regex = r'^[a-zA-Z!]+$'
for name in namess:
    if re.search(regex, name) is None:
        continue
    print(name)

Jack!
Rock


In [24]:
# Collect every run of consecutive lowercase letters in each name.
# re.findall returns all non-overlapping matches as a list of strings.
regex = r'[a-z]+'
for name in names:
    matches = re.findall(regex, name)
    if not matches:
        continue
    print(matches)

['am', 'udathoki']
['lexander', 'umas']
['rew', 'arrymore']
['nigo', 'ontaya']
['ack', 'awson']
['ock']
