In [1]:
import re

# Back-Referencing

In [3]:
# Back-reference demo: \2 must repeat whatever the second group captured,
# so the text after the first dash has to appear twice, dash-separated.
res = re.compile(r'(\d+)-(.*)-\2').search('123-456-456')
print(res)

<re.Match object; span=(0, 11), match='123-456-456'>


In [4]:
# Same input matched without a back-reference: the repeated text is spelled
# out literally and captured by a third group.
res = re.compile(r'(\d+)-(456)-(456)').search('123-456-456')

# Split

In [5]:
text = "The quick brown fox jumps over the lazy dog."
# \s+ treats any run of whitespace (spaces, tabs, newlines) as one separator.
words = re.compile(r'\s+').split(text)
print(words)

['The', 'quick', 'brown', 'fox', 'jumps', 'over', 'the', 'lazy', 'dog.']


In [6]:
date = "2023-04-30"
# maxsplit=1 stops after the first dash and leaves the remainder intact.
split_date = re.compile(r'-').split(date, maxsplit=1)
print(split_date)

['2023', '04-30']


In [7]:
text = "Words, words, words."
# Separator: one comma or whitespace character, plus any whitespace after it.
split_words = re.compile(r'[,\s]\s*').split(text)
print(split_words)

['Words', 'words', 'words.']


In [8]:
text = "The quick brown fox jumps over the lazy dog."
# str.split takes its separator literally (it is not a regex), so the
# three-character string '\s+' is never found and the text comes back whole.
text_ = text.split(r'\s+')
print(text_)

['The quick brown fox jumps over the lazy dog.']


In [None]:
text = "lazy. dog."
# Plain str.split on a literal '.'; no escaping is needed outside a regex.
# The trailing '.' produces an empty string as the last element.
text_ = text.split('.')
print(text_)

In [None]:
text = "lazy. dog."
# The dot is escaped so it matches a literal '.' rather than any character.
split_words = re.compile(r'\.').split(text)
print(split_words)

# Verbose

In [9]:
# Verbose-mode pattern for "<#> <apt> <street>, <city>, <ST> <zip>" addresses.
# Groups, in order: apt number, street, city, state code, zip code.
# Both text groups are greedy; a lazy match would be written (.*?), not (.*)?.
addr = re.compile(r"""
        \s*             # possible leading white space
        \#?             # optional, use \ before # to disambiguate from comment #
        \s*             # possible whitespace
        (\d+)           # capture apt number
        \s+             # at least one white space
        (.*),           # capture street name (greedy: may itself contain ',')
        \s*             # possible whitespace
        (.*),           # capture city name (the text before the final ',')
        \s*             # possible white space
        ([A-Z]{2})      # capture state code
        \s*             # possible white space
        (\d{5})         # capture zip code
        \s*             # possible trailing whitespace
        $               # end of string
        """, re.VERBOSE)

In [11]:
# Compact (non-verbose) form of the address pattern. Without re.VERBOSE every
# space in the pattern is literal, so the pattern must not begin with one;
# optional leading whitespace is already handled by the initial \s*.
addr = re.compile(r"\s*\#?\s*(\d+)\s+(.*),\s*(.*),\s*([A-Z]{2})\s*(\d{5})\s*$")

In [12]:
# Match a sample address and print each captured group on its own line.
res = addr.match(' # 25 Infinite Loop,Cupertino,CA 12345')
if res:  # match() returns None when the string does not fit the pattern
    for gr in res.groups():
        print(gr)

25
Infinite Loop
Cupertino
CA
12345


# Group with name

In [13]:
# Captured fields can be given names with (?P<name>...) for easier access.
# Both text groups are greedy; a lazy match would be written (.*?), not (.*)?.
named_addr = re.compile(r"""
        \s*                     # possible leading white space
        \#?                     # optional, use \ before # to disambiguate from comment #
        \s*                     # possible whitespace
        (?P<apt>\d+)            # capture apt number
        \s+                     # at least one white space
        (?P<street>.*),         # capture street name (greedy: may itself contain ',')
        \s*                     # possible whitespace
        (?P<city>.*),           # capture city name (the text before the final ',')
        \s*                     # possible white space
        (?P<state>[A-Z]{2})     # capture state code
        \s*                     # possible white space
        (?P<zip>\d{5})          # capture zip code
        \s*                     # possible trailing whitespace
        $                       # end of string
        """, re.VERBOSE)

In [14]:
# groupdict() maps each (?P<name>...) group to the text it captured.
res = named_addr.match(' # 10 California Avenue,Palo Alto,CA 94304')
res.groupdict()

{'apt': '10',
 'street': 'California Avenue',
 'city': 'Palo Alto',
 'state': 'CA',
 'zip': '94304'}

# Exercise

## 1

In [30]:
# Sample input for exercise 1.
text = 'Daata'

In [31]:
# Anchored at the start: capture leading word characters, skip at least one
# word/whitespace character, then require the captured text again (\1).
m = re.search(r'^(\w+)[\w\s]+\1', text)

In [32]:
# Show the result; a failed search returns None, which the notebook does not render.
m

In [33]:
# A named group can be back-referenced by name with (?P=name).
backref = re.compile(r"""
            (?P<match1>air)     # capture the string 'air' in a group named 'match1'
            .*                  # greedy: as many characters as possible
            (?P=match1)         # back-reference: the text of group 'match1' must appear again
            """, re.VERBOSE)
res = backref.search('cool air or hot air today')
print(res)

<re.Match object; span=(5, 19), match='air or hot air'>


In [28]:
text = 'shds23d'
# NOTE(review): inside [...] the '+' is a literal plus sign, not a quantifier,
# so this matches exactly one leading character (a word character or '+').
# If the whole token was intended, the quantifier belongs outside the class.
m = re.search(r'^[\w+\d+]', text)

In [29]:
# Display the match object produced by the search.
m

<re.Match object; span=(0, 1), match='s'>

## 2

In [None]:
# Two candidate inputs for exercise 2. Only the second assignment takes
# effect; the first is overwritten immediately.
text = 'bake sale on the 3rd'
text = 'bake sale on the 3'

In [None]:
# The whole string must consist of word/whitespace characters and end in a
# run of digits. The quantifier sits outside the class: inside [...] a '+'
# is a literal plus sign and would let strings containing '+' through.
m = re.search(r'^[\w\s]+\d+$', text)

In [None]:
# Display the match object (nothing is rendered when the search returned None).
m

## 3

In [None]:
# Phone number shaped like "(ddd)ddd-dddd", optionally preceded by
# whitespace; each of the three digit runs is captured as its own group.
res = re.compile(r'^\s*\((\d{3})\)(\d{3})-(\d{4})').search('(848)555-4321')

In [None]:
# Display the match object for the phone-number search.
res

In [None]:
print(res.group())  # whole match; group() with no argument is group(0)
print(res.groups()) # tuple of every part grouped with ( )
print(res.group(0)) # entire match again, requested explicitly
print(res.group(1)) # first grouping with ( )
print(res.group(2)) # second grouping with ( )
print(res.group(3)) # third grouping with ( )

In [None]:
# Alternatively, index into the groups() tuple. It is 0-based, so
# groups()[0] is the same text as group(1).
print(res.groups()[0])
print(res.groups()[1])

## 4

- At least one lowercase and one uppercase letter.
- At least one special symbol.
- The uppercase letter cannot be the first character.
- At least 9 characters in length.

In [None]:
# Candidate that satisfies all four rules listed above.
'aasS!w2orda'

In [None]:
# Candidate that starts with an uppercase letter, which the rules forbid.
'AasS!w2orda'

In [None]:
# Candidate with no special symbol.
'aasSsw2orda'

In [None]:
# Candidate with no uppercase letter.
'aaspsw2ord@'

In [39]:
# Password rules as a verbose regex. Each lookahead inspects the text from
# the start of the string without consuming it; the final line enforces the
# length. \Z (rather than $) is used so a trailing newline is not accepted.
password_pattern = r"""
^(?=.*[a-z])                # at least one lowercase letter
(?=.*[A-Z])(?!^[A-Z])       # at least one uppercase letter, but not at the start
(?=.*[!@#$%^&*()\-_+=])     # at least one special symbol
.{9,}\Z                     # at least 9 total characters, nothing after them
"""


def is_valid_password(candidate: str) -> bool:
    """Return True when ``candidate`` satisfies every rule in ``password_pattern``.

    Args:
        candidate: The password text to check.

    Returns:
        True if the candidate has a lowercase letter, an uppercase letter that
        is not the first character, one of the listed special symbols, and at
        least 9 characters; False otherwise.
    """
    return re.match(password_pattern, candidate, re.VERBOSE) is not None


# Example password to test
password = "aasS!w2orda"

# Check the password
if is_valid_password(password):
    print(f"'{password}': Valid password")
else:
    print(f"'{password}': Invalid password")


'aasS!w2orda': Valid password
