## Finding Patterns of Text Without Regular Expressions (Not Recommended)

In [5]:
def is_phone_number(text):
    """
    Checks if the given text is a valid phone number in the format XXX-XXX-XXXX.

    Args:
        text (str): The text to be checked.

    Returns:
        bool: True if the text is a valid phone number, False otherwise.
    """
    if len(text) != 12:
        return False
    for i in range(0, 3):
        if not text[i].isdecimal():
            return False
    if text[3] != "-":
        return False
    for i in range(4, 7):
        if not text[i].isdecimal():
            return False
    if text[7] != "-":
        return False
    for i in range(8, 12):
        if not text[i].isdecimal():
            return False
    return True


print(f"Is 415-555-4242 a phone number: {is_phone_number('415-555-4242')}")
print(f"Is Machine Learning a phone number: {is_phone_number('Machine Learning')}")

Is 415-555-4242 a phone number: True
Is Machine Learning a phone number: False


## Finding Patterns of Text With Regular Expressions

In [6]:
import re

phone_number_regex = re.compile(r"\d\d\d-\d\d\d-\d\d\d\d")
text = "My phone number is 415-555-4242"

match_object = phone_number_regex.search(text)
print(match_object.group())

415-555-4242


## Grouping with Parentheses

In [7]:
phone_number_regex = re.compile(r"(\d\d\d)-(\d\d\d-\d\d\d)")
text = "My phone number is 415-555-4242"

match_object = phone_number_regex.search(text)
print(f"group(): {match_object.group()}")
print(f"group(1): {match_object.group(1)}")
print(f"group(2): {match_object.group(2)}\n")

# Retrieve all the groups at once
print(f"match_object.group(): {match_object.groups()}")
area_code, phone_number = match_object.groups()
print(f"Area Code: {area_code}")
print(f"Phone Number: {phone_number}")

group(): 415-555-424
group(1): 415
group(2): 555-424

match_object.group(): ('415', '555-424')
Area Code: 415
Phone Number: 555-424


## Matching Multiple Groups with the Pipe

In [9]:
hero_regex = re.compile(r"Moon Knight|Deadpool|Wolverine")

hero_match_object1 = hero_regex.search("Moon Knight and Deadpool")
print(f"Hero: {hero_match_object1.group()}")

hero_match_object2 = hero_regex.search("Deadpool and Moon Knight")
print(f"Hero: {hero_match_object2.group()}")

heroes_match_object = hero_regex.findall(
    "Moon Knight, Spider-Man, Deadpool, Wolverine, and Doctor Strange"
)
print(f"All Heroes: {heroes_match_object}")

Hero: Moon Knight
Hero: Deadpool
All Heroes: ['Moon Knight', 'Deadpool', 'Wolverine']


## Optional Matching with the Question Mark ``?``

In [16]:
bat_regex = re.compile(r"Bat(wo)?man")
bat_match_object = bat_regex.search("The Adventures of Batman")
bat_match_object2 = bat_regex.search("The Adventures of Batwoman")

print(f"Bat Match Object 1: {bat_match_object.group()}")
print(f"Bat Match Object 2: {bat_match_object2.group()}\n")

phone_regex = re.compile(r"(\d\d\d-)?\d\d\d-\d\d\d\d")
phone_number_match_object = phone_regex.search("My number is 415-555-4242")
phone_number_match_object2 = phone_regex.search("My number is 555-4242")

print(f"Phone Match Object 1: {phone_number_match_object.group()}")
print(f"Phone Match Object 2: {phone_number_match_object2.group()}")

Bat Match Object 1: Batman
Bat Match Object 2: Batwoman

Phone Match Object 1: 415-555-4242
Phone Match Object 2: 555-4242


## Matching Zero or More with the Star

In [33]:
bat_regex = re.compile(r"Bat(wo)*man")
bat_match_object = bat_regex.search("The Adventures of Batman")
bat_match_object2 = bat_regex.search("The Adventures of Batwoman")
bat_match_object3 = bat_regex.search("The Adventures of Batwowowoman")

print(f"Bat Match Object 1: {bat_match_object.group()}")
print(f"Bat Match Object 2: {bat_match_object2.group()}")
print(f"Bat Match Object 3: {bat_match_object3.group()}\n")

try:
    bat_match_object4 = bat_regex.search("The Adventures of Batwomamanaman")
    print(f"Bat Match Object 4: {bat_match_object4.group()}")
except AttributeError as AE:
    print(f"Error: {AE}")

Bat Match Object 1: Batman
Bat Match Object 2: Batwoman
Bat Match Object 3: Batwowowoman

Error: 'NoneType' object has no attribute 'group'


## Matching One or More with the Plus

In [36]:
bat_regex = re.compile(r"Bat(wo)+man")
bat_match_object = bat_regex.search("The Adventures of Batwoman")
bat_match_object2 = bat_regex.search("The Adventures of Batwowowoman")

print(f"Bat Match Object 1: {bat_match_object.group()}")
print(f"Bat Match Object 2: {bat_match_object2.group()}\n")

try:
    bat_match_object3 = bat_regex.search("The Adventures of Batman")
    print(f"Bat Match Object 3: {bat_match_object3.group()}")
except AttributeError as AE:
    print(f"Error: {AE}")

Bat Match Object 1: Batwoman
Bat Match Object 2: Batwowowoman

Error: 'NoneType' object has no attribute 'group'


## Matching Specific Repetitions with Braces

In [50]:
laugh_regex = re.compile(r"(Ha){3,}")
laugh_match_object = laugh_regex.search("HaHaHaHa")
print(laugh_match_object.group())

try:
    laugh_match_object2 = laugh_regex.search("HaHa")
    print(laugh_match_object2.group())
except AttributeError as AE:
    print(f"Error: {AE}")

HaHaHaHa
Error: 'NoneType' object has no attribute 'group'


In [62]:
date_regex = re.compile(r"\d{2}/\d{2}/\d{4}")
date_match_object = date_regex.findall(
    "Today is 03/01/2024. Yesterday was February 29, 2024. Tomorrow is 03/02/2024"
)
print(f"Date Match Object 1: {date_match_object}")

Date Match Object 1: ['03/01/2024', '03/02/2024']


## Greedy and Non-Greedy Matching