In [1]:
import re

In [2]:
line = 'asdf fjdk; afed, fjek,asdf, foo'
re.split(r'[;,\s]\s*', line)

['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']

In [3]:
# re.search(pattern, string): Searches for the first occurrence of the pattern in the string
result = re.search(r'apple', 'I like apples and oranges')
print(result.group())  # Output: 'apple'

apple


In [4]:
# re.match(pattern, string): Matches the pattern at the beginning of the string.

result = re.match(r'apple', 'I like apples and oranges')
print(result.group())  # Output: None (because 'apple' doesn't occur at the start of the string)

AttributeError: 'NoneType' object has no attribute 'group'

In [5]:
# re.findall(pattern, string): Finds all occurrences of the pattern in the string.
results = re.findall(r'app\w*', 'I like apples and applesauce')
print(results)  # Output: ['apples', 'applesauce']

['apples', 'applesauce']


In [6]:
# re.sub(pattern, replacement, string): Replaces occurrences of the pattern in the string with the replacement.
new_string = re.sub(r'apple', 'orange', 'I like apples and applesauce')
print(new_string)  # Output: 'I like oranges and orangesauce'

I like oranges and orangesauce


### Regex Patterns:

In [7]:
# 1. Literals: Characters that match themselves (e.g., a, 5, apple).
text = "I have an apple, but I want more apples for my apple pie."

# Using a regex literal to find occurrences of the word 'apple'
matches = re.findall(r'apple', text)

print(matches)  # Output: ['apple', 'apple', 'apple']

['apple', 'apple', 'apple']


**Character Classes:**
* \d: Matches any digit (equivalent to [0-9]).
* \w: Matches any alphanumeric character or underscore (equivalent to [a-zA-Z0-9_]).
* \s: Matches any whitespace character (space, tab, newline).




**Quantifiers:**
* *: Matches zero or more occurrences.
* +: Matches one or more occurrences.
* ?: Matches zero or one occurrence.
* {n}: Matches exactly n occurrences.
* {n,}: Matches n or more occurrences.
* {n,m}: Matches between n and m occurrences.

In [8]:
text = "ab abb abbb"
matches = re.findall(r'ab*', text)
print(matches)  # Output: ['ab', 'abb', 'abbb']

['ab', 'abb', 'abbb']


In [9]:
matches = re.findall(r'ab+', text)
print(matches)  # Output: ['ab', 'abb', 'abbb']

['ab', 'abb', 'abbb']


In [10]:
text = "cat cats"
matches = re.findall(r'cats?', text)
print(matches)  # Output: ['cat', 'cats']

['cat', 'cats']


In [11]:
text = "aa aaa aaaa aaaaa"
matches = re.findall(r'a{3}', text)
print(matches)  # Output: ['aaa', 'aaa']

['aaa', 'aaa', 'aaa']


In [12]:
matches = re.findall(r'a{2,}', text)
print(matches)  # Output: ['aa', 'aaa', 'aaaa']

['aa', 'aaa', 'aaaa', 'aaaaa']


In [14]:
matches = re.findall(r'a{3,4}', text)
print(matches)  # Output: ['aa', 'aaa', 'aaaa']

['aaa', 'aaaa', 'aaaa']


**Anchors:**
* ^: Matches the start of a string.
* $: Matches the end of a string.

In [15]:
#  Match lines that start with 'Hello'
text = "Hello World\nHello Python\nHi there"
matches = re.findall(r'^Hello', text, flags=re.MULTILINE)
print(matches)  # Output: ['Hello', 'Hello']

['Hello', 'Hello']


In [19]:
#  pattern 'Python' is matched only when it appears at the end of a line.
text = "Hello Python\nPython is great\nPython programming"
matches = re.findall(r'Python$', text, flags=re.MULTILINE)
print(matches)  # Output: ['Python']

['Python']


In [18]:
text = "Hello World\nPrograaming Python is great\nProgramming with Python is fun"

# Using anchors to match lines that start with 'Python' and end with 'Python'
matches_start = re.findall(r'^Python', text, flags=re.MULTILINE)
matches_end = re.findall(r'Python$', text, flags=re.MULTILINE)

print(matches_start)  # Output: []
print(matches_end)    # Output: []

[]
[]


**Alternation:**
|: Matches either/or (e.g., apple|orange matches 'apple' or 'orange').

In [20]:
text = "I like apples, oranges, and bananas."

# Using alternation to match 'apples', 'oranges', or 'bananas'
matches = re.findall(r'apples|oranges|bananas', text)
print(matches)  # Output: ['apples', 'oranges', 'bananas']

['apples', 'oranges', 'bananas']
