# regular expression (regex) :
    A regular expression (regex) is a sequence of characters that forms a search pattern. You can use this pattern to search, match, and manipulate strings. Regex is often used in programming, data validation, and text processing.

# Functions :
    

# 1. re.match()
* Purpose: Checks for a match only at the beginning of the string.
* Usage: re.match(pattern, string)

In [118]:
import regex as re

In [3]:
x = 'Text messages are used for personal, family, business, and social purposes.'

In [4]:
re.match('Text', x) # span gives the (start, end) index positions of the match

<regex.Match object; span=(0, 4), match='Text'>

In [6]:
print(re.match('used', x)) # if the pattern is not found at the start of the string, re.match returns None

None


# 2. re.search()
* Purpose: Searches the string for a match, anywhere in the string.
* Usage: re.search(pattern, string)

In [8]:
x = 'Give papa a cup of proper coffee in a copper coffee cup.'

In [9]:
re.search('cup', x) # returns only the first match found in the string

<regex.Match object; span=(12, 15), match='cup'>

# 3. re.findall()
* Purpose: Returns a list of all matches in the string.
* Usage: re.findall(pattern, string)

In [None]:
x = 'Give papa a cup of proper coffee in a copper coffee cup.'

In [10]:
re.findall('cup', x) 

['cup', 'cup']

# 4. re.split()
* Purpose: Splits the string by the occurrences of the pattern.
* Usage: re.split(pattern, string)

In [33]:
x = 'Give papa a cup of proper coffee in a copper coffee cup.'

In [34]:
re.split(r'papa', x, ) # splits the string at each occurrence of 'papa' and returns the pieces as a list

['Give ', ' a cup of proper coffee in a copper coffee cup.']

# 5. re.sub()
* Purpose: Replaces the matches with a string.
* Usage: re.sub(pattern, replacement, string)

In [None]:
x = 'Give papa a cup of proper coffee in a copper coffee cup.'

In [12]:
re.sub('papa', 'dad', x) 

'Give dad a cup of proper coffee in a copper coffee cup.'

# 6. re.compile()
* Purpose: Compiles a regex pattern into a regex object for reuse.
* Usage: re.compile(pattern)

In [65]:
x = 'Give papa a cup of proper coffee in a copper coffee cup.'

In [66]:
pattern = re.compile(r'papa')

In [67]:
print(re.search(pattern,x))

<regex.Match object; span=(5, 9), match='papa'>


# 7. flags (re.IGNORECASE (or re.I)):
    Makes the pattern matching case-insensitive.

In [77]:
x = 'Give PAPA a cup of proper coffee in a copper coffee cup.'

In [78]:
pattern = re.compile(r'papa')

In [80]:
print(re.search(pattern,x))

None


In [81]:
pattern = re.compile(r'papa', re.I)

In [82]:
print(re.search(pattern,x))

<regex.Match object; span=(5, 9), match='PAPA'>


# 8. group()
* Purpose: Returns the string matched by the regex pattern or a specific subgroup if an argument is provided.
* Usage: match.group([group1, ...])

In [151]:
x = '25-07-2024'

In [152]:
pattern = re.compile(r'(\d.*)-(\d.*)-(\d.*)')

In [153]:
y = re.match(pattern,x)

In [181]:
print(y.group())
print(y.group(1))
print(y.group(2))
print(y.group(3))

25-07-2024
25
07
2024


# 9. groups()
The groups() method returns a tuple of all the subgroups in the match, from 1 to the last. It does not include the entire match, only the captured groups defined by parentheses in the pattern.

In [376]:
x = '25-07-2024'

In [377]:
pattern = re.compile(r'(\d.*)-(\d.*)-(\d.*)')

In [378]:
y = re.match(pattern,x)
y

<regex.Match object; span=(0, 10), match='25-07-2024'>

In [379]:
print(y.groups())

('25', '07', '2024')


# Meta characters:
    Meta characters in regular expressions are special characters that have a unique meaning and help define patterns.

# 1. '.' (Dot)

Purpose: Matches any single character except newline.

In [191]:
x = 'Give papa a cup of proper coffee in a copper coffee cup.'

In [198]:
re.findall('c.f', x) # '.' matches any single character between 'c' and 'f'

['cof', 'cof']

In [199]:
re.findall('c.e', x) 

[]

In [201]:
re.findall('c...e', x) 

['coffe', 'coppe', 'coffe']

# 2. ^ (Caret)

Purpose: Matches the start of the string.

In [None]:
x = 'Give papa a cup of proper coffee in a copper coffee cup.'

In [204]:
re.findall(r'^Give',x)

['Give']

In [205]:
re.findall(r'^papa',x)

[]

# 3. $ (Dollar)

Purpose: Matches the end of the string (or just before a newline at the very end of the string).

In [207]:
x = 'Give papa a cup of proper coffee in a copper coffee cup'

In [208]:
re.findall(r'cup$',x)

['cup']

In [209]:
re.findall(r'Give$',x)

[]

# 4. * (Asterisk)

Purpose: Matches 0 or more repetitions of the preceding pattern.


In [260]:
x = 'abbbc'

In [263]:
re.findall(r'a.*c',x)

['abbbc']

In [264]:
re.findall(r'l.*', x)

[]

# 5. + (Plus)

Purpose: Matches 1 or more repetitions of the preceding pattern.

In [266]:
x = 'abbc'

In [269]:
re.findall(r'ab+c',x)

['abbc']

In [271]:
re.findall(r'a+c',x)

[]

# 6. ? (Question Mark)

Purpose: Matches 0 or 1 repetition of the preceding pattern.

In [297]:
x = 'abc' # 1 occurrence of 'b'

In [298]:
re.findall(r'ab?c',x)

['abc']

In [299]:
x = 'ac' # 0 occurrences of 'b'

In [300]:
re.findall(r'ab?c',x)

['ac']

# 7. [ ] (Square Brackets)

Purpose: Matches any one of the characters inside the brackets.

In [314]:
x = 'nani1234'

In [315]:
re.findall(r'[0-9]', x)

['1', '2', '3', '4']

In [316]:
re.findall(r'[a-z]', x)

['n', 'a', 'n', 'i']

# 8. | (Pipe)

Purpose: Matches either the pattern before or the pattern after the '|' .

In [322]:
x = 'Give papa a cup of proper coffee in a copper coffee cup'

In [323]:
re.findall(r'papa|nani', x)

['papa']

In [324]:
re.findall(r'papa|Give', x)

['Give', 'papa']

# 9. () (Parentheses)

Purpose: Groups patterns together and captures the matched text.

In [382]:
x =  'abcd'

In [387]:
y = re.match(r'(ab)(cd)', x)
y

<regex.Match object; span=(0, 4), match='abcd'>

In [389]:
if y:
    print(y.group(1))
    print(y.group(2))

ab
cd


# 10. \ (Backslash)

Purpose: Escapes a meta character to match it literally.

In [391]:
x = 'abcd$'

In [393]:
re.findall('\$', x)

['$']

# Special Sequences :
Special sequences in regular expressions provide a shorthand way to represent commonly used character classes and anchors. They simplify patterns and make regex more readable.

# 1.  \d

Purpose: Matches any digit (0-9).


In [402]:
x = 'abcd1230'

In [403]:
re.findall(r'\d', x)

['1', '2', '3', '0']

# 2. \D

Purpose: Matches any non-digit character.

In [404]:
x = 'abcd1230'

In [405]:
re.findall(r'\D', x)

['a', 'b', 'c', 'd']

# 3. \w

Purpose: Matches any word character (alphanumeric + underscore).

In [413]:
x = 'abcd_1230$#'

In [414]:
re.findall(r'\w+', x)

['abcd_1230']

# 4. \W

Purpose: Matches any non-word character.

In [415]:
x = 'abcd_1230$#'

In [416]:
re.findall(r'\W+', x)

['$#']

# 5. \s

Purpose: Matches any whitespace character (space, tab, newline).

In [418]:
x = 'abcd 1230 $#'

In [419]:
re.findall(r'\s+', x)

[' ', ' ']

# 6. \S

Purpose: Matches any non-whitespace character.

In [426]:
x = 'abcd_1230 $#'

In [427]:
re.findall(r'\S+', x)

['abcd_1230', '$#']

# 7. \b

Purpose: Matches a word boundary: the position between a word character and a non-word character, or between a word character and the start or end of the string.

In [461]:
x =  'abcd'

In [463]:
re.findall(r'\babcd', x)

['abcd']

In [464]:
re.findall(r'abcd\b', x)

['abcd']

# 8. \B

Purpose: Matches any position that is not a word boundary (for example, between two word characters).

In [473]:
x =  'abcd'

In [474]:
re.findall(r'\Bd', x)

['d']

In [475]:
re.findall(r'a\B', x)

['a']

# 9. \A

Purpose: Matches the start of the string.

In [None]:
x = 'abcd'

In [477]:
re.findall(r'\Aa', x)

['a']

In [478]:
re.findall(r'\Ab', x)

[]

# 10. \Z

Purpose: Matches the end of the string.

In [511]:
x = 'abcd'

In [513]:
re.findall(r'd\Z', x)

['d']

In [514]:
re.findall(r'a\Z', x)

[]