<a href="https://colab.research.google.com/github/digitechit07/Python-Tutorial-with-Excercise/blob/main/Python_RegEx_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Python import re**
Raw Strings (r""): It is recommended to use raw strings for regex patterns (e.g., r"pattern"). This prevents Python from interpreting backslashes as escape sequences, ensuring that regex special characters like \d (digit) are treated as intended.
Core Functions:
re.search(pattern, string): Scans through a string looking for the first location where the regular expression pattern produces a match. Returns a match object if a match is found, otherwise None.
re.match(pattern, string): Checks for a match only at the beginning of the string. Returns a match object if a match is found at the start, otherwise None.
re.findall(pattern, string): Returns a list of all non-overlapping matches of the pattern in the string.
re.finditer(pattern, string): Returns an iterator yielding match objects for all non-overlapping matches.
re.sub(pattern, repl, string, count=0, flags=0): Replaces occurrences of the pattern in the string with repl. repl can be a string or a function.
re.compile(pattern): Compiles a regular expression pattern into a regular expression object, which can then be used for more efficient repeated matching operations.

In [11]:
import re

text = "The quick brown fox jumps over the lazy dog. My email is test@example.com."

# Find all occurrences of "fox"
matches = re.findall(r"fox", text)
print(f"Found 'fox': {matches}")

# Search for an email address
email_pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b"
email_match = re.search(email_pattern, text)
if email_match:
    print(f"Found email: {email_match.group()}")

# Replace "quick" with "slow"
new_text = re.sub(r"quick", "slow", text)
print(f"Modified text: {new_text}")

import re
m = re.search('(?<=abc)def', 'abcdef')
m.group(0)

m = re.search(r'(?<=-)\w+', 'spam-egg')
m.group(0)

re.split(r'\W+', 'Words, words, words.')

re.split(r'(\W+)', 'Words, words, words.')

re.split(r'\W+', 'Words, words, words.', maxsplit=1)

re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)

re.split(r'(\W+)', '...words, words...')

re.split(r'\b', 'Words, words, words.')

re.split(r'\W*', '...words...')

re.split(r'(\W*)', '...words...')


re.findall(r'\bf[a-z]*', 'which foot or hand fell fastest')

re.findall(r'(\w+)=(\d+)', 'set width=20 and height=10')

re.sub(r'def\s+([a-zA-Z_][a-zA-Z_0-9]*)\s*\(\s*\):',
       r'static PyObject*\npy_\1(void)\n{',
       'def myfunc():')

def dashrepl(matchobj):
    if matchobj.group(0) == '-': return ' '
    else: return '-'

re.sub('-{1,2}', dashrepl, 'pro----gram-files')

re.sub(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE)
print(re.escape('https://www.python.org'))

'''
string = "The quick brown fox jumps over the lazy dog. My email is test@example.com."
legal_chars = string.ascii_lowercase + string.digits + "!#$%&'*+-.^_`|~:"
print('[%s]+' % re.escape(legal_chars))
'''

operators = ['+', '-', '*', '/', '**']
print('|'.join(map(re.escape, sorted(operators, reverse=True))))

digits_re = r'\d+'
sample = '/usr/sbin/sendmail - 0 errors, 12 warnings'
print(re.sub(digits_re, digits_re.replace('\\', r'\\'), sample))

pattern = re.compile("d")
pattern.search("dog")     # Match at index 0

pattern.search("dog", 1)  # No match; search doesn't include the "d"

pattern = re.compile("o")
pattern.match("dog")      # No match as "o" is not at the start of "dog".
pattern.match("dog", 1)   # Match as "o" is the 2nd character of "dog".

pattern = re.compile("o[gh]")
pattern.fullmatch("dog")      # No match as "o" is not at the start of "dog".
pattern.fullmatch("ogre")     # No match as not the full string matches.
pattern.fullmatch("doggie", 1, 3)   # Matches within given limits.

m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
m.group(0)       # The entire match

m.group(1)       # The first parenthesized subgroup.

m.group(2)       # The second parenthesized subgroup.

m.group(1, 2)    # Multiple arguments give us a tuple.

m = re.match(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Malcolm Reynolds")
m.group('first_name')

m.group('last_name')

m = re.match(r"(..)+", "a1b2c3")  # Matches 3 times.
m.group(1)                        # Returns only the last match.
m = re.match(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Isaac Newton")
m['first_name']

m['last_name']

m = re.match(r"(\d+)\.(\d+)", "24.1632")
m.groups()

m = re.match(r"(\d+)\.?(\d+)?", "24")
m.groups()      # Second group defaults to None.

m.groups('0')   # Now, the second group defaults to '0'.
m = re.match(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Malcolm Reynolds")
m.groupdict()
email = "tony@tiremove_thisger.net"
m = re.search("remove_this", email)
email[:m.start()] + email[m.end():]

def displaymatch(match):
    if match is None:
        return None
    return '<Match: %r, groups=%r>' % (match.group(), match.groups())

valid = re.compile(r"^[a2-9tjqk]{5}$")
displaymatch(valid.match("akt5q"))  # Valid.

displaymatch(valid.match("akt5e"))  # Invalid.
displaymatch(valid.match("akt"))    # Invalid.
displaymatch(valid.match("727ak"))  # Valid.

pair = re.compile(r".*(.).*\1")
displaymatch(pair.match("717ak"))     # Pair of 7s.

displaymatch(pair.match("718ak"))     # No pairs.
displaymatch(pair.match("354aa"))     # Pair of aces.

pair = re.compile(r".*(.).*\1")
pair.match("717ak").group(1)



#pair.match("718ak").group(1)
pair.match("354aa").group(1)

re.match("c", "abcdef")    # No match
re.search("c", "abcdef")   # Match

re.fullmatch("p.*n", "python") # Match

re.fullmatch("r.*n", "python") # No match

re.match("c", "abcdef")    # No match
re.search("^c", "abcdef")  # No match
re.search("^a", "abcdef")  # Match

re.match("X", "A\nB\nX", re.MULTILINE)  # No match
re.search("^X", "A\nB\nX", re.MULTILINE)  # Match

'''
def repl(m):
    inner_word = list(m.group(2))
    random.shuffle(inner_word)
    return m.group(1) + "".join(inner_word) + m.group(3)

text = "Professor Abdolmalek, please report your absences promptly."
re.sub(r"(\w)(\w+)(\w)", repl, text)

re.sub(r"(\w)(\w+)(\w)", repl, text)
'''
text = "He was carefully disguised but captured quickly by police."
re.findall(r"\w+ly\b", text)

text = "He was carefully disguised but captured quickly by police."
for m in re.finditer(r"\w+ly\b", text):
    print('%02d-%02d: %s' % (m.start(), m.end(), m.group(0)))


re.match(r"\W(.)\1\W", " ff ")

re.match("\\W(.)\\1\\W", " ff ")

re.match(r"\\", r"\\")

re.match("\\\\", r"\\")

from typing import NamedTuple
import re

class Token(NamedTuple):
    type: str
    value: str
    line: int
    column: int

def tokenize(code):
    keywords = {'IF', 'THEN', 'ENDIF', 'FOR', 'NEXT', 'GOSUB', 'RETURN'}
    token_specification = [
        ('NUMBER',   r'\d+(\.\d*)?'),  # Integer or decimal number
        ('ASSIGN',   r':='),           # Assignment operator
        ('END',      r';'),            # Statement terminator
        ('ID',       r'[A-Za-z]+'),    # Identifiers
        ('OP',       r'[+\-*/]'),      # Arithmetic operators
        ('NEWLINE',  r'\n'),           # Line endings
        ('SKIP',     r'[ \t]+'),       # Skip over spaces and tabs
        ('MISMATCH', r'.'),            # Any other character
    ]
    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
    line_num = 1
    line_start = 0
    for mo in re.finditer(tok_regex, code):
        kind = mo.lastgroup
        value = mo.group()
        column = mo.start() - line_start
        if kind == 'NUMBER':
            value = float(value) if '.' in value else int(value)
        elif kind == 'ID' and value in keywords:
            kind = value
        elif kind == 'NEWLINE':
            line_start = mo.end()
            line_num += 1
            continue
        elif kind == 'SKIP':
            continue
        elif kind == 'MISMATCH':
            raise RuntimeError(f'{value!r} unexpected on line {line_num}')
        yield Token(kind, value, line_num, column)

statements = '''
    IF quantity THEN
        total := total + price * quantity;
        tax := price * 0.05;
    ENDIF;
'''

for token in tokenize(statements):
    print(token)



Found 'fox': ['fox']
Found email: test@example.com
Modified text: The slow brown fox jumps over the lazy dog. My email is test@example.com.
https://www\.python\.org
/|\-|\+|\*\*|\*
07-16: carefully
40-47: quickly
Token(type='IF', value='IF', line=2, column=4)
Token(type='ID', value='quantity', line=2, column=7)
Token(type='THEN', value='THEN', line=2, column=16)
Token(type='ID', value='total', line=3, column=8)
Token(type='ASSIGN', value=':=', line=3, column=14)
Token(type='ID', value='total', line=3, column=17)
Token(type='OP', value='+', line=3, column=23)
Token(type='ID', value='price', line=3, column=25)
Token(type='OP', value='*', line=3, column=31)
Token(type='ID', value='quantity', line=3, column=33)
Token(type='END', value=';', line=3, column=41)
Token(type='ID', value='tax', line=4, column=8)
Token(type='ASSIGN', value=':=', line=4, column=12)
Token(type='ID', value='price', line=4, column=15)
Token(type='OP', value='*', line=4, column=21)
Token(type='NUMBER', value=0.05, line

  re.sub(r"(\w)(\w+)(\w)", repl, text)


# **Special Characters and Metacharacters:**
.: Matches any character (except newline).
^: Matches the beginning of a string.
$: Matches the end of a string.
*: Matches zero or more occurrences of the preceding character/group.
+: Matches one or more occurrences of the preceding character/group.
?: Matches zero or one occurrence of the preceding character/group.
{m,n}: Matches between m and n occurrences of the preceding character/group.
[]: Defines a character set (e.g., [a-z], [0-9]).
|: Acts as an OR operator.
`\`: Escapes special characters (e.g., \. to match a literal dot).
\d: Matches a digit (0-9).
\D: Matches a non-digit.
\w: Matches a word character (alphanumeric + underscore).
\W: Matches a non-word character.
\s: Matches a whitespace character.
\S: Matches a non-whitespace character.
\b: Matches a word boundary.
\B: Matches a non-word boundary

In [16]:
from typing import NamedTuple
import re

class Token(NamedTuple):
    type: str
    value: str
    line: int
    column: int

def tokenize(code):
    keywords = {'IF', 'THEN', 'ENDIF', 'FOR', 'NEXT', 'GOSUB', 'RETURN'}
    token_specification = [
        ('NUMBER',   r'\d+(\.\d*)?'),  # Integer or decimal number
        ('ASSIGN',   r':='),           # Assignment operator
        ('END',      r';'),            # Statement terminator
        ('ID',       r'[A-Za-z]+'),    # Identifiers
        ('OP',       r'[+\-*/]'),      # Arithmetic operators
        ('NEWLINE',  r'\n'),           # Line endings
        ('SKIP',     r'[ \t]+'),       # Skip over spaces and tabs
        ('MISMATCH', r'.'),            # Any other character
    ]
    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
    line_num = 1
    line_start = 0
    for mo in re.finditer(tok_regex, code):
        kind = mo.lastgroup
        value = mo.group()
        column = mo.start() - line_start
        if kind == 'NUMBER':
            value = float(value) if '.' in value else int(value)
        elif kind == 'ID' and value in keywords:
            kind = value
        elif kind == 'NEWLINE':
            line_start = mo.end()
            line_num += 1
            continue
        elif kind == 'SKIP':
            continue
        elif kind == 'MISMATCH':
            raise RuntimeError(f'{value!r} unexpected on line {line_num}')
        yield Token(kind, value, line_num, column)

statements = '''
    IF quantity THEN
        total := total + price * quantity;
        tax := price * 0.05;
    ENDIF;
'''

for token in tokenize(statements):
    print(token)


Token(type='IF', value='IF', line=2, column=4)
Token(type='ID', value='quantity', line=2, column=7)
Token(type='THEN', value='THEN', line=2, column=16)
Token(type='ID', value='total', line=3, column=8)
Token(type='ASSIGN', value=':=', line=3, column=14)
Token(type='ID', value='total', line=3, column=17)
Token(type='OP', value='+', line=3, column=23)
Token(type='ID', value='price', line=3, column=25)
Token(type='OP', value='*', line=3, column=31)
Token(type='ID', value='quantity', line=3, column=33)
Token(type='END', value=';', line=3, column=41)
Token(type='ID', value='tax', line=4, column=8)
Token(type='ASSIGN', value=':=', line=4, column=12)
Token(type='ID', value='price', line=4, column=15)
Token(type='OP', value='*', line=4, column=21)
Token(type='NUMBER', value=0.05, line=4, column=23)
Token(type='END', value=';', line=4, column=27)
Token(type='ENDIF', value='ENDIF', line=5, column=4)
Token(type='END', value=';', line=5, column=9)


import re

s = 'GeeksforGeeks: A computer science portal for geeks'
match = re.search(r'portal', s)

print('Start Index:', match.start())
print('End Index:', match.end())

import re
string = """Hello my Number is 123456789 and
            my friend's number is 987654321"""

regex = '\d+'
match = re.findall(regex, string)
print(match)


import re
p = re.compile('[a-e]')
print(p.findall("Aye, said Mr. Gibenson Stark"))


import re
p = re.compile('\d')
print(p.findall("I went to him at 11 A.M. on 4th July 1886"))

p = re.compile('\d+')
print(p.findall("I went to him at 11 A.M. on 4th July 1886"))


import re

p = re.compile('\w')
print(p.findall("He said * in some_lang."))

p = re.compile('\w+')
print(p.findall("I went to him at 11 A.M., he \
said *** in some_language."))

p = re.compile('\W')
print(p.findall("he said *** in some_language."))

import re
p = re.compile('ab*')
print(p.findall("ababbaabbb"))

from re import split

print(split('\W+', 'Words, words , Words'))
print(split('\W+', "Word's words Words"))
print(split('\W+', 'On 12th Jan 2016, at 11:02 AM'))
print(split('\d+', 'On 12th Jan 2016, at 11:02 AM'))

import re
print(re.split('\d+', 'On 12th Jan 2016, at 11:02 AM', 1))
print(re.split('[a-f]+', 'Aey, Boy oh boy, come here', flags=re.IGNORECASE))
print(re.split('[a-f]+', 'Aey, Boy oh boy, come here'))

import re

# Case-insensitive replacement of all 'ub'
print(re.sub('ub', '~*', 'Subject has Uber booked already', flags=re.IGNORECASE))

# Case-sensitive replacement of all 'ub'
print(re.sub('ub', '~*', 'Subject has Uber booked already'))

# Replace only the first 'ub', case-insensitive
print(re.sub('ub', '~*', 'Subject has Uber booked already', count=1, flags=re.IGNORECASE))

# Replace "AND" with "&", ignoring case
print(re.sub(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE))

import re

# Case-sensitive replacement
print(re.subn('ub', '~*', 'Subject has Uber booked already'))

# Case-insensitive replacement
t = re.subn('ub', '~*', 'Subject has Uber booked already', flags=re.IGNORECASE)
print(t)
print(len(t))      # tuple length
print(t[0])        # modified string

import re
print(re.escape("This is Awesome even 1 AM"))
print(re.escape("I Asked what is this [a-9], he said \t ^WoW"))


import re

regex = r"([a-zA-Z]+) (\d+)"
match = re.search(regex, "I was born on June 24")

if match:
    print("Match at index %s, %s" % (match.start(), match.end()))
    print("Full match:", match.group(0))
    print("Month:", match.group(1))
    print("Day:", match.group(2))
else:
    print("The regex pattern does not match.")


import re

s = 'geeks.forgeeks'

# without using \
match = re.search(r'.', s)
print(match)

# using \
match = re.search(r'\.', s)
print(match)

import re

string = "The quick brown fox jumps over the lazy dog"
pattern = "[a-m]"
result = re.findall(pattern, string)

print(result)


import re
regex = r'^The'
strings = ['The quick brown fox', 'The lazy dog', 'A quick brown fox']
for string in strings:
    if re.match(regex, string):
        print(f'Matched: {string}')
    else:
        print(f'Not matched: {string}')


import re

string = "Hello World!"
pattern = r"World!$"

match = re.search(pattern, string)
if match:
    print("Match found!")
else:
    print("Match not found.")


import re

string = "The quick brown fox jumps over the lazy dog."
pattern = r"brown.fox"

match = re.search(pattern, string)
if match:
    print("Match found!")
else:
    print("Match not found.")


import re
s = "Welcome to GeeksForGeeks"
res = re.search(r"\bG", s)

print(res.re)
print(res.string)

import re

s = "Welcome to GeeksForGeeks"
res = re.search(r"\bGee", s)

print(res.start())
print(res.end())
print(res.span())

import re
s = "Welcome to GeeksForGeeks"
res = re.search(r"\D{2} t", s)
print(res.group())


Token(type='IF', value='IF', line=2, column=4)
Token(type='ID', value='quantity', line=2, column=7)
Token(type='THEN', value='THEN', line=2, column=16)
Token(type='ID', value='total', line=3, column=8)
Token(type='ASSIGN', value=':=', line=3, column=14)
Token(type='ID', value='total', line=3, column=17)
Token(type='OP', value='+', line=3, column=23)
Token(type='ID', value='price', line=3, column=25)
Token(type='OP', value='*', line=3, column=31)
Token(type='ID', value='quantity', line=3, column=33)
Token(type='END', value=';', line=3, column=41)
Token(type='ID', value='tax', line=4, column=8)
Token(type='ASSIGN', value=':=', line=4, column=12)
Token(type='ID', value='price', line=4, column=15)
Token(type='OP', value='*', line=4, column=21)
Token(type='NUMBER', value=0.05, line=4, column=23)
Token(type='END', value=';', line=4, column=27)
Token(type='ENDIF', value='ENDIF', line=5, column=4)
Token(type='END', value=';', line=5, column=9)
Start Index: 34
End Index: 40
['123456789', '98765

  regex = '\d+'
  p = re.compile('\d')
  p = re.compile('\d+')
  p = re.compile('\w')
  p = re.compile('\w+')
  p = re.compile('\W')
  print(split('\W+', 'Words, words , Words'))
  print(split('\W+', "Word's words Words"))
  print(split('\W+', 'On 12th Jan 2016, at 11:02 AM'))
  print(split('\d+', 'On 12th Jan 2016, at 11:02 AM'))
  print(re.split('\d+', 'On 12th Jan 2016, at 11:02 AM', 1))


# **Regular Expression Syntax**
A regular expression (or RE) specifies a set of strings that matches it; the functions in this module let you check if a particular string matches a given regular expression (or if a given regular expression matches a particular string, which comes down to the same thing).

Regular expressions can be concatenated to form new regular expressions; if A and B are both regular expressions, then AB is also a regular expression. In general, if a string p matches A and another string q matches B, the string pq will match AB. This holds unless A or B contain low precedence operations; boundary conditions between A and B; or have numbered group references. Thus, complex expressions can easily be constructed from simpler primitive expressions like the ones described here. For details of the theory and implementation of regular expressions, consult the Friedl book [Frie09], or almost any textbook about compiler construction.

In [20]:
import re
s = "Welcome to GeeksForGeeks"
res = re.search(r"\D{2} t", s)
print(res.group())

import re
print(re.findall(r'[Gg]eeks', 'GeeksforGeeks: \
                 A computer science portal for geeks'))
import re
print('Range',re.search(r'[a-zA-Z]', 'x'))


import re

print(re.search(r'[^a-z]', 'c'))
print(re.search(r'G[^e]', 'Geeks'))

import re

print('Geeks:', re.search(r'\bGeeks\b', 'Geeks'))
print('GeeksforGeeks:', re.search(r'\bGeeks\b', 'GeeksforGeeks'))

import re


# Beginning of String
match = re.search(r'^Geek', 'Campus Geek of the month')
print('Beg. of String:', match)

match = re.search(r'^Geek', 'Geek of the month')
print('Beg. of String:', match)

# End of String
match = re.search(r'Geeks$', 'Compute science portal-GeeksforGeeks')
print('End of String:', match)

import re


# Beginning of String
match = re.search(r'^Geek', 'Campus Geek of the month')
print('Beg. of String:', match)

match = re.search(r'^Geek', 'Geek of the month')
print('Beg. of String:', match)

# End of String
match = re.search(r'Geeks$', 'Compute science portal-GeeksforGeeks')
print('End of String:', match)

import re

print('Color',re.search(r'colou?r', 'color'))
print('Colour',re.search(r'colou?r', 'colour'))

import re
print('Date{mm-dd-yyyy}:', re.search(r'[\d]{2}-[\d]{2}-[\d]{4}','18-08-2020'))

import re

print('Three Digit:', re.search(r'[\d]{3,4}', '189'))
print('Four Digit:', re.search(r'[\d]{3,4}', '2145'))

import re

print(re.search(r'[\d]{1,}','5th Floor, A-118,\
Sector-136, Noida, Uttar Pradesh - 201305'))

import re

print(re.search(r'[\d]+', '5th Floor, A-118,\
Sector-136, Noida, Uttar Pradesh - 201305'))


import re
grp = re.search(r'([\d]{2})-([\d]{2})-([\d]{4})', '26-08-2020')
print(grp)


import re
grp = re.search(r'([\d]{2})-([\d]{2})-([\d]{4})','26-08-2020')
print(grp.group())

import re
grp = re.search(r'([\d]{2})-([\d]{2})-([\d]{4})','26-08-2020')
print(grp.groups())


import re
grp = re.search(r'([\d]{2})-([\d]{2})-([\d]{4})','26-08-2020')
print(grp.group(3))

import re
match = re.search(r'(?P<dd>[\d]{2})-(?P<mm>[\d]{2})-(?P<yyyy>[\d]{4})',
                  '26-08-2020')
print(match.group('mm'))

import re
match = re.search(r'(?P<dd>[\d]{2})-(?P<mm>[\d]{2})-(?P<yyyy>[\d]{4})',
                  '26-08-2020')
print(match.groupdict())


import re
print('negation:', re.search(r'n[^e]', 'Python'))
print('lookahead:', re.search(r'n(?!e)', 'Python'))


import re
print('positive lookahead', re.search(r'n(?=e)', 'jasmine'))


import re
print(re.sub(r'([\d]{4})-([\d]{4})-([\d]{4})-([\d]{4})',r'\1\2\3\4',
             '1111-2222-3333-4444'))

import re

pattern = '^a...s$'
test_string = 'abyss'
result = re.match(pattern, test_string)

if result:
  print("Search successful.")
else:
  print("Search unsuccessful.")



# Program to extract numbers from a string

import re

string = 'hello 12 hi 89. Howdy 34'
pattern = '\d+'

result = re.findall(pattern, string)
print(result)

# Output: ['12', '89', '34']



import re

string = 'Twelve:12 Eighty nine:89.'
pattern = '\d+'

result = re.split(pattern, string)
print(result)

# Output: ['Twelve:', ' Eighty nine:', '.']



import re

string = 'Twelve:12 Eighty nine:89 Nine:9.'
pattern = '\d+'

# maxsplit = 1
# split only at the first occurrence
result = re.split(pattern, string, 1)
print(result)

# Output: ['Twelve:', ' Eighty nine:89 Nine:9.']




# Program to remove all whitespaces
import re

# multiline string
string = 'abc 12\
de 23 \n f45 6'

# matches all whitespace characters
pattern = '\s+'

# empty string
replace = ''

new_string = re.sub(pattern, replace, string)
print(new_string)

# Output: abc12de23f456




import re

# multiline string
string = 'abc 12\
de 23 \n f45 6'

# matches all whitespace characters
pattern = '\s+'
replace = ''

new_string = re.sub(r'\s+', replace, string, 1)
print(new_string)

# Output:
# abc12de 23
# f45 6


# Program to remove all whitespaces
import re

# multiline string
string = 'abc 12\
de 23 \n f45 6'

# matches all whitespace characters
pattern = '\s+'

# empty string
replace = ''

new_string = re.subn(pattern, replace, string)
print(new_string)

# Output: ('abc12de23f456', 4)



import re

string = "Python is fun"

# check if 'Python' is at the beginning
match = re.search('\APython', string)

if match:
  print("pattern found inside the string")
else:
  print("pattern not found")

# Output: pattern found inside the string



import re

string = '39801 356, 2102 1111'

# Three digit number followed by space followed by two digit number
pattern = '(\d{3}) (\d{2})'

# match variable contains a Match object.
match = re.search(pattern, string)

if match:
  print(match.group())
else:
  print("pattern not found")

# Output: 801 35



me t
['Geeks', 'Geeks', 'geeks']
Range <re.Match object; span=(0, 1), match='x'>
None
None
Geeks: <re.Match object; span=(0, 5), match='Geeks'>
GeeksforGeeks: None
Beg. of String: None
Beg. of String: <re.Match object; span=(0, 4), match='Geek'>
End of String: <re.Match object; span=(31, 36), match='Geeks'>
Beg. of String: None
Beg. of String: <re.Match object; span=(0, 4), match='Geek'>
End of String: <re.Match object; span=(31, 36), match='Geeks'>
Color <re.Match object; span=(0, 5), match='color'>
Colour <re.Match object; span=(0, 6), match='colour'>
Date{mm-dd-yyyy}: <re.Match object; span=(0, 10), match='18-08-2020'>
Three Digit: <re.Match object; span=(0, 3), match='189'>
Four Digit: <re.Match object; span=(0, 4), match='2145'>
<re.Match object; span=(0, 1), match='5'>
<re.Match object; span=(0, 1), match='5'>
<re.Match object; span=(0, 10), match='26-08-2020'>
26-08-2020
('26', '08', '2020')
2020
08
{'dd': '26', 'mm': '08', 'yyyy': '2020'}
negation: None
lookahead: <re.Match obj

  pattern = '\d+'
  pattern = '\d+'
  pattern = '\d+'
  pattern = '\s+'
  pattern = '\s+'
  pattern = '\s+'
  match = re.search('\APython', string)
  pattern = '(\d{3}) (\d{2})'


# **Used to indicate a set of characters. In a set:**

Characters can be listed individually, e.g. [amk] will match 'a', 'm', or 'k'.

Ranges of characters can be indicated by giving two characters and separating them by a '-', for example [a-z] will match any lowercase ASCII letter, [0-5][0-9] will match all the two-digits numbers from 00 to 59, and [0-9A-Fa-f] will match any hexadecimal digit. If - is escaped (e.g. [a\-z]) or if it’s placed as the first or last character (e.g. [-a] or [a-]), it will match a literal '-'.

Special characters except backslash lose their special meaning inside sets. For example, [(+*)] will match any of the literal characters '(', '+', '*', or ')'.

Backslash either escapes characters which have special meaning in a set such as '-', ']', '^' and '\\' itself or signals a special sequence which represents a single character such as \xa0 or \n or a character class such as \w or \S (defined below). Note that \b represents a single “backspace” character, not a word boundary as outside a set, and numeric escapes such as \1 are always octal escapes, not group references. Special sequences which do not match a single character such as \A and \z are not allowed.

Characters that are not within a range can be matched by complementing the set. If the first character of the set is '^', all the characters that are not in the set will be matched. For example, [^5] will match any character except '5', and [^^] will match any character except '^'. ^ has no special meaning if it’s not the first character in the set.

To match a literal ']' inside a set, precede it with a backslash, or place it at the beginning of the set. For example, both [()[\]{}] and []()[{}] will match a right bracket, as well as left bracket, braces, and parentheses.

Support of nested sets and set operations as in Unicode Technical Standard #18 might be added in the future. This would change the syntax, so to facilitate this change a FutureWarning will be raised in ambiguous cases for the time being. That includes sets starting with a literal '[' or containing literal character sequences '--', '&&', '~~', and '||'. To avoid a warning escape them with a backslash.