In [30]:
from re import *

In [21]:
# Regular expressions use the backslash character ('\') to indicate special forms
# or to allow special characters to be used without invoking their special meaning.

# The solution is to use Python’s raw string notation for regular expression patterns;
# backslashes are not handled in any special way in a string literal prefixed with 'r'. So r"\n" is a two-character string containing '\' and 'n'

# Some characters, like '|' or '(', are special. Special characters either stand for classes of ordinary characters,
# or affect how the regular expressions around them are interpreted.

# Repetition operators or quantifiers (*, +, ?, {m,n}, etc) cannot be directly nested.
# This avoids ambiguity with the non-greedy modifier suffix ?, and with other modifiers in other implementations.
# To apply a second repetition to an inner repetition, parentheses may be used.
# For example, the expression (?:a{6})* matches any multiple of six 'a' characters.

"""
.
matches any character except a newline
^
Matches the start of the string, and in MULTILINE mode also matches immediately after each newline.
$
Matches the end of the string or just before the newline at the end of the string
*
0 or more
+
1 or more
?
Causes the resulting RE to match 0 or 1 repetitions of the preceding RE. ab? will match either ‘a’ or ‘ab’.
{m}
Specifies that exactly m copies of the previous RE should be matched
{m,n}
Causes the resulting RE to match from m to n repetitions of the preceding RE
\
Either escapes special characters (permitting you to match characters like '*', '?', and so forth),
or signals a special sequence; special sequences are discussed below.
[]
Used to indicate a set of characters.
|
A|B, where A and B can be arbitrary REs, creates a regular expression that will match either A or B
(...)
Matches whatever regular expression is inside the parentheses, and indicates the start and end of a group;
the contents of a group can be retrieved after a match has been performed,
and can be matched later in the string with the \number special sequence, described below.
To match the literals '(' or ')', use \( or \), or enclose them inside a character class: [(], [)].
(?...)
This is an extension notation (a '?' following a '(' is not meaningful otherwise).
The first character after the '?' determines what the meaning and further syntax of the construct is.
Extensions usually do not create a new group; (?P<name>...) is the only exception to this rule.Following are the currently supported extensions.
(?P<name>...)
Similar to regular parentheses, but the substring matched by the group is accessible via the symbolic group name name.
Group names must be valid Python identifiers, and each group name must be defined only once within a regular expression.
A symbolic group is also a numbered group, just as if the group were not named.
(?P=name)
A backreference to a named group; it matches whatever text was matched by the earlier group named name.
\A
Matches only at the start of the string.
\b
Matches the empty string, but only at the beginning or end of a word
\B
Matches the empty string, but only when it is not at the beginning or end of a word
\d
Matches any Unicode decimal digit: [0-9]
\D
Matches any character which is not a decimal digit: [^0-9]
\s
Matches Unicode whitespace characters (which includes [ \t\n\r\f\v]
\S
Matches any character which is not a whitespace character: [^ \t\n\r\f\v]
\w
Matches Unicode word characters: [a-zA-Z0-9_]
\W
Matches any character which is not a word character: [^a-zA-Z0-9_]
\Z
Matches only at the end of the string.
"""

"\n.\nmatches any character except a newline\n^\nMatches the start of the string, and in MULTILINE mode also matches immediately after each newline.\n$\nMatches the end of the string or just before the newline at the end of the string\n*\n0 or more\n+\n1 or more\n?\nCauses the resulting RE to match 0 or 1 repetitions of the preceding RE. ab? will match either ‘a’ or ‘ab’.\n{m}\nSpecifies that exactly m copies of the previous RE should be matched\n{m,n}\nCauses the resulting RE to match from m to n repetitions of the preceding RE\nEither escapes special characters (permitting you to match characters like '*', '?', and so forth),\nor signals a special sequence; special sequences are discussed below.\n[]\nUsed to indicate a set of characters.\n|\nA|B, where A and B can be arbitrary REs, creates a regular expression that will match either A or B\n(...)\nMatches whatever regular expression is inside the parentheses, and indicates the start and end of a group;\nthe contents of a group can be

In [22]:
match
search
sub
compile
finditer
findall
split
fullmatch

<function re.fullmatch(pattern, string, flags=0)>

In [23]:
search(r'[a-zA-Z0-9_\.]{3,64}@[a-zA-Z0-9_\.]{1,64}\.[a-zA-Z0-9_\.]{2,64}', 'abcdefg@hi.jklmnopqrstuvwxyzA amir4vx@gmail.google.com BCD1234567890 <a> hi a </a> end-.')

<re.Match object; span=(0, 29), match='abcdefg@hi.jklmnopqrstuvwxyzA'>

In [24]:
match(r'[a-zA-Z0-9_\.]{3,64}@[a-zA-Z0-9_\.]{1,64}\.[a-zA-Z0-9_\.]{2,64}', 'amir4vx@gmail.google.com')

<re.Match object; span=(0, 24), match='amir4vx@gmail.google.com'>

In [25]:
findall(r'[a-zA-Z0-9_\.]{3,64}@[a-zA-Z0-9_\.]{1,64}\.[a-zA-Z0-9_\.]{2,64}', 'abcdefg@hi.jklmnopqrstuvwxyzA amir4vx@gmail.google.com BCD1234567890 <a> hi a </a> end-.')

['abcdefg@hi.jklmnopqrstuvwxyzA', 'amir4vx@gmail.google.com']

In [26]:
split(r'[a-zA-Z0-9_\.]{3,64}@[a-zA-Z0-9_\.]{1,64}\.[a-zA-Z0-9_\.]{2,64}', 'abcdefg@hi.jklmnopqrstuvwxyzA amir4vx@gmail.google.com BCD1234567890 <a> hi a </a> end-.')

['', ' ', ' BCD1234567890 <a> hi a </a> end-.']

In [27]:
'''
[]	A set of characters : "[a-m]"	
\	Signals a special sequence (can also be used to escape special characters) : "\d"	
.	Any character (except newline character) : "he..o"	
^	Starts with : "^hello"	
$	Ends with : "planet$"	
*	Zero or more occurrences : "he.*o"	
+	One or more occurrences : "he.+o"	
?	Zero or one occurrences : "he.?o"	
{}	Exactly the specified number of occurrences : "he.{2}o"	
|	Either or : "falls|stays"	
()	Capture and group
'''

'\n[]\tA set of characters : "[a-m]"\t\n\\\tSignals a special sequence (can also be used to escape special characters) : "\\d"\t\n.\tAny character (except newline character) : "he..o"\t\n^\tStarts with : "^hello"\t\n$\tEnds with : "planet$"\t\n*\tZero or more occurrences : "he.*o"\t\n+\tOne or more occurrences : "he.+o"\t\n?\tZero or one occurrences : "he.?o"\t\n{}\tExactly the specified number of occurrences : "he.{2}o"\t\n|\tEither or : "falls|stays"\t\n()\tCapture and group\n'

In [31]:
txt = "The rain in Spain"
x = findall("ai", txt)
print(x)

['ai', 'ai']


In [33]:
txt = "The rain in Spain"
x = findall("Portugal", txt)
print(x)

[]


In [34]:
txt = "The rain in Spain"
x = search("\s", txt)
x

<re.Match object; span=(3, 4), match=' '>

In [35]:
txt = "The rain in Spain"
x = split("\s", txt)
print(x)

['The', 'rain', 'in', 'Spain']


In [36]:
txt = "The rain in Spain"
x = split("\s", txt, 1)
print(x)

['The', 'rain in Spain']


In [37]:
txt = "The rain in Spain"
x = sub("\s", "9", txt)
print(x)

The9rain9in9Spain


In [38]:
txt = "The rain in Spain"
x = sub("\s", "9", txt, 2)
print(x)

The9rain9in Spain


In [42]:
txt = "The rain in Spain"
x = sub("[a-z]?", "x , ", txt)
print(x)

x , Tx , x , x ,  x , x , x , x , x ,  x , x , x ,  x , Sx , x , x , x , x , 


In [43]:
txt = "The rain in Spain"
x = search("ai", txt)
print(x)

<re.Match object; span=(5, 7), match='ai'>


In [44]:
txt = "The rain in Spain"
x = search(r"\bS\w+", txt)
print(x.span())

(12, 17)


In [45]:
txt = "The rain in Spain"
x = search(r"\bS\w+", txt)
print(x.string)

The rain in Spain


In [46]:
txt = "The rain in Spain"
x = search(r"\bS\w+", txt)
print(x.group())

Spain
