# Regular Expression
* https://docs.python.org/3/library/re.html

# Example in 'Python for DevOps'

## Search

In [1]:
# Sample CC list searched by every example below, one "Name <address>" entry per line.
# NOTE: the last two entries have no closing '>'; the spans shown in the
# recorded outputs depend on this exact text.
cc_list = (
    'Ezra Koenig <ekoenig@vpwk.com>,\n'
    'Rostam Batmanglij <rostam@vpwk.com>,\n'
    'Chris Tomson <ctomson@vpwk.com,\n'
    'Bobbi Baio <bbaio@vpwk.com'
)

In [3]:
# Plain substring test with the `in` operator: no regular expression involved
'Rostam' in cc_list

True

In [4]:
import re

# found: re.search() returns a Match object for the first occurrence of the pattern
re.search(r'Rostam', cc_list)

<re.Match object; span=(32, 38), match='Rostam'>

In [8]:
# not found: re.search() returns None (printed explicitly so the cell shows it)
print(re.search(r'Rostam2', cc_list))

None


## Character Sets, Character Classes

In [9]:
# Character set: [RB] matches 'R' or 'B', and [iy] matches 'i' or 'y'.
# Members are listed without separators; a comma inside [] is itself a member
# and would be matched literally.
re.search(r'[RB]obb[iy]', cc_list)

<re.Match object; span=(101, 106), match='Bobbi'>

In [10]:
# range: [a-z] matches a single lowercase letter, so this is 'Chr' plus two of them
re.search(r'Chr[a-z][a-z]', cc_list)

<re.Match object; span=(69, 74), match='Chris'>

In [11]:
# Quantifiers and escaping, one pattern per line of output:
#   +   one or more repetitions
#   {n} exactly n repetitions
#   \.  a literal dot
for pattern in (
    r'[A-Za-z]+',
    r'[A-Za-z]{6}',
    r'[A-Za-z]+@[a-z]+\.[a-z]+',
):
    print(re.search(pattern, cc_list))

<re.Match object; span=(0, 4), match='Ezra'>
<re.Match object; span=(5, 11), match='Koenig'>
<re.Match object; span=(13, 29), match='ekoenig@vpwk.com'>


In [12]:
# Character classes: \w matches one "word" character (letter, digit or underscore)
first_word = re.search(r'\w+', cc_list)
first_address = re.search(r'\w+\@\w+\.\w+', cc_list)
print(first_word)
print(first_address)

<re.Match object; span=(0, 4), match='Ezra'>
<re.Match object; span=(13, 29), match='ekoenig@vpwk.com'>


## Groups, Named Groups

In [14]:
# Numbered groups: group(0) is the whole match, group(1)..group(3) are the
# parenthesised parts. Passing several indices returns them together as a tuple.
matched = re.search(r'(\w+)\@(\w+)\.(\w+)', cc_list)
(matched, *matched.group(0, 1, 2, 3))

(<re.Match object; span=(13, 29), match='ekoenig@vpwk.com'>,
 'ekoenig@vpwk.com',
 'ekoenig',
 'vpwk',
 'com')

In [15]:
# Asking for a group number the pattern does not define raises IndexError.
# Catch it and show the message so the notebook still runs top to bottom.
try:
    matched.group(4)
except IndexError as err:
    print(f'{type(err).__name__}: {err}')

IndexError: no such group

In [17]:
# Named groups are written (?P<NAME>...).
# A named group such as (?P<quote>...) can be referenced:
#   - inside the same pattern:          (?P<quote>['"]).*?(?P=quote), or \1
#   - on a match object m:              m.group('quote'), m.end('quote'), ...
#   - in a re.sub() replacement string: \g<quote>, \g<1>, \1
matched = re.search(r'(?P<name>\w+)\@(?P<SLD>\w+)\.(?P<TLD>\w+)', cc_list)
(matched, *matched.group('name', 'SLD', 'TLD'))

(<re.Match object; span=(13, 29), match='ekoenig@vpwk.com'>,
 'ekoenig',
 'vpwk',
 'com')

In [18]:
# Asking for a group name the pattern does not define raises IndexError.
# Catch it and show the message so the notebook still runs top to bottom.
try:
    matched.group('NONE')
except IndexError as err:
    print(f'{type(err).__name__}: {err}')

IndexError: no such group

## Find All, Find Iterator

In [20]:
# findall() returns every match of the pattern as a list of strings
re.findall(r'\w+\@\w+\.\w+', cc_list)

['ekoenig@vpwk.com', 'rostam@vpwk.com', 'ctomson@vpwk.com', 'bbaio@vpwk.com']

In [21]:
# with groups in the pattern, findall() returns one tuple of group values per match
re.findall(r'(\w+)\@(\w+)\.(\w+)', cc_list)

[('ekoenig', 'vpwk', 'com'),
 ('rostam', 'vpwk', 'com'),
 ('ctomson', 'vpwk', 'com'),
 ('bbaio', 'vpwk', 'com')]

In [22]:
# finditer() returns an iterator of Match objects; next() pulls the first one
matched = re.finditer(r'\w+\@\w+\.\w+', cc_list)
matched, next(matched)

(<callable_iterator at 0x23f1f87a3e0>,
 <re.Match object; span=(13, 29), match='ekoenig@vpwk.com'>)

In [28]:
# IPython introspection: a trailing ? displays the help for re.Match
re.Match?

Init signature: re.Match()
Docstring:
The result of re.match() and re.search().
Match objects always have a boolean value of True.
File:           d:\software\miniconda3\lib\re\__init__.py
Type:           type
Subclasses:

In [31]:
# Walk the iterator and print the full text of each match (m[0] is the whole match)
matched = re.finditer(r'\w+\@\w+\.\w+', cc_list)
for m in matched:
    print(m[0])

ekoenig@vpwk.com
rostam@vpwk.com
ctomson@vpwk.com
bbaio@vpwk.com


In [24]:
# groupdict() gives each match as a {group name: matched text} dict
matched = re.finditer(r"(?P<name>\w+)\@(?P<SLD>\w+)\.(?P<TLD>\w+)", cc_list)
for m in matched:
    print(m.groupdict())

{'name': 'ekoenig', 'SLD': 'vpwk', 'TLD': 'com'}
{'name': 'rostam', 'SLD': 'vpwk', 'TLD': 'com'}
{'name': 'ctomson', 'SLD': 'vpwk', 'TLD': 'com'}
{'name': 'bbaio', 'SLD': 'vpwk', 'TLD': 'com'}


## Substitution

In [33]:
# replace every digit (\d) with '#'
re.sub(r"\d", "#", "The passcode you entered was  09876")

'The passcode you entered was  #####'

In [35]:
# Reference named groups in the replacement string with \g<NAME>:
# each address is rewritten in the order TLD.SLD.name
address_pattern = r"(?P<name>\w+)\@(?P<SLD>\w+)\.(?P<TLD>\w+)"
reversed_template = r"\g<TLD>.\g<SLD>.\g<name>"
re.sub(address_pattern, reversed_template, cc_list)

'Ezra Koenig <com.vpwk.ekoenig>,\nRostam Batmanglij <com.vpwk.rostam>,\nChris Tomson <com.vpwk.ctomson,\nBobbi Baio <com.vpwk.bbaio'

## Compiling

In [36]:
# compile the pattern into a reusable pattern object, then search with it
regex = re.compile(r'\w+\@\w+\.\w+')
regex, regex.search(cc_list)

(re.compile(r'\w+\@\w+\.\w+', re.UNICODE),
 <re.Match object; span=(13, 29), match='ekoenig@vpwk.com'>)

# More
TODO