### Re

A regular expression (shortened as regex) is a sequence of characters that specifies a search pattern in text. Such patterns are typically used by string-searching algorithms for "find" or "find and replace" operations on strings, or for input validation.

Links: https://www.pythoncheatsheet.org/cheatsheet/regular-expressions


1. Import the regex module with import re.
2. Create a Regex object with the re.compile() function. (Remember to use a raw string.)
3. Pass the string you want to search into the Regex object’s search() method. This returns a Match object.
4. Call the Match object’s group() method to return a string of the actual matched text.

In [1]:
import re

In [2]:
# Match a literal pattern at the start of a string, then list every occurrence.
# The subject is called `text` so that the built-in `str` type is not shadowed
# for the rest of the session.
text = 'abcdab'
pattern = 'ab'

# re.match() only tries the pattern at the very beginning of the string.
res = re.match(pattern, text)

print(res)  # the Match object itself
print(res.group())  # the text that was matched
print(re.findall(pattern, text))  # every non-overlapping match, as a list

<re.Match object; span=(0, 2), match='ab'>
ab
['ab', 'ab']


In [106]:
# Single-character classes. Every pattern is a raw string so that sequences
# such as \d or \s reach the regex engine untouched instead of being parsed as
# (invalid) Python string escapes. Subject strings stay ordinary literals
# because '\n' and '\t' there really are meant to be a newline and a tab.
print(re.match(r'.', 'a'))
print(re.match(r'.', 'D'))
print(re.match(r'.', '\n'))  # . matches any character except a newline

print('===============')

print(re.match(r'\d\d', '122D'))  # \d: a digit
print(re.match(r'\D', '122D'))  # \D: anything that is not a digit

print('===============')

print(re.match(r'\s', '\nD'))  # \s: whitespace (newline, tab, space, ...)
print(re.match(r'\s', '\tD'))
print(re.match(r'\s', ' D'))
print(re.match(r'\s', '\\D'))  # the subject starts with a backslash, which is not whitespace
print(re.match(r'\S', ' 122D'))  # \S: anything that is not whitespace

print('===============')

print(re.match(r'\w', 'sad122D'))  # \w: a word character (letter, digit or underscore)
print(re.match(r'\w', 'wer122D'))
print(re.match(r'\W', '\nsad122D'))  # \W: anything that is not a word character
print(re.match(r'\W', '4sad122D'))
print(re.match(r'\W', '*sad122D'))
print(re.match(r'\W', '%sad122D'))



<re.Match object; span=(0, 1), match='a'>
<re.Match object; span=(0, 1), match='D'>
None
<re.Match object; span=(0, 2), match='12'>
None
<re.Match object; span=(0, 1), match='\n'>
<re.Match object; span=(0, 1), match='\t'>
<re.Match object; span=(0, 1), match=' '>
None
None
<re.Match object; span=(0, 1), match='s'>
<re.Match object; span=(0, 1), match='w'>
<re.Match object; span=(0, 1), match='\n'>
None
<re.Match object; span=(0, 1), match='*'>
<re.Match object; span=(0, 1), match='%'>


In [46]:
# Character sets: [...] matches exactly one character out of those listed.
# Patterns are raw strings so that \w and \W are passed to the regex engine
# as written rather than being treated as Python string escapes.
print(re.match(r'[abcd]', 'ab'))
print(re.match(r'[abcd]', 'zb'))  # match() only looks at the start, and 'z' is not in the set
print(re.findall(r'[abcd]', 'zb'))  # findall() scans the whole string
print(re.match(r'[a-e]', 'eb'))  # a-e: a range of characters
print(re.match(r'[0-9]', '3eb'))
print(re.match(r'[A-Z]', 'Seb'))

print('==============')

print(re.match(r'[^a-e]', 'zb'))  # a leading ^ inside [...] negates the set
print(re.match(r'[a-zA-Z]', 'Seb'))  # several ranges can be combined
print(re.match(r'[a-zA-Z]', '2Seb'))
print(re.match(r'[0-9a-zA-Z]', '2Seb'))

print('==============')

print(re.match(r'[0-9]', '33'))  # one set consumes one character
print(re.match(r'[0-9][0-9]', '33'))
print(re.match(r'[0-9][a-z]', '3a'))
print(re.match(r'\w\w', '55'))
# [\w\W] is "word character or non-word character", i.e. any character at all.
print(re.match(r'[\w\W][\w\W][\w\W][\w\W][\w\W]', '我aD5&'))





<re.Match object; span=(0, 1), match='a'>
None
['b']
<re.Match object; span=(0, 1), match='e'>
<re.Match object; span=(0, 1), match='3'>
<re.Match object; span=(0, 1), match='S'>
<re.Match object; span=(0, 1), match='z'>
<re.Match object; span=(0, 1), match='S'>
None
<re.Match object; span=(0, 1), match='2'>
<re.Match object; span=(0, 1), match='3'>
<re.Match object; span=(0, 2), match='33'>
<re.Match object; span=(0, 2), match='3a'>
<re.Match object; span=(0, 2), match='55'>
<re.Match object; span=(0, 5), match='我aD5&'>


In [75]:
# Backslashes in patterns and paths. A non-raw literal is interpreted twice:
# first by Python when it parses the string literal, then by the regex engine
# when it parses the pattern. A raw string (r'...') skips the first step.
str1 = 'c:\\\python\\ne\\test.txt'  # '\\' becomes one backslash and '\p' is kept as written, giving c:\\python\ne\test.txt
print(str1)

str2 = r'c:\\python\ne\test.txt'  # the raw literal spells the same text directly
print(str2)


print(re.match('\d', '\d')) # pattern \d is the digit class; the subject is a backslash followed by d, so there is no match
print(re.match('\\d', '\d')) # Python turns '\\d' into \d, so this is the same digit-class pattern: no match
print(re.match('\\\d', '\d')) # Python yields \\d, which the regex reads as a literal backslash followed by d: match
print(re.match('\\\\d', '\d')) # Python also yields \\d here, so the regex is the same as on the previous line: match
# print(re.match(r'\d', r'\d'))

print('=====================')

# The literal is first processed as a Python string and only then as a regular
# expression: both arguments below are a single newline character.
print(re.match('\n', '\n')) 

print('=====================')

# In the raw pattern every \\ is a regex-escaped literal backslash; in the
# non-raw subject every '\\' is Python's spelling of one backslash.
print(re.match(r'c:\\python\\ne\\test.txt', 'c:\\python\\ne\\test.txt'))


c:\\python\ne\test.txt
c:\\python\ne\test.txt
None
None
<re.Match object; span=(0, 2), match='\\d'>
<re.Match object; span=(0, 2), match='\\d'>
<re.Match object; span=(0, 1), match='\n'>
<re.Match object; span=(0, 21), match='c:\\python\\ne\\test.txt'>


In [86]:
# Quantifiers, shown on a phone number such as 585-555-1212 (written here
# without the dashes). Patterns are raw strings so that \d is passed to the
# regex engine as written rather than being parsed as a Python string escape.
print(re.match(r'\d\d\d\d\d\d\d\d\d\d', '5855551212'))  # ten digits spelled out one by one

print(re.match(r'\d*', '5855551212'))  # *: zero or more digits
print(re.match(r'\d+', 'a5855551212'))  # +: at least one digit; the string starts with 'a'
print(re.match(r'\d?', 'a5855551212'))  # ?: zero or one digit; here it matches the empty string
print(re.match(r'\d?', '5855551212'))  # ?: zero or one digit; here it matches '5'

print(re.match(r'\d{4}', '5855551212'))  # {n}: exactly n times
print(re.match(r'\d{4,}', '5855551212'))  # {n,}: n or more times
print(re.match(r'\d{,4}', '5855551212'))  # {,n}: up to n times
print(re.match(r'\d{2,6}', '5855551212'))  # {m,n}: at least m and at most n times


<re.Match object; span=(0, 10), match='5855551212'>
<re.Match object; span=(0, 10), match='5855551212'>
None
<re.Match object; span=(0, 0), match=''>
<re.Match object; span=(0, 1), match='5'>
<re.Match object; span=(0, 4), match='5855'>
<re.Match object; span=(0, 10), match='5855551212'>
<re.Match object; span=(0, 4), match='5855'>
<re.Match object; span=(0, 6), match='585555'>


In [89]:
tel = '58555551212asdas1234'
tel01 = '58555551212'

# ^ and $ anchor the pattern to the start and the end of the string, so any
# text after the eleven digits makes the match fail. The pattern is a raw
# string (so \d is not parsed as a Python escape) and is defined once for
# both checks.
tel_pattern = r'^585[0-9][0-9][0-9][0-9]\d{4}$'

print(re.match(tel_pattern, tel))  # None: extra characters follow the number
print(re.match(tel_pattern, tel01))

None
<re.Match object; span=(0, 11), match='58555551212'>


In [101]:
# Word boundaries: \b matches at the edge of a word, \B at any position that
# is not the edge of a word.
str1 = 'relod readkn asdre sdaresd asdewfdsresadaf!'

boundary_patterns = (
    r're\b',  # "re" at the end of a word (raw-string spelling)
    're\\b',  # the same pattern spelled with an escaped backslash
    '\\bre',  # "re" at the start of a word
    '\\Bre',  # "re" that is not at the start of a word
)
for boundary_pattern in boundary_patterns:
    print(re.findall(boundary_pattern, str1))

['re']
['re']
['re', 're']
['re', 're', 're']


In [111]:
# Match a date.
t = '2023-12-34'
print(re.match(r'2023-12-31', '2023-12-31'))  # literal text
print(re.match(r'\d{4}-12-31', '2023-12-31'))  # any four-digit year
print(re.match(r'\d{4}-\d{2}-\d{2}', t))  # checks the shape only, so the impossible day 34 is accepted

# Month 01-12 and day 01-31. The alternative for days 30/31 must not start
# with ^: outside a character class ^ anchors to the start of the string,
# which can never hold in the middle of a pattern, so those days would be
# rejected. Day 00 is excluded the same way month 00 is.
date_pattern = r'\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])'
print(re.match(date_pattern, t))  # None: 34 is not a valid day




<re.Match object; span=(0, 10), match='2023-12-31'>
<re.Match object; span=(0, 10), match='2023-12-31'>
<re.Match object; span=(0, 10), match='2023-12-34'>
None


In [163]:
t = '2023-12-24'
print(re.match(r'2023-12-31', '2023-12-31'))
print(re.match(r'\d{4}-12-31', '2023-12-31'))
print(re.match(r'\d{4}-\d{2}-\d{2}', t))

print('================')

# Capture year, month and day as groups 1-3. The alternative for days 30/31
# has no leading ^ (outside a character class ^ anchors to the start of the
# string and would make those days unmatchable), and day 00 is excluded.
# The match is performed once and the resulting Match object is reused.
date_regex = re.compile(r'(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])')
date_match = date_regex.match(t)

print(date_match.group())  # the whole match (same as group(0))
print(date_match.group(1))  # year
print(date_match.group(2))  # month
print(date_match.group(3))  # day

<re.Match object; span=(0, 10), match='2023-12-31'>
<re.Match object; span=(0, 10), match='2023-12-31'>
<re.Match object; span=(0, 10), match='2023-12-24'>
2023-12-24
2023
12
24


In [161]:
# Inspect the Match object produced for the date string held in `t`.
# The alternative for days 30/31 has no leading ^ (outside a character class
# ^ anchors to the start of the string and would make those days
# unmatchable), and day 00 is excluded. The match is performed once.
date_match = re.match(r'(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])', t)

print(date_match.groups())  # all captured groups, as a tuple
print(date_match.start())  # index where the match begins
print(date_match.end())  # index just past the end of the match
print(date_match.span())  # (start, end)
print(date_match.string)  # the string that was passed to match()



('2023', '12', '24')
0
10
(0, 10)
2023-12-24


In [164]:
# search() looks through the string and returns the first match it finds.
first_a = re.search('a', 'dsafadsfsd')
print(first_a)


<re.Match object; span=(2, 3), match='a'>


In [165]:
# findall() returns every non-overlapping match as a list of strings.
all_a = re.findall('a', 'dsafadsfsd')
print(all_a)

['a', 'a']


In [167]:
# split() with capturing groups keeps the captured texts in the result. Here
# the whole date is the separator, so the list is: empty string, the three
# groups, empty string. The alternative for days 30/31 has no leading ^
# (outside a character class ^ anchors to the start of the string and would
# make those days unmatchable), and day 00 is excluded.
print(re.split(r'(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])', t))
print(re.split('-', t))  # plain separator: only the pieces between the dashes

['', '2023', '12', '24', '']
['2023', '12', '24']


In [169]:
# sub() replaces matches of the pattern; `count` caps how many are replaced.
dash = re.compile('-')
print(dash.sub('/', t))  # every dash
print(dash.sub('/', t, count=1))  # only the first dash


2023/12/24
2023/12-24


In [145]:
con = '''<title>this is python</title>'''

# [\w\W] is "word character or non-word character", i.e. any character at
# all. Patterns are raw strings so that \w and \W are not parsed as Python
# string escapes, and each match is performed once and then reused.
title_match = re.match(r'<title>([\w\W]*)</title>', con)
print(title_match)
print(title_match.group())  # the whole match
print(title_match.group(1))  # the text between the tags

# Opening and closing tag names are matched independently of each other.
tag_match = re.match(r'<(\w+)>([\w\W]*)</(\w+)>', con)
print(tag_match)
print(tag_match.group())

# \1 is a backreference: the closing tag must repeat what group 1 captured.
print(re.match(r'<(\w+)>([\w\W]*)</(\1)>', con).group(1))

print('======================')

# Named group: (?P<name1>...) defines it and (?P=name1) refers back to it.
named_match = re.match(r'<(?P<name1>\w+)>([\w\W]*)</(?P=name1)>', con)
print(named_match.group())
print(named_match.group('name1'))

<re.Match object; span=(0, 29), match='<title>this is python</title>'>
<title>this is python</title>
this is python
<re.Match object; span=(0, 29), match='<title>this is python</title>'>
<title>this is python</title>
title
<title>this is python</title>
title


In [153]:
content = '''<title>this is python</title>'''

# re.compile(pattern, flags=0) builds a pattern object whose methods
# (match, search, ...) can be called without passing the pattern again.
res = re.compile(r'<title>([\w\W]*)</title>')
title_hit = res.match(content)
print(title_hit)

# re.IGNORECASE is the long name of re.I: letters match regardless of case.
res1 = re.compile(r'Aa', re.IGNORECASE)
lowercase_hit = res1.match('aa')
print(lowercase_hit)

<re.Match object; span=(0, 29), match='<title>this is python</title>'>
<re.Match object; span=(0, 2), match='aa'>


In [114]:
# Find the first ddd-ddd-dddd shaped substring in a sentence.
phone_num_regex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
sentence = 'My number is 415-555-4242.'
mo = phone_num_regex.search(sentence)
mo  # bare expression: in the notebook its value is shown as the cell result

<re.Match object; span=(13, 25), match='415-555-4242'>

In [115]:
# Report the text matched by the search stored in `mo`.
found_text = mo.group()
print(f'Phone number found: {found_text}')

Phone number found: 415-555-4242


In [128]:
# Grouping with parentheses: each (...) captures one part of the match.
phone_num_regex = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)')
mo = phone_num_regex.search('My number is 415-555-4242.')

print(mo.group())  # no argument: the whole match
# Groups 1-3 are the captured parts; group 0 is the whole match again.
for group_number in (1, 2, 3, 0):
    print(mo.group(group_number))


415-555-4242
415
555
4242
415-555-4242


In [150]:
# Two groups: the area code, and the rest of the number.
phone_num_regex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
mo = phone_num_regex.search('My number is 415-555-4242.')

captured = mo.groups()
print(captured)

# groups() returns a tuple, so it can be unpacked into one name per group.
area_code, main_number = captured
print(area_code, main_number, sep='\n')

('415', '555-4242')
415
555-4242


In [155]:
# Alternation: A|B matches either A or B; search() returns the first match.
hero_regex = re.compile(r'Batman|Tina Fey')

sentence = 'Batman and Tina Fey.'
mo1 = hero_regex.search(sentence)
mo1.groups()  # empty tuple: the pattern contains no capturing groups

()

In [1]:
"""
验证输入用户名和QQ号是否有效并给出对应的提示信息

要求：用户名必须由字母、数字或下划线构成且长度在6~20个字符之间，QQ号是5~12的数字且首位不能为0
"""
import re


def main():
    """Prompt for a username and a QQ number and report whether they are valid.

    A username is valid when it consists of 6-20 ASCII letters, digits or
    underscores. A QQ number is valid when it consists of 5-12 ASCII digits
    and does not start with 0. One message is printed for each invalid
    field, or a single success message when both fields are valid.
    """
    username = input('请输入用户名: ')
    qq = input('请输入QQ号: ')
    # The first argument is the regular expression (a string or a compiled
    # pattern object); the second is the string to check against it.
    # fullmatch() requires the whole input to fit the pattern, so no ^/$
    # anchors are needed.
    m1 = re.fullmatch(r'[0-9a-zA-Z_]{6,20}', username)
    if not m1:
        print('请输入有效的用户名.')
    # [0-9] rather than \d: in a str pattern \d also matches non-ASCII
    # decimal digits (e.g. Arabic-Indic ones), which are not valid here.
    m2 = re.fullmatch(r'[1-9][0-9]{4,11}', qq)
    if not m2:
        print('请输入有效的QQ号.')
    if m1 and m2:
        print('你输入的信息是有效的!')

if __name__ == '__main__':
    main()

你输入的信息是有效的!
