# Regular Expressions

In [1]:
import re

In [2]:
# Literal search terms that are looked up in `text` further down.
patterns = ['term1', 'term2']

In [3]:
# Sample sentence to search; it contains 'term1' but not 'term2'.
text = 'This is a string with term1, but not the other term.'

In [4]:
# re.search scans the string and returns a Match object when the pattern is found.
re.search('hello', 'hello world!')

<re.Match object; span=(0, 5), match='hello'>

In [5]:
# Report, for every candidate pattern, whether it occurs anywhere in `text`.
for pattern in patterns:
    print('Searching for "{}" in: \n"{}"'.format(pattern, text))

    # re.search returns None (falsy) when nothing matches, a Match object otherwise.
    print(
        '\n\tMatch was found.\n'
        if re.search(pattern, text)
        else '\n\tNo Match was found.\n'
    )

Searching for "term1" in: 
"This is a string with term1, but not the other term."

	Match was found.

Searching for "term2" in: 
"This is a string with term1, but not the other term."

	No Match was found.



In [6]:
# No match: re.search returns None, so the notebook shows no output for this cell.
re.search('h', 'w')

In [7]:
# Printing the result makes the None return value visible.
print(re.search('h', 'w'))

None


In [8]:
# Keep the Match object for the first pattern so its attributes can be inspected.
match = re.search(patterns[0], text)

In [9]:
# A successful search yields an re.Match instance.
type(match)

re.Match

In [10]:
# Index in `text` where the match begins.
match.start()

22

In [11]:
# Index just past the last matched character (exclusive end).
match.end()

27

In [12]:
# Delimiter on which the phrase will be split.
split_term = '@'

# Example sentence containing a single '@'.
phrase = 'What is your email? is it hello@gmail.com?'

In [13]:
# re.split cuts the phrase at each occurrence of the pattern ('@' here).
re.split(split_term, phrase)

['What is your email? is it hello', 'gmail.com?']

In [14]:
# For comparison: the built-in str.split() with no arguments splits on whitespace.
'hello world'.split()

['hello', 'world']

In [15]:
# re.findall returns every non-overlapping occurrence as a list of strings.
re.findall('match', 'Here is one match, here is another match')

['match', 'match']

In [1]:
def multi_re_find(patterns, phrase):
    """Print all matches of each pattern found in ``phrase``.

    For every entry in ``patterns`` this writes a header line showing the
    pattern's repr, then the list returned by ``re.findall``, then a
    blank-line separator.

    Args:
        patterns: Iterable of regular expressions accepted by ``re.findall``.
        phrase: String to search.

    Returns:
        None. Everything is written to standard output.
    """
    header = 'Searching the phrase using the re check: %r'
    for regex in patterns:
        print(header % regex)
        hits = re.findall(regex, phrase)
        print(hits)
        # print('\n') emits two newlines, leaving a gap between patterns.
        print('\n')

# Regular Expression HOWTO
Reference: the [Regular Expression HOWTO](https://docs.python.org/3/howto/regex.html) in the official Python documentation.

In [2]:
import re

# Compile the pattern once so it can be reused through the Pattern object's methods.
p = re.compile(r'ab*')

In [3]:
# Display the compiled pattern.
p

re.compile(r'ab*', re.UNICODE)

In [4]:
# Recompile with re.IGNORECASE so matching ignores letter case.
p = re.compile(r'ab*', re.IGNORECASE)

In [5]:
# The added flag now shows up in the displayed pattern.
p

re.compile(r'ab*', re.IGNORECASE|re.UNICODE)

In [7]:
# 'ab*' needs a leading 'a', so the empty string does not match and None is printed.
print(p.match(''))

None


In [9]:
# The 'a' and every following 'b' are consumed: the match spans the whole string.
print(p.match('abbbbb'))

<re.Match object; span=(0, 6), match='abbbbb'>


In [10]:
# \d+ matches each run of digits; findall returns the runs as strings.
p = re.compile(r'\d+')
p.findall('12 drummers drumming, 11 pipers piping, 10 lords a-leaping')

['12', '11', '10']

In [17]:
# With several capture groups, findall returns one tuple per match (one item per group).
# \w does not match '-', so the last word of 'a-leaping' is captured only as 'a'.
p = re.compile(r'(\d+)\s(\w+)\s(\w+)')
p.findall('12 drummers drumming, 11 pipers piping, 10 lords a-leaping')

[('12', 'drummers', 'drumming'),
 ('11', 'pipers', 'piping'),
 ('10', 'lords', 'a')]

In [19]:
# finditer yields Match objects one at a time; this uses the three-group
# pattern `p` compiled earlier in the notebook.
iterator = p.finditer('12 drummers drumming, 11 ... 10 ...')

# Only the first "number word word" triple is complete, so a single span is printed.
for match in iterator:
    print(match.span())


(0, 20)


In [23]:
# re.match anchors at the start of the string; \s+ requires whitespace right after 'From'.
# 'Fromage' has a letter there instead, so the first result is None.
print(re.match(r'From\s+', 'Fromage amk'))

print(re.match(r'From\s+', 'From amk Thu May 14 19:12:10 1998'))

None
<re.Match object; span=(0, 5), match='From '>


In [25]:
# Pattern for a numeric entity reference: '&#', then an octal, decimal or
# hexadecimal number (captured as group 1), then ';'.
# re.VERBOSE lets the pattern carry its own layout and inline comments.
charref = re.compile(r"""
 &[#]                # Start of a numeric entity reference
 (
     0[0-7]+         # Octal form
   | [0-9]+          # Decimal form
   | x[0-9a-fA-F]+   # Hexadecimal form
 )
 ;                   # Trailing semicolon
""", re.VERBOSE)

In [27]:
# '^' anchors the pattern to the start of the string, so a later 'From' is not found.
print(re.search('^From', 'From Here to Eternity'))  
print(re.search('^From', 'Reciting From Memory'))

<re.Match object; span=(0, 4), match='From'>
None


In [29]:
# '$' matches at the end of the string or just before a newline that ends it:
# a trailing space blocks the match, a trailing '\n' does not.
print(re.search('}$', '{block}'))  
print(re.search('}$', '{block} '))
print(re.search('}$', '{block}\n'))

<re.Match object; span=(6, 7), match='}'>
None
<re.Match object; span=(6, 7), match='}'>


In [31]:
# With re.IGNORECASE, 'hello' matches 'Hello' and [a-z]+ matches 'World'.
re.match('hello ([a-z]+)', 'Hello World!', re.IGNORECASE)

<re.Match object; span=(0, 11), match='Hello World'>

In [33]:
# \b is a word boundary: 'class' is found only as a whole word,
# not inside 'declassified' or 'subclass'.
p = re.compile(r'\bclass\b')
print(p.search('no class at all'))
print(p.search('the declassified algorithm'))
print(p.search('one subclass is'))

<re.Match object; span=(3, 8), match='class'>
None
None


In [35]:
# Group 0 is the entire match; group() with no argument is the same as group(0).
p = re.compile('(a)b')
m = p.match('ab')
# Not the cell's last expression, so this value is not displayed.
m.group()

m.group(0)

'ab'

In [37]:
# \1 refers back to whatever group 1 matched, so this pattern finds a doubled word.
p = re.compile(r'\b(\w+)\s+\1\b')
p.search('Paris in the the spring').group()

'the the'

## Non-capturing and Named Groups

In [39]:
# A repeated capturing group keeps only its last repetition ('c').
m = re.match("([abc])+", "abc")
print(m.groups())

# (?:...) groups without capturing, so groups() is empty.
m = re.match("(?:[abc])+", "abc")
print(m.groups())

('c',)
()


In [41]:
# (?P<word>...) gives the group a name; it can still be fetched by number.
p = re.compile(r'(?P<word>\b\w+\b)')
m = p.search('(((( Lots of punctuation )))')

print(m.group('word'))
print(m.group(1))

Lots
Lots


In [43]:
# groupdict() maps each named group to the text it matched.
m = re.match(r'(?P<first>\w+) (?P<last>\w+)', 'Jane Doe')
m.groupdict()

{'first': 'Jane', 'last': 'Doe'}

## Modifying Strings

In [46]:
# Split on runs of non-word characters. The second call caps the number of
# splits, passing the limit by keyword so its meaning is explicit.
p = re.compile(r'\W+')
print(p.split('This is a test, short and sweet, of split().'))
print(p.split('This is a test, short and sweet, of split().', maxsplit=3))

['This', 'is', 'a', 'test', 'short', 'and', 'sweet', 'of', 'split', '']
['This', 'is', 'a', 'test, short and sweet, of split().']


In [47]:
# The trailing '.' is a delimiter too, hence the final empty string in the result.
re.split(r'[\W]+', 'Words, words, words.')

['Words', 'words', 'words', '']

In [48]:
# Wrapping the pattern in a capture group makes split() return the delimiters as well.
re.split(r'([\W]+)', 'Words, words, words.')

['Words', ', ', 'words', ', ', 'words', '.', '']

In [49]:
# Split at most once. maxsplit is passed by keyword because positional use of
# the optional arguments of module-level re.split() is deprecated since Python 3.13.
re.split(r'[\W]+', 'Words, words, words.', maxsplit=1)

['Words', 'words, words.']

In [50]:
# sub() replaces every match of the pattern with the replacement string.
p = re.compile('(blue|white|red)')
p.sub('colour', 'blue socks and red shoes')

'colour socks and colour shoes'

In [51]:
# count=1 limits the substitution to the first match only.
p.sub('colour', 'blue socks and red shoes', count=1)

'colour socks and red shoes'