# Regular Expressions & Python

In [6]:
import re
# Four dot-separated runs of 1-3 digits, bounded by word boundaries.
# The pattern checks the shape only; it does not limit each part to 0-255.
pattern = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')

new_ip = '127.0.0.1'
old = 'IPs : 173.254.28.78 or 167.81.178.97'

# Swap every address found in `old` for `new_ip` via the compiled pattern.
replaced = pattern.sub(new_ip, old)

print('replaced = %s' % (replaced,))

replaced = IPs : 127.0.0.1 or 127.0.0.1


# Pluralizing Nouns with Regular Expressions

In [12]:
import re
# [abc] is a character class: search() reports the first 'a', 'b' or 'c' it finds.
re.search('[abc]', 'Space')

<re.Match object; span=(2, 3), match='a'>

In [13]:
# sub() replaces every match, so both the 'a' and the 'c' in 'Space' become 'o'.
re.sub('[abc]', 'o', 'Space')

'Spooe'

In [14]:
# Nested substitution: the inner call yields 'Spooe'; the outer call then
# replaces the remaining 'e' (the only a/e/u left) with 'n'.
re.sub('[aeu]', 'n', re.sub('[abc]', 'o', 'Space'))

'Spoon'

# Pluralize nouns with regular expressions

In [15]:
def pluralize(noun):
    """Return the plural form of ``noun`` using a few English spelling rules.

    Rules are tried in order:

    * ends in ``s``, ``x`` or ``z``: append ``'es'``
    * ends in ``h`` preceded by a character other than a vowel or
      ``d``, ``g``, ``k``, ``p``, ``r``, ``t``: append ``'es'``
    * ends in ``y`` preceded by a non-vowel: replace the ``y`` with ``'ies'``
    * anything else: append ``'s'``

    Matching is case-sensitive (the character classes are lowercase only) and
    irregular plurals are not handled.

    Args:
        noun: The singular noun as a string.

    Returns:
        The pluralized string.
    """
    # Both rules add the same suffix, so they share one branch. The suffix is
    # concatenated instead of substituted at '$', because '$' can match twice
    # (before a trailing newline and at the very end of the string).
    if re.search('[sxz]$', noun) or re.search('[^aeioudgkprt]h$', noun):
        return noun + 'es'
    if re.search('[^aeiou]y$', noun):
        # Only the final 'y' is rewritten.
        return re.sub('y$', 'ies', noun)
    return noun + 's'

In [20]:
# A non-vowel followed by a final 'y': the same condition pluralize() tests
# before applying its 'ies' rule.
re.search('[^aeiou]y$', 'emergency')

<re.Match object; span=(7, 9), match='cy'>

In [25]:
# Replace only the trailing 'y' with 'ies'.
re.sub('y$', 'ies', 'emergency')

'emergencies'

In [26]:
# The same trailing-'y' substitution applied to a longer noun.
re.sub('y$', 'ies', 'semitransparency')

'semitransparencies'

# Parsing Phone Numbers with Regular Expressions

In [27]:
import re
# Three capture groups (3 digits, 3 digits, 4 digits) separated by hyphens;
# ^ and $ anchor the match to the whole string.
pattern = re.compile(r'^(\d{3})-(\d{3})-(\d{4})$')
pattern.search('415-867-5309')

<re.Match object; span=(0, 12), match='415-867-5309'>

In [28]:
# Uses the `pattern` compiled in the previous cell; groups() returns the
# captured pieces as a tuple of strings.
pattern.search('415-867-5309').groups()

('415', '867', '5309')

In [32]:
# Adds a fourth group, (\d+), capturing one or more trailing digits after
# another hyphen.
pattern = re.compile(r'^(\d{3})-(\d{3})-(\d{4})-(\d+)$')
pattern.search('415-867-5309-9875').groups()

('415', '867', '5309', '9875')

In [33]:
# \D+ accepts any run of one or more non-digit characters as the separator,
# so spaces are matched here instead of hyphens.
pattern = re.compile(r'^(\d{3})\D+(\d{3})\D+(\d{4})\D+(\d+)$')
pattern.search('415 867 5309 9999').groups()

('415', '867', '5309', '9999')

# Regular expressions are compiled into pattern objects

In [35]:
import re
 
# \w matches a single "word" character. For str patterns it is Unicode-aware;
# it is equivalent to [a-zA-Z0-9_] only when the re.ASCII flag is used.
# Raw strings (r'...') keep the backslash from being treated as a Python
# string escape.
p = re.compile(r'\w')
print(p.findall("He said * in some_lang."))

# \w+ matches a run of one or more word characters.
p = re.compile(r'\w+')
print(p.findall("I went to him at 11 A.M., he "
                "said *** in some_language."))

# \W matches any single character that is not a word character.
p = re.compile(r'\W')
print(p.findall("he said *** in some_language."))

['H', 'e', 's', 'a', 'i', 'd', 'i', 'n', 's', 'o', 'm', 'e', '_', 'l', 'a', 'n', 'g']
['I', 'went', 'to', 'him', 'at', '11', 'A', 'M', 'he', 'said', 'in', 'some_language']
[' ', ' ', '*', '*', '*', ' ', ' ', '.']


In [36]:
import re
# [a-e] matches one lowercase letter from 'a' through 'e'; the range is
# case-sensitive, so the capital 'A' in "Aye" is not matched.
p = re.compile('[a-e]')
letters_a_to_e = p.findall("Aye, said Mr. Gibenson Stark")
print(letters_a_to_e)

['e', 'a', 'd', 'b', 'e', 'a']
