### Regex Exercises
For each question below, enter a regex that matches all the positive examples but none of the negative examples. An example question and its solution are presented below.

You will find there are many regexps that can distinguish these small sets. Programming with regexps is an art...and debugging needs trial and error. 

Refer to the re documentation in 

https://docs.python.org/3/howto/regex.html

and in 

https://docs.python.org/3/library/re.html (only as a reference) 

In [1]:
# example question
import re
# Strings the pattern is required to match.
pos_examples = ['abcdefg','abcde','abcd']
# Strings the pattern is required NOT to match.
neg_examples = ['abc']

# Shared checker for every exercise in this notebook.
def check_correctness(pos_list, neg_list, pattern):
    """Report whether *pattern* separates the positive from the negative examples.

    Every string in ``pos_list`` followed by every string in ``neg_list`` is
    tested with ``re.search`` (a match anywhere in the string counts).

    The list of matching strings is printed, which makes it easy to see which
    example caused a failure.

    Returns True only when the matching strings are exactly ``pos_list``, in
    the same order; otherwise returns False.
    """
    candidates = pos_list + neg_list
    matched = list(filter(lambda text: re.search(pattern, text), candidates))
    # Echo the matches: this is the list shown under each exercise cell.
    print(matched)
    return matched == pos_list

# 'abc' followed by at least one more character, so the bare 'abc' is rejected.
pattern = r'abc.+'
check_correctness(pos_examples, neg_examples, pattern)

['abcdefg', 'abcde', 'abcd']


True

In [2]:
# exercise 1 : find words ending in 'ly'
pos_examples = [ 'quickly', 'bravely', 'slowly']
neg_examples = [ 'quick', 'gnu', 'wildebeeste', 'jellyfish']
# check_correctness uses re.search, which matches anywhere in the string, so
# the '$' anchor is needed to reject words such as 'jellyfish' that merely
# contain 'ly'.
pattern = r'ly$'
check_correctness( pos_examples, neg_examples, pattern )

['quickly', 'bravely', 'slowly']


True

In [39]:
# exercise 1
# '123' must have one character on each side, so a '123' at the very start or
# the very end of the string (as in '123okq,123') does not count.
pos_examples = ['abc123xyz','define "123"','var g = 123;']
neg_examples = ['xij456zki','1tom2toni3','123okq,123']
pattern = r'.123.'
check_correctness(pos_examples, neg_examples, pattern)

['abc123xyz', 'define "123"', 'var g = 123;']


True

In [130]:
# exercise 2
# The string starts with three non-whitespace characters followed by a
# literal dot.
# The third positive example is written with an explicit '\\' because '\.' is
# not a valid escape sequence in a normal (non-raw) string literal; the value
# is unchanged: '?', '=', backslash, '.'.
pos_examples = ['cat.', '896.', '?=\\.']
neg_examples = ['abc1', '7s9?', 'a b.', 'hello.']
pattern = r'^\S{3}\.'
check_correctness(pos_examples, neg_examples, pattern)

['cat.', '896.', '?=\\.']


True

In [146]:
# exercise 3
# One of c/m/f followed by 'an' at the end of the string. The '$' anchor is
# what rejects 'cannot'. The one-character classes [a] and [n] behave like
# plain 'a' and 'n'.
pos_examples = ['can','man','fan']
neg_examples = ['dan','ran','pan','world','cannot']
pattern = r'[cmf][a][n]$'
check_correctness(pos_examples, neg_examples, pattern)

['can', 'man', 'fan']


True

In [144]:
# exercise 4
# 'h' or 'b' followed by 'og' at the end of the string. The one-character
# classes [o] and [g] behave like plain 'o' and 'g'.
pos_examples = ['hog','bog']
neg_examples = ['dog','boy','ooh','og?']
pattern = r'[hb][o][g]$'
check_correctness(pos_examples, neg_examples, pattern)

['hog', 'bog']


True

In [154]:
# exercise 5
pos_examples = ['Ana','Bob','Cpc']
neg_examples = ['aAx','bCy','cBB','76k','Hello']
# The whole string is one uppercase letter followed by exactly two lowercase
# letters. '^' and '$' together reject longer words such as 'Hello'.
# (An earlier attempt, r'^([A-Z])[a-z][^a-z]\1$', needed four characters and a
# case-sensitive repeat of the first letter, so it matched none of the
# three-letter examples.)
pattern = r'^[A-Z][a-z]{2}$'
check_correctness(pos_examples, neg_examples, pattern)

['Ana', 'Bob', 'Cpc']


True

In [149]:
# exercise 6
# 'wa', then two or more 'z' ('zz+'), then 'up'. A single 'z' as in 'wazup'
# is not enough.
pos_examples = ['wazzzzzup','wazzzup']
neg_examples = ['wazup','wakup','wazdsup','wazzkp']
pattern = r'wazz+up'
check_correctness(pos_examples, neg_examples, pattern)

['wazzzzzup', 'wazzzup']


True

In [170]:
# exercise 7
# The first character must not be a digit and must be followed by at least
# one more character: 'a' is too short, '96aacc' starts with a digit.
pos_examples = ['aaaabcc','aabbcc','?]aa74ccdx']
neg_examples = ['a','96aacc']
pattern = r'^[^0-9].'
check_correctness(pos_examples, neg_examples,pattern)

['aaaabcc', 'aabbcc', '?]aa74ccdx']


True

In [186]:
# exercise 8
# One or more digits, a space, 'file' or 'File' with an optional plural 's',
# then ' found.' at the end of the string.
# The final dot is escaped so that only a full stop is accepted (an unescaped
# '.' would also accept '?' or '!'), and 's?' allows at most one 's'.
pos_examples = ['1 file found.','2 files found.','No.24 File found.']
neg_examples = ['No files found.','file 1 is found!','2 birds found?']
pattern = r'[0-9]+ [fF]iles? found\.$'
check_correctness(pos_examples, neg_examples,pattern)

['1 file found.', '2 files found.', 'No.24 File found.']


True

In [190]:
# exercise 9
# One or more digits, a literal dot ('[.]'), then one or more tab characters.
# In the example strings '\t' is a real tab, whereas '1.tabc' contains the
# letter 't'.
pos_examples = ['1.\tabc','2.\tabc','37.\t\t\tabc']
neg_examples = ['4.abc','1.tabc','3\tabc']
pattern = r'[0-9]{1,}[.][\t]{1,}'
check_correctness(pos_examples, neg_examples,pattern)

['1.\tabc', '2.\tabc', '37.\t\t\tabc']


True

In [200]:
# exercise 10
# Lowercase 'p', exactly one arbitrary character, then 't'. '.{1}' behaves
# like a plain '.'. The match is case-sensitive, so 'Pot' is rejected.
pos_examples = ['pit','spot','spate','slap two','respite']
neg_examples = ['pt','Pot','peat','part']
# Alternative that restricts the middle character to a lowercase letter or space:
# pattern = r'[p][a-z ][t]'
pattern = r'p.{1}t'
check_correctness(pos_examples, neg_examples, pattern)

['pit', 'spot', 'spate', 'slap two', 'respite']


True

In [227]:
# to go further, devise some regexes with capture groups ()
# See the section on Grouping in https://docs.python.org/3/howto/regex.html

# write a regex to extract both the telephone and fax numbers from text 
# in the form: 

telnos = 'Tel: 010 345 6789  Fax: 010 344 7474'
# Each number is 3 + 3 + 4 digits. ' ?' makes the space between the digit
# groups optional, so both '010 345 6789' and '0103456789' are accepted.
# '\s+' allows any amount of whitespace between the Tel and Fax fields.
pattern = (r'Tel:\s*(?P<tel>\d{3} ?\d{3} ?\d{4})'
           r'\s+Fax:\s*(?P<fax>\d{3} ?\d{3} ?\d{4})')

p = re.compile(pattern)
# match() anchors at the start of the string and returns None when the text
# does not have the expected layout.
m = p.match(telnos)
if m is None:
    print('no telephone/fax numbers found')
else:
    # Named groups can be read with m.group(name) ...
    print(m.group('tel'))
    # ... or through the dict returned by m.groupdict().
    print(m.groupdict()['fax'])
    # To drop the spaces from a captured number:
    #   m.group('tel').replace(' ', '')

010 345 6789
010 344 7474
