# Exercises on regexes - Teacher version
Relevant documentation: https://docs.python.org/3.5/library/re.html, https://docs.python.org/3.5/howto/regex.html#regex-howto

In [33]:
import re
from datetime import date

In [14]:
def text_match(text, pattern):
    """Report whether ``pattern`` is found anywhere in ``text``.

    The lookup uses ``re.search``, so the pattern may match at any position
    unless the pattern itself is anchored.
    """
    found = re.search(pattern, text)
    return found is not None

def evaluate_pattern(text_list, pattern, should_match=True):
    """Print, for each text in ``text_list``, whether ``pattern`` behaved as expected.

    text_list: iterable of strings to check.
    pattern: regular expression passed on to ``text_match``.
    should_match: truthy when the texts are expected to match, falsy when
        they are expected not to match.
    """
    for text in text_list:
        matched = text_match(text, pattern)
        if should_match:
            if matched:
                print(f"successfully matched pattern {pattern} to text {text}")
            else:
                print(f"failed to match pattern {pattern} to text {text}")
        elif matched:
            print(f"matches pattern {pattern} to text {text}, but shouldn't")
        else:
            print(f"correctly does not match pattern {pattern} to text {text}")

## Exercise 1  
Complete the provided Python program that matches a string that has an a followed by zero or more b's.

In [73]:
# An "a" followed by zero or more "b" characters (the * quantifier).
pattern = r"ab*"

In [74]:
# Texts containing an "a" should match; "bc" has no "a" at all.
should_match = ["ac", "abc", "abbc"]
should_not_match = ["bc"]

evaluate_pattern(text_list=should_match, pattern=pattern, should_match=True)
evaluate_pattern(text_list=should_not_match, pattern=pattern, should_match=False)

successfully matched pattern ab* to text ac
successfully matched pattern ab* to text abc
successfully matched pattern ab* to text abbc
correctly does not match pattern ab* to text bc


## Exercise 2
Complete the provided Python program that matches a string that has an a followed by one or more b's.

In [75]:
# An "a" followed by one or more "b" characters (the + quantifier).
pattern = r"ab+"

In [76]:
# At least one "b" must follow the "a"; "bc" and "ac" lack that.
should_match = ["abc", "abbc"]
should_not_match = ["bc", "ac"]

evaluate_pattern(text_list=should_match, pattern=pattern, should_match=True)
evaluate_pattern(text_list=should_not_match, pattern=pattern, should_match=False)

successfully matched pattern ab+ to text abc
successfully matched pattern ab+ to text abbc
correctly does not match pattern ab+ to text bc
correctly does not match pattern ab+ to text ac


## Exercise 3
Complete the provided Python program that matches a string that has an a followed by zero or one b's.

In [77]:
# An "a" followed by zero or one "b" (the ? quantifier).
pattern = r"ab?"

In [78]:
# The pattern is not anchored, so "abbc" still matches through its leading "ab".
should_match = ["ac", "abc", "abbc"]
should_not_match = ["bc"]

evaluate_pattern(text_list=should_match, pattern=pattern, should_match=True)
evaluate_pattern(text_list=should_not_match, pattern=pattern, should_match=False)

successfully matched pattern ab? to text ac
successfully matched pattern ab? to text abc
successfully matched pattern ab? to text abbc
correctly does not match pattern ab? to text bc


## Exercise 4
Complete the provided Python program that matches a string that has an a followed by 3 b's.

In [79]:
# An "a" followed by three "b" characters (the {3} quantifier).
pattern = r"ab{3}"

In [80]:
# Only "aabbbc" contains an "a" directly followed by three "b"s.
should_match = ["aabbbc"]
should_not_match = ["bc", "ac", "abc", "abbc"]

evaluate_pattern(text_list=should_match, pattern=pattern, should_match=True)
evaluate_pattern(text_list=should_not_match, pattern=pattern, should_match=False)

successfully matched pattern ab{3} to text aabbbc
correctly does not match pattern ab{3} to text bc
correctly does not match pattern ab{3} to text ac
correctly does not match pattern ab{3} to text abc
correctly does not match pattern ab{3} to text abbc


## Exercise 5
Complete the provided Python program that matches a string that has an a followed by 2 to 4 b's, followed by 1 or more c's. 

In [81]:
# An "a", then two to four "b"s ({2,4}), then one or more "c"s (+).
pattern = r"ab{2,4}c+"

In [82]:
# "aabbbbbbbc" fails because more than four "b"s sit between the "a" and the "c".
should_match = ["abbc", "aabbbc", "aabbbcc"]
should_not_match = ["bc", "ac", "abc", "aabbbbbbbc"]

evaluate_pattern(text_list=should_match, pattern=pattern, should_match=True)
evaluate_pattern(text_list=should_not_match, pattern=pattern, should_match=False)

successfully matched pattern ab{2,4}c+ to text abbc
successfully matched pattern ab{2,4}c+ to text aabbbc
successfully matched pattern ab{2,4}c+ to text aabbbcc
correctly does not match pattern ab{2,4}c+ to text bc
correctly does not match pattern ab{2,4}c+ to text ac
correctly does not match pattern ab{2,4}c+ to text abc
correctly does not match pattern ab{2,4}c+ to text aabbbbbbbc


## Exercise 6
Complete the provided Python program that matches a string, that has an a, followed by anything, ending in a b.

In [83]:
# An "a", then any characters (.*), with a "b" as the last character ($).
pattern = r"a.*b$"

In [84]:
# The text must end in "b"; the "a" may sit anywhere before it.
should_match = ["accddbbjjjb", "dfracccccdjjjb"]
should_not_match = ["aabbbbd", "aabAbbbc"]

evaluate_pattern(text_list=should_match, pattern=pattern, should_match=True)
evaluate_pattern(text_list=should_not_match, pattern=pattern, should_match=False)

successfully matched pattern a.*b$ to text accddbbjjjb
successfully matched pattern a.*b$ to text dfracccccdjjjb
correctly does not match pattern a.*b$ to text aabbbbd
correctly does not match pattern a.*b$ to text aabAbbbc


## Exercise 7
Complete the provided Python program that matches a string, that starts with an a, followed by anything, ending in a b.

In [85]:
# Starts with "a" (^), then any characters (.*), and ends with "b" ($).
pattern = r"^a.*b$"

In [86]:
# "dfracccccdjjjb" ends in "b" but does not start with "a", so ^ rejects it.
should_match = ["accddbbjjjb"]
should_not_match = ["aabbbbd", "aabAbbbc", "dfracccccdjjjb"]

evaluate_pattern(text_list=should_match, pattern=pattern, should_match=True)
evaluate_pattern(text_list=should_not_match, pattern=pattern, should_match=False)

successfully matched pattern ^a.*b$ to text accddbbjjjb
correctly does not match pattern ^a.*b$ to text aabbbbd
correctly does not match pattern ^a.*b$ to text aabAbbbc
correctly does not match pattern ^a.*b$ to text dfracccccdjjjb


## Exercise 8
Complete the provided Python program to remove leading zeroes from an IP address.

In [29]:
import re
def remove_leading_zeroes(ip):
    """Return ``ip`` with leading zeroes stripped from every octet.

    A run of zeroes is removed only when it starts a number and is followed
    by another digit. An octet that is exactly "0" is therefore preserved,
    and the first octet is treated like all the others.
    """
    # \b anchors at the start of a number; the lookahead (?=\d) keeps the
    # last digit so that "0" or "000" never collapses to an empty octet.
    return re.sub(r'\b0+(?=\d)', '', ip)


ip = "216.08.094.196"
string = remove_leading_zeroes(ip)

assert string == "216.8.94.196"

print(string)

216.8.94.196


## Exercise 9
Complete the provided Python program to look for the words "find", "lorem ipsum", "uninspired" and "home".  

In [30]:
import re
# Words to look for; re.search treats each one as a regular expression.
patterns = ['find', 'lorem ipsum', 'uninspired', 'home']
text = 'I find all these "lorem ipsum"-examples quite uninspired.'
for pattern in patterns:
    print('Searching for "%s" in "%s" ->' % (pattern, text))
    # A match object is truthy, None is falsy.
    print('Matched!' if re.search(pattern, text) else 'Not Matched!')

Searching for "find" in "I find all these "lorem ipsum"-examples quite uninspired." ->
Matched!
Searching for "lorem ipsum" in "I find all these "lorem ipsum"-examples quite uninspired." ->
Matched!
Searching for "uninspired" in "I find all these "lorem ipsum"-examples quite uninspired." ->
Matched!
Searching for "home" in "I find all these "lorem ipsum"-examples quite uninspired." ->
Not Matched!


## Exercise 10
Complete the provided Python program to extract the year, month and day from the provided url and convert them to a Python date object. The format in the url will be yyyy/mm/dd or yyyy/m/d.

In [43]:
url1 = "https://www.washingtonpost.com/news/football-insider/wp/2016/12/24/odell-beckhams-fame-rests-on-one-stupid-little-ball-josh-norman-tells-author/"
url2 = "https://www.washingtonpost.com/news/football-insider/wp/2016/9/2/odell-beckhams-fame-rests-on-one-stupid-little-ball-josh-norman-tells-author/"

# Raw string: "\d" in a normal string literal is an invalid escape sequence
# that newer Python versions warn about. The three groups capture a
# four-digit year and a one- or two-digit month and day between slashes.
pattern = r"/(\d{4})/(\d{1,2})/(\d{1,2})/"

def extract_date(url, pattern):
    """Build a ``date`` from the groups that ``pattern`` captures in ``url``.

    The captured groups are converted to integers and passed to ``date`` in
    order. Returns None when ``pattern`` is not found in ``url``.
    """
    found = re.search(pattern, url)
    if found is None:
        return None
    return date(*map(int, found.groups()))

# Both URL shapes (two-digit and one-digit month/day) must give the expected date.
assert extract_date(url1, pattern) == date(year=2016, month=12, day=24)
assert extract_date(url2, pattern) == date(year=2016, month=9, day=2)

## Exercise 11
Complete the provided Python program below to convert a date from the yyyy-mm-dd format to dd-mm-yyyy

In [44]:
import re
def change_date_format(dt):
    """Rewrite every yyyy-mm-dd date found in ``dt`` as dd-mm-yyyy.

    Text that does not match the date pattern is returned unchanged.
    """
    iso_date = re.compile(r'(\d{4})-(\d{1,2})-(\d{1,2})')
    # Groups are year, month, day; the replacement emits them in reverse order.
    return iso_date.sub('\\3-\\2-\\1', dt)
# Show the sample date before and after the conversion.
dt1 = "2026-01-02"
print("Original date in YYYY-MM-DD Format: ", dt1)
print("New date in DD-MM-YYYY Format: ", change_date_format(dt1))

Original date in YYY-MM-DD Format:  2026-01-02
New date in DD-MM-YYYY Format:  02-01-2026


## Exercise 12
Complete the provided Python program below to filter out and print the numbers (ie. numeric characters, grouped as they are) from the given string.

In [12]:
import re
# Sample string.
text = "Ten 10, Twenty 20, Thirty 30"
# findall returns only the runs of digits. Splitting on non-digits instead
# also yields an empty first element, because the text starts with a
# non-digit, and that prints as a blank line.
result = re.findall(r"\d+", text)
# Print results.
for element in result:
    print(element)


10
20
30


## Exercise 13
Complete the provided Python program below to split a string with multiple delimiters. A delimiter is a character or sequence of characters that denotes the boundary between separate elements in a plain text or data stream. An example is the comma in the csv-format (comma separated values). For this exercise, possible delimiters are ; \S  *  $ <br>
Note that some of these delimiters have special meaning in the regex, so you must escape them with a backslash \

In [45]:
import re
# Raw strings keep every backslash literal. The text contains a backslash
# followed by "S"; in the pattern, \*, \\ and \$ escape the characters that
# would otherwise have a special meaning in a regex.
text = r'The quick brown\Sfox jumps*over the lazy dog;huzzah.'
parts = re.split(r';|\*|\\S|\$', text)
print(parts)

['The quick brown', 'fox jumps', 'over the lazy dog', 'huzzah.']


## Exercise 14
#### Complete the provided Python program below to check if a given password meets certain criteria. For this exercise, a good password consists of at least 12 but not more than 16 characters, at least one uppercase letter, at least one lowercase letter, at least one number and a non-alphanumeric character, and it may not contain whitespace characters. This should make the password hard to crack, yet still as easy as possible for the user to remember.<br>
Hint for real life: use two-factor authentication for the apps that allow it and a password manager that autogenerates long, random passwords

In [72]:
import re
def password_check(string):
    """Return True when ``string`` satisfies the password policy.

    Policy: 12 to 16 characters, at least one lowercase letter, one uppercase
    letter, one digit and one non-alphanumeric character, and no whitespace.
    """
    # Each (?=...) lookahead checks one requirement from the start of the
    # string; (?!.*\s) forbids whitespace. [\W_] is used rather than \W
    # because \W treats the underscore as a word character, which would
    # reject "_" as the non-alphanumeric character.
    password_re = re.compile(
        r'^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[\W_])(?!.*\s).{12,16}$'
    )
    return password_re.search(string) is not None

# The first five candidates must be rejected; only the last one is accepted.
assert not password_check("password")
assert not password_check("passw0rd")
assert not password_check("Passw0rd!!")
assert not password_check("letmeinalready11111!")
assert not password_check("LET ME IN now12!")
assert password_check("LetInAlready52!")

# Show the verdict for each candidate.
print(password_check("password"))
print(password_check("passw0rd"))
print(password_check("Passw0rd!!"))
print(password_check("letmeinalready11111!"))
print(password_check("LET ME IN now12!"))
print(password_check("LetInAlready!5"))

False
False
False
False
False
True


## Exercise 15
Complete the provided Python program below to check that a string contains only a certain set of characters, in this case a-z, A-Z and 0-9. The program should return "True" if the checked string only contains allowed characters, and "False" if the string contains any other character. 

In [15]:
import re
def is_allowed_specific_char(string):
    """Return True when ``string`` consists solely of a-z, A-Z and 0-9.

    An empty string contains no disallowed character and so returns True.
    """
    # Search for the first character outside the allowed set. The class must
    # not list "." or dots would be accepted as well.
    disallowed = re.compile(r'[^a-zA-Z0-9]')
    return disallowed.search(string) is None


Now, using the program above, check the strings "ABCDEFabcdef123450", "*&%@#!}{" and "15@a" against the rule outlined above. The expected results are "True", "False", "False".

In [16]:
# Only the purely alphanumeric string is accepted.
assert is_allowed_specific_char("ABCDEFabcdef123450")
assert not is_allowed_specific_char("*&%@#!}{")
assert not is_allowed_specific_char("15@a")

Tips  
-Note that we are using "search" since we have no info on the expected or allowed length of the string  
-As a reminder "Search" will scan through the string looking for the first location where the regular expression pattern produces a match  
-Consider using one or more negations

Alternative solution (if people read the documentation)

In [17]:
import re
def is_allowed_specific_char_alt(string):
    """Return True when ``string`` consists solely of a-z, A-Z and 0-9.

    Variant built on the predefined \\W class instead of an explicit range.
    An empty string contains no disallowed character and so returns True.
    """
    # With re.ASCII, \W means [^a-zA-Z0-9_]. Adding "_" to the class rejects
    # the underscore too, so exactly a-z, A-Z and 0-9 remain allowed. Without
    # the flag, \w would also accept non-ASCII letters and digits.
    disallowed = re.compile(r'[\W_]', re.ASCII)
    return disallowed.search(string) is None

In [18]:
# The alternative implementation must give the same verdicts.
assert is_allowed_specific_char_alt("ABCDEFabcdef123450")
assert not is_allowed_specific_char_alt("*&%@#!}{")
assert not is_allowed_specific_char_alt("15@a")