In [1]:
import re

import pandas as pd

# Regex Exercises

1. Write a function named is_vowel. It should accept a string as input and use a regular expression to determine if the passed string is a vowel. While not explicitly mentioned in the lesson, you can treat the result of re.search as a boolean value that indicates whether or not the regular expression matches the given string.

In [179]:
def is_vowel(letter):
    """
    Indicates if a letter is a vowel or not.

    Parameters
    ----------
    letter : str
        A single character

    Raises
    ------
    ValueError
        Thrown if length of letter is 0 or > 1

    Returns
    -------
    bool
        True if letter is "a", "e", "i", "o", or "u" (upper or lower case).
        False otherwise.
    """

    if len(letter) != 1:
        raise ValueError("letter length must be 1")

    # Both cases are spelled out instead of using re.IGNORECASE: with that
    # flag, Unicode case folding also lets a few non-ASCII letters
    # (e.g. U+0130 / U+0131) match "i".
    vowels = r'[aeiouAEIOU]'

    # re.fullmatch yields a Match object or None; bool() turns that into the
    # True/False promised by the docstring.
    return bool(re.fullmatch(vowels, letter))

In [181]:
# Sanity checks for is_vowel: every vowel (either case) must be accepted,
# and consonants must compare equal to False.
for vowel in ("a", "A", "e", "i", "o", "u"):
    assert is_vowel(vowel)

for consonant in ("y", "B"):
    assert is_vowel(consonant) == False

2. Write a function named is_valid_username that accepts a string as input. A valid username starts with a lowercase letter, and only consists of lowercase letters, numbers, or the _ character. It should also be no longer than 32 characters. The function should return either True or False depending on whether the passed string is a valid username.

In [200]:
def is_valid_username(username):
    """
    Indicates if a username is valid or not.

    Parameters
    ----------
    username : str
        The username to be checked

    Returns
    -------
    bool
        True if the username is valid and False otherwise. A valid username
        starts with a lowercase ASCII letter, consists only of lowercase ASCII
        letters, ASCII digits or underscores, and is 32 characters or less.
    """

    # One anchored pattern covers all three rules:
    #   [a-z]            -> first character is a lowercase letter
    #   [a-z0-9_]{0,31}  -> up to 31 more allowed characters (32 in total)
    # [0-9] is used instead of \d because \d on a str pattern also matches
    # non-ASCII decimal digits (e.g. Arabic-Indic digits).
    valid_username = r"[a-z][a-z0-9_]{0,31}"

    # fullmatch requires the whole string to match, so a trailing newline or
    # any other stray character makes the username invalid.
    return re.fullmatch(valid_username, username) is not None

In [201]:
# Checks for is_valid_username, grouped by expected outcome.
for accepted in ("codeup", "codeup123"):
    assert is_valid_username(accepted)

# Too long, starts with an uppercase letter, starts with a digit.
for rejected in ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "Codeup", "1codeup"):
    assert is_valid_username(rejected) == False

3. Write a regular expression to capture phone numbers. It should match all of the following:

(210) 867 5309\
+1 210.867.5309\
867-5309\
210-867-5309

In [202]:
# Pattern for North-American style phone numbers such as
#   (210) 867 5309 / +1 210.867.5309 / 867-5309 / 210-867-5309
# Capture groups: 1 = area code (None when absent), 2 = exchange, 3 = line number.
# The digit look-arounds stop the pattern from matching seven digits that are
# merely part of a longer run of digits (e.g. an account number).
phone_numbers = (
    r"(?<!\d)"                      # not preceded by another digit
    r"(?:\+?\d{1,2}[\s.-]*)?"       # optional country code, e.g. "+1 "
    r"(?:\(?(\d{3})\)?[\s.-]*)?"    # optional area code, parentheses allowed
    r"(\d{3})[\s.-]*"               # exchange
    r"(\d{4})"                      # line number
    r"(?!\d)"                       # not followed by another digit
)

In [203]:
# Every sample format from the exercise must be found by the pattern.
sample_numbers = (
    "(210) 867 5309",
    "+1 210.867.5309",
    "867-5309",
    "210-867-5309",
)
for sample in sample_numbers:
    assert re.search(phone_numbers, sample)

4. Use regular expressions to convert the dates below to the standardized year-month-day format.

02/04/19\
02/05/19\
02/06/19\
02/07/19\
02/08/19\
02/09/19\
02/10/19

In [204]:
# Matches a two-digit month/day/year date such as 02/04/19.
# Groups: 1 = month, 2 = day, 3 = year.
# The look-arounds keep the pattern from matching inside a longer digit run;
# without them "02/04/2019" would be rewritten using only "20" as the year
# and leave a stray "19" behind.
month_day_year = r"(?<!\d)(\d{2})/(\d{2})/(\d{2})(?!\d)"

In [205]:
# Each month/day/year input paired with its expected year-month-day output.
expected_dates = {
    "02/04/19": "19-02-04",
    "02/05/19": "19-02-05",
    "02/06/19": "19-02-06",
    "02/07/19": "19-02-07",
    "02/08/19": "19-02-08",
    "02/09/19": "19-02-09",
    "02/10/19": "19-02-10",
}
for original_date, converted_date in expected_dates.items():
    assert re.sub(month_day_year, r"\3-\1-\2", original_date) == converted_date

5. Write a regex to extract the various parts of these logfile lines:

`GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58`

`POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58`

`GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58`

In [541]:
# Pattern that splits one log line into named fields. Each field uses a
# character class that cannot run past its own delimiter, so a malformed line
# fails to match instead of having one field swallow part of its neighbour.
log_extracts = (
    r'^(?P<request_type>[A-Z]+)\s'      # HTTP verb, e.g. GET / POST
    r'(?P<url>/\S*)\s'                  # path + query string; a bare "/" is allowed
    r'(?P<timestamp>\[[^\]]*\])\s'      # bracketed timestamp, brackets kept
    r'(?P<protocol>HTTP/\S+)\s'         # e.g. HTTP/1.1
    r'(?P<status_cd>\{\d{3}\})\s'       # status code in literal braces, braces kept
    r'(?P<size_bytes>\d+)\s'            # response size, digits only
    r'(?P<client>"[^"]*")\s'            # quoted user agent, quotes kept
    r'(?P<ip_address>\S+)'              # client address
)

In [542]:
# Parse the first sample line and display its named groups as a dict.
re.search(
    log_extracts,
    'GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58',
).groupdict()

{'request_type': 'GET',
 'url': '/api/v1/sales?page=86',
 'timestamp': '[16/Apr/2019:193452+0000]',
 'protocol': 'HTTP/1.1',
 'status_cd': '{200}',
 'size_bytes': '510348',
 'client': '"python-requests/2.21.0"',
 'ip_address': '97.105.19.58'}

In [545]:
# Parse the POST sample (long, space-containing user agent) and display its named groups.
re.search(
    log_extracts,
    'POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58',
).groupdict()

{'request_type': 'POST',
 'url': '/users_accounts/file-upload',
 'timestamp': '[16/Apr/2019:193452+0000]',
 'protocol': 'HTTP/1.1',
 'status_cd': '{201}',
 'size_bytes': '42',
 'client': '"User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"',
 'ip_address': '97.105.19.58'}

In [546]:
# Parse the third sample line (status 429) and display its named groups.
re.search(
    log_extracts,
    'GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58',
).groupdict()

{'request_type': 'GET',
 'url': '/api/v1/items?page=3',
 'timestamp': '[16/Apr/2019:193453+0000]',
 'protocol': 'HTTP/1.1',
 'status_cd': '{429}',
 'size_bytes': '3561',
 'client': '"python-requests/2.21.0"',
 'ip_address': '97.105.19.58'}

6. You can find a list of words on your Mac at /usr/share/dict/words. Use this file to answer the following questions:

In [596]:
# Load the system word list, one word per row in column 0.
# na_filter=False switches off pandas' missing-value detection: by default
# read_csv converts entries spelled like NA markers (e.g. "null", "nan", "NA")
# to NaN, which would silently remove real words from the list.
# dtype=str keeps every entry as text so it can be passed to `re` functions.
words_df = pd.read_csv(
    "/usr/share/dict/words",
    header=None,
    dtype=str,
    na_filter=False,
)
word_series = words_df[0]

In [598]:
# Keep only the entries that are not missing (NaN), so the loops below are
# never handed a NaN value.
word_series = word_series[word_series.notna()]

* How many words have at least 3 vowels?

In [599]:
vowels = r"[aeiouAEIOU]"

# Count the words containing three or more vowels anywhere in the word
# (the vowels do not have to be adjacent).
more_than_three = sum(
    1 for word in word_series if len(re.findall(vowels, word)) >= 3
)

more_than_three

191365

* How many words have at least 3 vowels in a row?

In [603]:
vowels_in_a_row = r"[aeiouAEIOU]{3}"

# Count the words in which three vowels appear back to back somewhere.
three_vowels_in_row = sum(
    1 for word in word_series if re.search(vowels_in_a_row, word)
)

three_vowels_in_row

6182

* How many words have at least 4 consonants in a row?

* How many words start and end with the same letter?
* How many words start and end with a vowel?
* How many words contain the same letter 3 times in a row?
* What other interesting patterns in words can you find?