# Regex Exercises

In [1]:
import numpy as np
import pandas as pd
import re

__1) Write a function named is_vowel. It should accept a string as input and use a regular expression to determine if the passed string is a vowel. While not explicitly mentioned in the lesson, you can treat the result of re.search as a boolean value that indicates whether or not the regular expression matches the given string.__

In [2]:
def is_vowel(string):
    """Return True if ``string`` is exactly one vowel (a, e, i, o, u) in either case.

    Parameters
    ----------
    string : str
        The text to test.

    Returns
    -------
    bool
        True only when the whole string is a single vowel character.
        False for consonants, the empty string, and longer strings that
        merely contain a vowel somewhere.
    """
    # fullmatch anchors both ends, so 'hello' is rejected even though it
    # contains vowels; an unanchored search would accept it.
    return bool(re.fullmatch(r'[aeiouAEIOU]', string))

In [3]:
# Sanity check: a single lowercase vowel should return True
is_vowel('o')

True

In [4]:
# Sanity check: a single consonant should return False
is_vowel('n')

False

__2) Write a function named is_valid_username that accepts a string as input. A valid username starts with a lowercase letter, and only consists of lowercase letters, numbers, or the _ character. It should also be no longer than 32 characters. The function should return either True or False depending on whether the passed string is a valid username.__

In [13]:
def is_valid_username(string):
    """Return True if ``string`` is a valid username.

    A valid username starts with a lowercase letter, contains only lowercase
    letters, digits, or underscores, and is between 1 and 32 characters long.

    Parameters
    ----------
    string : str
        The candidate username.

    Returns
    -------
    bool
        True when the entire string satisfies the rules above, else False.
    """
    # One leading letter plus up to 31 more characters gives the 32-character
    # cap while still allowing a single-letter username.
    regexp = r'[a-z][a-z0-9_]{0,31}'

    # fullmatch validates the whole string rather than finding a valid-looking
    # token somewhere inside it.
    return bool(re.fullmatch(regexp, string))

In [6]:
# Starts with a digit, so it should be rejected (False)
is_valid_username('123abc')

False

In [7]:
# Lowercase letters, digits and underscores only: should be accepted (True)
is_valid_username('abc1223__wrx')

True

In [15]:
# Length check: 40 characters exceeds the 32-character limit, so expect False
is_valid_username('abcdefghijklmnopqrstuvwxyzabcdefasdfasdf')

False

In [17]:
# Starts with an uppercase letter, so it should be rejected (False)
is_valid_username('Codeup')

False

__3) Write a regular expression to capture phone numbers. It should match all of the following:__

* (210) 867 5309
* +1 210.867.5309
* 867-5309
* 210-867-5309

In [79]:
def is_phone_number(string):
    """Return True if ``string`` is a phone number in one of the supported formats.

    Supported shapes (separators may be whitespace, '.' or '-'):
    ``867-5309``, ``210-867-5309``, ``(210) 867 5309``, ``+1 210.867.5309``.

    Parameters
    ----------
    string : str
        The text to test.

    Returns
    -------
    bool
        True when the entire string is a phone number, else False.
    """
    regexp = r'''
        (?:\+[0-9]\s)?                         # optional single-digit country code
        (?:(?:\([0-9]{3}\)|[0-9]{3})[\s.-])?   # optional area code; parentheses must be balanced
        [0-9]{3}[\s.-][0-9]{4}                 # 3-digit prefix, separator, 4-digit line number
    '''

    # fullmatch rejects trailing characters such as extra digits or letters,
    # which a start-anchored match would silently accept.
    return bool(re.fullmatch(regexp, string, flags=re.VERBOSE))

In [80]:
# Parenthesised area code with space separators: expect True
is_phone_number('(210) 867 5309')

True

In [82]:
# Country code prefix with dot separators: expect True
is_phone_number('+1 210.867.5309')

True

In [83]:
# Seven-digit local number with no area code: expect True
is_phone_number('867-5309')

True

In [84]:
# Area code with dash separators: expect True
is_phone_number('210-867-5309')

True

In [85]:
# Digit groups of the wrong lengths (1-4-4): expect False
is_phone_number('3 3455 9999')

False

__4) Use regular expressions to convert the dates below to the standardized year-month-day format.__

* 02/04/19
* 02/05/19
* 02/06/19
* 02/07/19
* 02/08/19
* 02/09/19
* 02/10/19

In [92]:
def convert_date(string):
    """Rewrite every ``MM/DD/YY`` date in ``string`` as ``20YY-MM-DD``.

    Two-digit years are assumed to be in the 2000s.

    Parameters
    ----------
    string : str
        Text containing zero or more dates in ``MM/DD/YY`` form.

    Returns
    -------
    str
        The text with each matched date converted; everything else,
        including other numbers, is left untouched.
    """
    # A single substitution reorders the parts and adds the century, so the
    # '20' prefix can only ever land on a matched year. The \b guards stop a
    # longer digit run (e.g. a four-digit year) from being partially matched.
    return re.sub(r'\b(\d{2})/(\d{2})/(\d{2})\b', r'20\3-\1-\2', string)

In [93]:
# First sample date from the exercise; expect '2019-02-04'
convert_date('02/04/19')

'2019-02-04'

In [94]:
# Expect '2019-02-05'
convert_date('02/05/19')

'2019-02-05'

In [95]:
# Expect '2019-02-06'
convert_date('02/06/19')

'2019-02-06'

In [96]:
# Expect '2019-02-07'
convert_date('02/07/19')

'2019-02-07'

In [97]:
# Expect '2019-02-08'
convert_date('02/08/19')

'2019-02-08'

In [98]:
# Expect '2019-02-09'
convert_date('02/09/19')

'2019-02-09'

In [99]:
# Expect '2019-02-10'
convert_date('02/10/19')

'2019-02-10'

__5) Write a regex to extract the various parts of these logfile lines:__

In [147]:
# Sample log text for exercise 5: three request lines, one per row.
# Each line holds, in order: method, path, [timestamp], HTTP version,
# {status code}, byte count, "user agent", and client IP.
string = """GET /api/v1/sales?page=86 [16/Apr/2019:193452+0000] HTTP/1.1 {200} 510348 "python-requests/2.21.0" 97.105.19.58
POST /users_accounts/file-upload [16/Apr/2019:193452+0000] HTTP/1.1 {201} 42 "User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" 97.105.19.58
GET /api/v1/items?page=3 [16/Apr/2019:193453+0000] HTTP/1.1 {429} 3561 "python-requests/2.21.0" 97.105.19.58"""

In [179]:
# Verbose-mode pattern (compile with re.VERBOSE) that splits one log line into
# named groups. Each field is restricted to characters that cannot cross its
# own delimiter, so a field never swallows part of its neighbour.
regexp = r"""
(?P<type>GET|POST)             # HTTP method
\s
(?P<path>\S+)                  # request path including any query string
\s
\[(?P<timestamp>[^\]]+)\]      # everything between the square brackets
\s
(?P<http_version>HTTP/1\.1)    # dot escaped so only a literal period matches
\s
\{(?P<status_code>\d{3})\}     # three-digit status code inside braces
\s
(?P<bytes>\d+)                 # response size, at least one digit
\s
"(?P<user_agent>[^"]+)"        # quoted user agent, quotes excluded
\s
(?P<ip>\S+)                    # client address
"""


In [180]:
# Start from an empty DataFrame; the raw log lines are added as a column next
df = pd.DataFrame()

In [181]:
# Add a 'line' column holding one raw log line per row
# (strip surrounding whitespace first so no empty leading/trailing rows appear)
df['line'] = string.strip().split('\n')

In [182]:
# Display the frame to confirm there is one row per log line
df

Unnamed: 0,line
0,GET /api/v1/sales?page=86 [16/Apr/2019:193452+...
1,POST /users_accounts/file-upload [16/Apr/2019:...
2,GET /api/v1/items?page=3 [16/Apr/2019:193453+0...


In [183]:
# Parse each raw log line into its named fields and attach them as columns.
# Always rebuild from the 'line' column alone so that re-running this cell
# does not append a second copy of the extracted columns.
df = pd.concat(
    [df[['line']], df['line'].str.extract(regexp, flags=re.VERBOSE)],
    axis=1,
)

In [184]:
# Display the frame to inspect the extracted columns next to the raw line
df

Unnamed: 0,line,type,path,timestamp,http_version,status_code,bytes,user_agent,ip
0,GET /api/v1/sales?page=86 [16/Apr/2019:193452+...,GET,/api/v1/sales?page=86,16/Apr/2019:193452+0000,HTTP/1.1,200,510348,python-requests/2.21.0,97.105.19.58
1,POST /users_accounts/file-upload [16/Apr/2019:...,POST,/users_accounts/file-upload,16/Apr/2019:193452+0000,HTTP/1.1,201,42,User-Agent: Mozilla/5.0 (X11; Fedora; Fedora; ...,97.105.19.58
2,GET /api/v1/items?page=3 [16/Apr/2019:193453+0...,GET,/api/v1/items?page=3,16/Apr/2019:193453+0000,HTTP/1.1,429,3561,python-requests/2.21.0,97.105.19.58
