# Environment

In [1]:
import ddfloww as dd
import pandas as pd
import re

from pydataset import data

In [2]:
# Load the 'tips' example dataset through pydataset's `data` helper.
tips = data('tips')

In [3]:
# Positional names ('sex', 'smoker') are selected unchanged; judging by the
# output header, each keyword argument maps an existing column to a new name
# (total_bill -> total, size -> party_size).
dd.select(tips, 'sex', 'smoker', total_bill='total', size='party_size')

Unnamed: 0,sex,smoker,total,party_size
1,Female,No,16.99,2
2,Male,No,10.34,3
3,Male,No,21.01,3
4,Male,No,23.68,2
5,Female,No,24.59,4
6,Male,No,25.29,4
7,Male,No,8.77,2
8,Male,No,26.88,4
9,Male,No,15.04,2
10,Male,No,14.78,2


In [4]:
# re.findall returns a list with every non-overlapping match of the pattern
# in the string (here the single literal 'b').
re.findall(r'b', 'abcd')

['b']

In [7]:
# Literal patterns: ordinary characters match themselves.
# - Matching is case-sensitive, so 'm' and 'M' give different results.
# - A pattern can match inside a longer token: '1' also matches the first
#   digit of '10' and '12'.
sentence = 'Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.'
dd.show_all_matches([
    r'a',
    r'm',
    r'M',
    r'Mary',
    r'little',
    r'1',
    r'10',
    r'22'
], sentence)

Sentence:

    Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regexp | matches
 ------ | -------
 a      | ['a', 'a', 'a', 'a', 'a']
 m      | ['m', 'm']
 M      | ['M']
 Mary   | ['Mary']
 little | ['little', 'little']
 1      | ['1', '1', '1']
 10     | ['10']
 22     | ['22']


In [9]:
# Character-class shorthands and the wildcard.
res = [
    r'\w', # a "word" character: letter, digit, or underscore
    r'\d', # a digit
    r'\s', # a whitespace character (space, tab, newline, ...)
    r'.', # any single character except a newline (unless re.DOTALL is set)
    r'\.', # a literal period
]
dd.show_all_matches(res, sentence)

Sentence:

    Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regexp | matches
 ------ | -------
 \w     | ['M', 'a', 'r', 'y', 'h', 'a', 'd', 'a', '...']
 \d     | ['1', '1', '0', '1', '2', '2', '2']
 \s     | [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '...']
 .      | ['M', 'a', 'r', 'y', ' ', 'h', 'a', 'd', '...']
 \.     | ['.', '.', '.']


In [10]:
# r'l\w\w\w\W': an 'l', three word characters, then one non-word character.
# r'\d\d': two consecutive digits.
# NOTE(review): re_length presumably sets the width of the "regexp" column in
# the printed table (9 is the length of the longest pattern here) -- confirm
# against the helper's definition.
dd.show_all_matches([r'l\w\w\w\W', r'\d\d'], sentence, re_length=9)

Sentence:

    Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regexp    | matches
 ------    | -------
 l\w\w\w\W | ['lamb.', 'lamb.']
 \d\d      | ['10', '12', '22']


In [11]:
# Quantifiers repeat the preceding element. They are greedy: each match takes
# as many repetitions as it is allowed to.
dd.show_all_matches([
    r'\d+' # one or more digits
], sentence)

print('\n---\n')

dd.show_all_matches([
    r'a{2,}', # two or more 'a's
    r'a{2}', # exactly two 'a's
    r'a{3,4}' # three or four 'a's
], 'aabbaaaa')

Sentence:

    Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regexp | matches
 ------ | -------
 \d+    | ['1', '10', '12', '22']

---

Sentence:

    aabbaaaa

 regexp | matches
 ------ | -------
 a{2,}  | ['aa', 'aaaa']
 a{2}   | ['aa', 'aa', 'aa']
 a{3,4} | ['aaaa']


In [12]:
# Bracketed character classes match any ONE character from the set.
dd.show_all_matches([
    r'[lt]', # either an 'l' or a 't'
    r'[lt]+', # a run of one or more 'l'/'t' characters
    # A leading ^ negates the class: any character that is NOT a lowercase
    # vowel, whitespace, or a period. Note this is broader than "consonants":
    # digits, commas, and uppercase letters (including vowels) match too.
    r'[^aeiou\s\.]',
    r'[a-d]' # a range: any one of a, b, c, d
], sentence, re_length=12)

Sentence:

    Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regexp       | matches
 ------       | -------
 [lt]         | ['l', 't', 't', 'l', 'l', 'l', 't', 't', '...']
 [lt]+        | ['l', 'ttl', 'l', 'l', 'ttl', 'l', 't', 't', '...']
 [^aeiou\s\.] | ['M', 'r', 'y', 'h', 'd', 'l', 't', 't', '...']
 [a-d]        | ['a', 'a', 'd', 'a', 'a', 'b', 'a', 'b']


In [13]:
# Anchors match positions rather than characters.
dd.show_all_matches([
    r'\bo\w+', # a word boundary, an 'o', then one or more word characters
    r'^\s', # the string starts with a whitespace character
    r'^M', # the string starts with 'M'
    r'\.$', # the string ends with a period
], sentence)

Sentence:

    Mary had a little lamb. 1 little lamb. Not 10, not 12, not 22, just one.

 regexp | matches
 ------ | -------
 \bo\w+ | ['one']
 ^\s    | []
 ^M     | ['M']
 \.$    | ['.']


In [14]:
# New sample text for the URL / IP examples. This rebinds `sentence`,
# replacing the nursery-rhyme string used by the earlier cells.
sentence = (
    'You can find us on the web at https://codeup.com. '
    'Our ip address is 123.123.123.123 (maybe).'
)

In [15]:
# A run of digits followed by exactly three ".digits" groups.
# Caveats:
# - \d+ does not limit each part to 0-255 or to three digits.
# - The parentheses form a capturing group, so re.findall with this pattern
#   would return only the text of the last repetition, not the whole address.
#   re.search plus group 0 gives the full match.
ip_re = r'\d+(\.\d+){3}'

match = re.search(ip_re, sentence)
# Group 0 is the entire matched text.
match.group(0)

'123.123.123.123'

In [17]:
# Three capture groups, left to right: protocol, domain name, top-level domain.
# 'https?' makes the trailing 's' optional, so both http and https match.
url_re = r'(https?)://(\w+)\.(\w+)'

# Match.group(1, 2, 3) returns the three captured substrings as one tuple,
# which unpacks directly into the names below.
protocol, domain, tld = re.search(url_re, sentence).group(1, 2, 3)

print(f'''
protocol: {protocol}
domain: {domain}
tld: {tld}
''')


protocol: https
domain: codeup
tld: com



In [1]:
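# Sketch of a more general URL pattern (not executed): a 4-5 character scheme,
# one or more dot-terminated labels (so subdomains are allowed), then a
# top-level domain of at least two characters.
# \w{4,5}://(\w+\.)+\w{2,}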

# EXERCISES

### 1. Write a function named `is_vowel`. It should accept a string input and use a regular expression to determine if the passed string is a vowel. You can treat the result of `re.search` as a boolean value that indicates whether or not the regular expression matches the given string.

### 2. Write a function named `is_valid_username` that accepts a string input. A valid username starts with a lowercase letter, and only consists of lowercase letters, numbers, or the `_` character. It should also be no longer than 32 characters. The function should return either `True` or `False` depending on whether the passed string is a valid username.

### 3. Write a regular expression to capture phone numbers. It should match all of the following:

    (210) 867 5309
    +1 210.867.5309
    867-5309
    210-867-5309

### 4. Use regular expressions to convert the dates below to the standardized year-month-day format.