# Advanced Regular Expressions Lab

Complete the following set of exercises to solidify your knowledge of regular expressions.

In [1]:
import re

### 1. Use a regular expression to find and extract all vowels in the following text.

In [8]:
text = "This is going to be a sentence with a good number of vowels in it."

In [39]:
#create the function:
def volwelsFinder(string):
    """Return every vowel found in ``string``, in order of appearance.

    Both lowercase and uppercase ASCII vowels (a, e, i, o, u) are collected,
    one list entry per occurrence.
    """
    vowel_pattern = re.compile('[aeiouAEIOU]')
    found_vowels = vowel_pattern.findall(string)
    return found_vowels

In [40]:
#call the function (printing it to get the result in one line)
print(volwelsFinder(text))

['i', 'i', 'o', 'i', 'o', 'e', 'a', 'e', 'e', 'e', 'i', 'a', 'o', 'o', 'u', 'e', 'o', 'o', 'e', 'i', 'i']


### 2. Use a regular expression to find and extract all occurrences and forms (singular and plural) of the word "puppy" in the text below.

In [118]:
text = "The puppy saw all the rest of the puppies playing and wanted to join them. I saw this and wanted a puppy of my own!"

In [121]:
#create the function:
def puppFinder(string):
    """Return every occurrence of "puppy" or "puppies" in ``string``.

    Args:
        string: Text to search.

    Returns:
        A list of the matched words, in order of appearance.
    """
    # Word boundaries plus an explicit singular/plural ending keep unrelated
    # words that merely start with "pupp" (e.g. "puppet") out of the result.
    # The raw string avoids Python's invalid-escape warning for regex escapes.
    return re.findall(r'\bpupp(?:y|ies)\b', string)

In [123]:
#call the function:
puppFinder(text)

['puppy', 'puppies', 'puppy']

### 3. Use a regular expression to find and extract all tenses (present and past) of the word "run" in the text below.

In [134]:
text = "I ran the relay race the only way I knew how to run it."

In [135]:
#create the function:
def runFinder(string):
    """Return every standalone occurrence of "run" or "ran" in ``string``.

    Args:
        string: Text to search.

    Returns:
        A list of the matched words, in order of appearance.
    """
    # Word boundaries stop the pattern from matching inside longer words
    # such as "brand" or "prunes".
    return re.findall(r'\br[au]n\b', string)

In [137]:
#call the function:
runFinder(text)

['ran', 'run']

### 4. Use a regular expression to find and extract all words that begin with the letter "r" from the previous text.

In [138]:
#create the function:
def start_r(string):
    """Return every word in ``string`` that begins with a lowercase "r".

    Args:
        string: Text to search.

    Returns:
        A list of the matching words, in order of appearance.
    """
    # The leading \b anchors the "r" to the start of a word; without it the
    # tail of any word containing an "r" (e.g. "bartender" -> "rtender")
    # would be returned as well.
    return re.findall(r'\br\w*', string)

In [139]:
#call the function:
start_r(text)

['ran', 'relay', 'race', 'run']

### 5. Use a regular expression to find the exclamation marks in the text below and replace each one with the letter "i".

In [143]:
text = "Th!s !s a sentence w!th spec!al characters !n !t."

In [144]:
# Replace every "!" in the text with the letter "i".
text_corrected = re.sub(pattern='!', repl='i', string=text)
text_corrected

'This is a sentence with special characters in it.'

### 6. Use a regular expression to find and extract words longer than 4 characters in the text below.

In [146]:
text = "This sentence has words of varying lengths."

In [147]:
def longer_four(string):
    """Return every word in ``string`` that is longer than four characters.

    A "word" here is a run of regex word characters (letters, digits and
    underscore).

    Args:
        string: Text to search.

    Returns:
        A list of the matching words, in order of appearance.
    """
    # Raw string: "\w" in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    return re.findall(r'\w{5,}', string)

In [148]:
longer_four(text)

['sentence', 'words', 'varying', 'lengths']

### 7. Use a regular expression to find and extract all occurrences of the letter "b", some letter(s), and then the letter "t" in the sentence below.

In [149]:
text = "I bet the robot couldn't beat the other bot with a bat, but instead it bit me."

In [150]:
def b_Letters_t(string):
    """Return every "b" + one-or-more word characters + "t" run in ``string``.

    Matches may sit inside a longer word (e.g. "bot" inside "robot").

    Args:
        string: Text to search.

    Returns:
        A list of the matched substrings, in order of appearance.
    """
    # Raw string avoids the invalid "\w" escape; the single-character classes
    # [b] and [t] were redundant and are written as plain literals.
    return re.findall(r'b\w+t', string)

In [151]:
b_Letters_t(text)

['bet', 'bot', 'beat', 'bot', 'bat', 'but', 'bit']

### 8. Use a regular expression to find and extract all words that contain either "ea" or "eo" in them.

In [152]:
text = "During many of the peaks and troughs of history, the people living it didn't fully realize what was unfolding. But we all know we're navigating breathtaking history: Nearly every day could be — maybe will be — a book."


In [154]:
def ea_eo_words(string):
    """Return every word in ``string`` that contains "ea" or "eo".

    Args:
        string: Text to search.

    Returns:
        A list of the matching words, in order of appearance.
    """
    # e[ao] covers both letter pairs in one alternative, replacing the two
    # near-identical branches; the raw string avoids the invalid "\w" escape.
    return re.findall(r'\w*e[ao]\w*', string)

In [155]:
ea_eo_words(text)

['peaks', 'people', 'realize', 'breathtaking', 'Nearly']

### 9. Use a regular expression to find and extract all the capitalized words in the text below individually.

In [156]:
text = "Teddy Roosevelt and Abraham Lincoln walk into a bar."

In [159]:
def cap_words(string):
    """Return every word in ``string`` that starts with an ASCII capital letter.

    Args:
        string: Text to search.

    Returns:
        A list of the capitalized words, in order of appearance.
    """
    # \b pins the capital to the first character of a word, so a capital in
    # the middle of a word (e.g. the "P" in "iPhone") does not start a match.
    return re.findall(r'\b[A-Z]\w*', string)

In [160]:
cap_words(text)

['Teddy', 'Roosevelt', 'Abraham', 'Lincoln']

### 10. Use a regular expression to find and extract all the sets of consecutive capitalized words in the text above.

In [162]:
def consecutive_cap_words(string):
    """Return each run of two or more consecutive capitalized words.

    Words in a run must be separated by exactly one whitespace character.

    Args:
        string: Text to search.

    Returns:
        A list with one string per run, in order of appearance.
    """
    # The repeated non-capturing group lets a run contain any number of
    # capitalized words (the previous pattern stopped after two), while
    # keeping findall's result a list of whole matches.
    return re.findall(r'\b[A-Z]\w*(?:\s[A-Z]\w*)+', string)

In [163]:
consecutive_cap_words(text)

['Teddy Roosevelt', 'Abraham Lincoln']

### 11. Use a regular expression to find and extract all the quotes from the text below.

*Hint: This one is a little more complex than the single quote example in the lesson because there are multiple quotes in the text.*

In [220]:
text = 'Roosevelt says to Lincoln, "I will bet you $50 I can get the bartender to give me a free drink." Lincoln says, "I am in!"'


In [221]:
def quotes(string):
    """Return every double-quoted passage in ``string``, quote marks included.

    Args:
        string: Text to search.

    Returns:
        A list of the quoted passages, in order of appearance.
    """
    # [^"]* pairs each opening quote with the nearest closing quote. With no
    # capturing group, findall returns whole matches, so quotes that sit
    # directly next to each other are each reported instead of only the last.
    return re.findall(r'"[^"]*"', string)

In [222]:
quotes(text)

['"I will bet you $50 I can get the bartender to give me a free drink."',
 '"I am in!"']

### 12. Use a regular expression to find and extract all the numbers from the text below.

In [223]:
text = "There were 30 students in the class. Of the 30 students, 14 were male and 16 were female. Only 10 students got A's on the exam."


In [224]:
def numbers(string):
    """Return every run of consecutive digits in ``string``.

    Args:
        string: Text to search.

    Returns:
        A list of the digit runs as strings, in order of appearance.
    """
    # Raw string avoids the invalid "\d" escape; the surrounding character
    # class was redundant.
    return re.findall(r'\d+', string)

In [225]:
numbers(text)

['30', '30', '14', '16', '10']

### 13. Use a regular expression to find and extract all the social security numbers from the text below.

In [228]:
text = """
Henry's social security number is 876-93-2289 and his phone number is (847)789-0984.
Darlene's social security number is 098-32-5295 and her phone number is (987)222-0901.
"""

In [229]:
def social_security_num(string):
    """Return every social security number (``ddd-dd-dddd``) in ``string``.

    Args:
        string: Text to search.

    Returns:
        A list of the matched numbers, in order of appearance.
    """
    # Fixed 3-2-4 digit groups bounded by \b reject other dash-separated
    # numbers such as dates ("2020-01-15") or dashed phone numbers.
    return re.findall(r'\b\d{3}-\d{2}-\d{4}\b', string)

In [230]:
social_security_num(text)

['876-93-2289', '098-32-5295']

### 14. Use a regular expression to find and extract all the phone numbers from the text above (the same text used in exercise 13).

In [231]:
def phone_num(string):
    """Return every phone number of the form ``(ddd)ddd-dddd`` in ``string``.

    Args:
        string: Text to search.

    Returns:
        A list of the matched phone numbers, in order of appearance.
    """
    # Exact group lengths reject malformed input such as "(1)2-3"; the
    # trailing \b stops a longer digit run from being truncated into a match.
    return re.findall(r'\(\d{3}\)\d{3}-\d{4}\b', string)

In [232]:
phone_num(text)

['(847)789-0984', '(987)222-0901']

### 15. Use a regular expression to find and extract all the formatted numbers (both social security and phone) from the text above (the same text used in exercise 13).

In [235]:
def both_nums(string):
    """Return every social security number and phone number in ``string``.

    Recognized formats are ``ddd-dd-dddd`` (social security) and
    ``(ddd)ddd-dddd`` (phone).

    Args:
        string: Text to search.

    Returns:
        A list of the matched numbers, in order of appearance.
    """
    # One explicit alternative per format; the earlier pattern made the
    # parentheses and first dash optional/repeatable and so also accepted
    # fragments like "12-34".
    return re.findall(r'\(\d{3}\)\d{3}-\d{4}\b|\b\d{3}-\d{2}-\d{4}\b', string)

In [236]:
both_nums(text)

['876-93-2289', '(847)789-0984', '098-32-5295', '(987)222-0901']