# Advanced Regular Expressions Lab

Complete the following set of exercises to solidify your knowledge of regular expressions.

In [1]:
import re

### 1. Use a regular expression to find and extract all vowels in the following text.

In [58]:
# Sample sentence for exercise 1: the text from which the vowels are extracted.
text = "This is going to be a sentence with a good number of vowels in it."

In [59]:
# First approach: test every character of the text with re.match and keep
# only the characters that are lowercase vowels.

vocales = list(filter(lambda letter: re.match("[aeiou]", letter), text))
print(vocales)

['i', 'i', 'o', 'i', 'o', 'e', 'a', 'e', 'e', 'e', 'i', 'a', 'o', 'o', 'u', 'e', 'o', 'o', 'e', 'i', 'i']


In [8]:
# Same extraction with an explicit for loop: print each lowercase vowel found
# in text on its own line. The loop variable has its own name so that it does
# not overwrite the `vocales` list built by the list-based version.

for letter in text:
    if re.match("[aeiou]", letter):
        print(letter)

i
i
o
i
o
e
a
e
e
e
i
a
o
o
u
e
o
o
e
i
i


### 2. Use a regular expression to find and extract all occurrences and forms (singular and plural) of the word "puppy" in the text below.

In [10]:
# Sample text for exercise 2: contains both "puppy" and "puppies".
text = "The puppy saw all the rest of the puppies playing and wanted to join them. I saw this and wanted a puppy of my own!"

In [12]:
# "(?:y|ies)" is a non-capturing alternation, so the whole word "puppy" or
# "puppies" is returned. A character class such as "[y|ies]" matches only one
# character after "pupp" and therefore cuts "puppies" down to "puppi".
# The \b anchors keep the match aligned to whole words.
puppies_1 = re.findall(r'\bpupp(?:y|ies)\b', text)
print(puppies_1)

['puppy', 'puppies', 'puppy']


### 3. Use a regular expression to find and extract all tenses (present and past) of the word "run" in the text below.

In [21]:
# Sample sentence for exercises 3 and 4: contains "ran" and "run" plus other words starting with "r".
text = "I ran the relay race the only way I knew how to run it."

In [22]:
# First approach: scan the whole text with re.findall.
# Inside a character class "|" is a literal pipe, not "or", so the class is
# simply [ua]. The \b anchors stop the pattern from matching inside longer
# words.

run1 = re.findall(r'\br[ua]n\b', text)
print(run1)

['ran', 'run']


In [23]:
# Second approach: split the text on spaces and keep the tokens that
# re.match accepts.
# re.match only anchors at the start of the token, so the trailing \b is what
# rejects longer words such as "running". "|" is left out of the class
# because it would match a literal pipe there.

run2 = [word for word in text.split(" ") if re.match(r"r[ua]n\b", word)]
print(run2)

['ran', 'run']


### 4. Use a regular expression to find and extract all words that begin with the letter "r" from the previous text.

In [25]:
# First approach: scan the whole text with re.findall.
# \b forces the match to start at a word boundary; without it an "r" in the
# middle of a word (e.g. the "reet" of "street") would be reported as if it
# were a word beginning with "r".

word_start_r = re.findall(r"\br[a-z]+", text)
print(word_start_r)

['ran', 'relay', 'race', 'run']


In [26]:
# Second approach: split the text on spaces and keep the tokens for which
# re.match succeeds (re.match only tests the beginning of each token).

word_start_r_2 = list(
    filter(
        lambda token: re.match(r"[r][a-z]+", token),
        text.split(' '),
    )
)
print(word_start_r_2)

['ran', 'relay', 'race', 'run']


### 5. Use a regular expression to find and substitute the letter "i" for the exclamation marks in the text below.

In [27]:
# Sample sentence for exercise 5: every "i" has been replaced by "!".
text = "Th!s !s a sentence w!th spec!al characters !n !t."

In [28]:
# Replace every exclamation mark in text with the letter "i".
exclamation = re.compile('!')
sust_word = exclamation.sub('i', text)
print(sust_word)

This is a sentence with special characters in it.


### 6. Use a regular expression to find and extract words longer than 4 characters in the text below.

In [29]:
# Sample sentence for exercise 6: words of different lengths.
text = "This sentence has words of varying lengths."

In [30]:
# {5,} means "five or more" word characters, and the \b anchors force each
# match to span a whole word. A bare \w{5} would return only the first five
# characters of every long word.
words_4 = re.findall(r'\b\w{5,}\b', text)
print(words_4)

['sentence', 'words', 'varying', 'lengths']


### 7. Use a regular expression to find and extract all occurrences of the letter "b", some letter(s), and then the letter "t" in the sentence below.

In [32]:
# Sample sentence for exercise 7: several "b...t" sequences.
text = "I bet the robot couldn't beat the other bot with a bat, but instead it bit me."

In [33]:
# Collect every stretch made of "b", one or more word characters, and "t".
bt_pattern = re.compile(r'b\w+t')
word_bt = bt_pattern.findall(text)
print(word_bt)

['bet', 'bot', 'beat', 'bot', 'bat', 'but', 'bit']


### 8. Use a regular expression to find and extract all words that contain either "ea" or "eo" in them.

In [34]:
# Sample paragraph for exercise 8: some of its words contain "ea" or "eo".
text = "During many of the peaks and troughs of history, the people living it didn't fully realize what was unfolding. But we all know we're navigating breathtaking history: Nearly every day could be — maybe will be — a book."


In [35]:
# Keep every space-separated token that contains "ea" or "eo".
# re.search is the call meant for a yes/no containment test, and the class is
# [ao] because "|" inside square brackets would match a literal pipe.
word_ea_eo = [word for word in text.split(' ') if re.search(r'e[ao]', word)]
print(word_ea_eo)


['peaks', 'people', 'realize', 'breathtaking', 'Nearly']


### 9. Use a regular expression to find and extract all the capitalized words in the text below individually.

In [37]:
# Sample sentence for exercises 9 and 10: contains capitalized names.
text = "Teddy Roosevelt and Abraham Lincoln walk into a bar."

In [43]:
# A capitalized word is an uppercase letter at a word boundary followed by
# the rest of the word (\w*). Matching '[A-Z]' alone would return only the
# initial letters, not the words.
mayus_words = re.findall(r'\b[A-Z]\w*', text)
print(mayus_words)

['Teddy', 'Roosevelt', 'Abraham', 'Lincoln']


### 10. Use a regular expression to find and extract all the sets of consecutive capitalized words in the text above.

In [44]:
# A set of consecutive capitalized words is one capitalized word followed by
# one or more further capitalized words, each separated by whitespace. The
# "(?: ... )+" group lets the run be two words or longer instead of being
# fixed at exactly two.
mayus_words_2 = re.findall(r'\b[A-Z]\w*(?:\s+[A-Z]\w*)+', text)
print(mayus_words_2)

['Teddy Roosevelt', 'Abraham Lincoln']


### 11. Use a regular expression to find and extract all the quotes from the text below.

*Hint: This one is a little more complex than the single quote example in the lesson because there are multiple quotes in the text.*

In [45]:
# Sample text for exercise 11: contains two separate double-quoted passages.
text = 'Roosevelt says to Lincoln, "I will bet you $50 I can get the bartender to give me a free drink." Lincoln says, "I am in!"'


In [46]:
# The lazy ".*?" stops at the nearest closing quote, so each quoted passage
# is returned as its own item.
quotes = [hit.group(0) for hit in re.finditer(r'".*?"', text)]
print(quotes)

['"I will bet you $50 I can get the bartender to give me a free drink."', '"I am in!"']


### 12. Use a regular expression to find and extract all the numbers from the text below.

In [47]:
# Sample text for exercise 12: contains several integers.
text = "There were 30 students in the class. Of the 30 students, 14 were male and 16 were female. Only 10 students got A's on the exam."


In [48]:
# First approach: split the text on spaces and keep the tokens that
# re.match accepts, i.e. the tokens whose first character is a digit.

numbers = list(
    filter(
        lambda token: re.match(r'[0-9]', token),
        text.split(' '),
    )
)
print(numbers)

['30', '30', '14', '16', '10']


In [51]:
# Second approach: re.findall over the whole text.
# The pattern is a raw string so that "\d" reaches the regex engine untouched;
# in a normal string literal "\d" is an invalid escape sequence and triggers
# a warning on recent Python versions.

numbers2 = re.findall(r'\d+', text)

print(numbers2)

['30', '30', '14', '16', '10']


### 13. Use a regular expression to find and extract all the social security numbers from the text below.

In [54]:
# Sample records for exercises 13-15: each line holds a social security
# number and a phone number.
text = """
Henry's social security number is 876-93-2289 and his phone number is (847)789-0984.
Darlene's social security number is 098-32-5295 and her phone number is (987)222-0901.
"""

In [55]:
# Social security numbers are written as three digits, two digits and four
# digits separated by hyphens.
ssn_pattern = re.compile(r'\d{3}-\d{2}-\d{4}')
security_numbers = ssn_pattern.findall(text)
print(security_numbers)

['876-93-2289', '098-32-5295']


### 14. Use a regular expression to find and extract all the phone numbers from the text above.

In [56]:
# Phone numbers are written as "(ddd)ddd-dddd"; the parentheses are escaped
# so they are matched literally.
phone_numbers = [hit.group(0) for hit in re.finditer(r'\(\d{3}\)\d{3}-\d{4}', text)]
print(phone_numbers)

['(847)789-0984', '(987)222-0901']


### 15. Use a regular expression to find and extract all the formatted numbers (both social security and phone) from the text above.

In [57]:
# One pattern with two alternatives: a social security number or a phone
# number. Matches are returned in the order they appear in the text.
formatted_number = re.compile(r'\d{3}-\d{2}-\d{4}|\(\d{3}\)\d{3}-\d{4}')
phone_numbers_2 = formatted_number.findall(text)
print(phone_numbers_2)

['876-93-2289', '(847)789-0984', '098-32-5295', '(987)222-0901']
