# Advanced Regular Expressions Lab

Complete the following set of exercises to solidify your knowledge of regular expressions.

In [1]:
import re
import pandas as pd

### 1. Use a regular expression to find and extract all vowels in the following text.

In [2]:
# Sample sentence searched for vowels in exercise 1.
text = "This is going to be a sentence with a good number of vowels in it."

In [3]:
# A character class is the idiomatic way to say "any one of these letters",
# and IGNORECASE makes sure capital vowels are extracted as well.
re.findall(r'[aeiou]', text, flags=re.IGNORECASE)

['i',
 'i',
 'o',
 'i',
 'o',
 'e',
 'a',
 'e',
 'e',
 'e',
 'i',
 'a',
 'o',
 'o',
 'u',
 'e',
 'o',
 'o',
 'e',
 'i',
 'i']

In [4]:
# Same extraction via `finditer`, which yields Match objects so the position
# (span) of every vowel is shown next to the matched character.
matches = re.finditer(r'[aeiouAEIOU]', text)
for match in matches:
    print(match)

<re.Match object; span=(2, 3), match='i'>
<re.Match object; span=(5, 6), match='i'>
<re.Match object; span=(9, 10), match='o'>
<re.Match object; span=(10, 11), match='i'>
<re.Match object; span=(15, 16), match='o'>
<re.Match object; span=(18, 19), match='e'>
<re.Match object; span=(20, 21), match='a'>
<re.Match object; span=(23, 24), match='e'>
<re.Match object; span=(26, 27), match='e'>
<re.Match object; span=(29, 30), match='e'>
<re.Match object; span=(32, 33), match='i'>
<re.Match object; span=(36, 37), match='a'>
<re.Match object; span=(39, 40), match='o'>
<re.Match object; span=(40, 41), match='o'>
<re.Match object; span=(44, 45), match='u'>
<re.Match object; span=(47, 48), match='e'>
<re.Match object; span=(50, 51), match='o'>
<re.Match object; span=(54, 55), match='o'>
<re.Match object; span=(56, 57), match='e'>
<re.Match object; span=(60, 61), match='i'>
<re.Match object; span=(63, 64), match='i'>


### 2. Use a regular expression to find and extract all occurrences and forms (singular and plural) of the word "puppy" in the text below.

In [5]:
# Sample text for exercise 2, wrapped across lines for readability.
text = (
    "The puppy saw all the rest of the puppies playing and wanted to join them. "
    "I saw this and wanted a puppy of my own!"
)

In [6]:
# Raw string avoids the invalid "\w" escape in a normal string literal.
# The pattern is anchored on word boundaries and spells out both forms, so
# unrelated words that merely contain "pu" (e.g. "computer") are not matched.
pattern = r'\bpupp(?:y|ies)\b'
strings = re.findall(pattern, text)
print(strings)

['puppy', 'puppies', 'puppy']


### 3. Use a regular expression to find and extract all tenses (present and past) of the word "run" in the text below.

In [7]:
# Sample sentence shared by exercises 3 and 4.
text = "I ran the relay race the only way I knew how to run it."

In [8]:
# Limit the middle letter to the vowels that form "ran"/"run" and close with a
# word boundary; a bare `.` would also accept fragments such as "rin" in "rinse".
re.findall(r'\br[au]n\b', text)



['ran', 'run']

### 4. Use a regular expression to find and extract all words that begin with the letter "r" from the previous text.

In [9]:
 
# `\w*` consumes the rest of the word itself, so results carry no trailing
# space and a word followed by punctuation or the end of the text still matches.
re.findall(r'\br\w*', text)


['ran', 'relay', 'race', 'run']

### 5. Use a regular expression to find and substitute the letter "i" for the exclamation marks in the text below.

In [10]:
# Sample sentence for exercise 5: every "i" has been replaced by "!".
text = "Th!s !s a sentence w!th spec!al characters !n !t."


In [11]:
# Swap every exclamation mark for the letter "i".
exclamation = re.compile(r'!')
exclamation.sub('i', text)



'This is a sentence with special characters in it.'

### 6. Use a regular expression to find and extract words longer than 4 characters in the text below.

In [12]:
# Sample sentence for exercise 6 (words of different lengths).
text = "This sentence has words of varying lengths."

In [13]:
# Five or more consecutive word characters == "longer than 4 characters".
# Matching the word itself (instead of lazily scanning up to the next space or
# period) keeps delimiters out of the results.
re.findall(r'\b\w{5,}\b', text)


['sentence', 'words', 'varying', 'lengths']

### 7. Use a regular expression to find and extract all occurrences of the letter "b", some letter(s), and then the letter "t" in the sentence below.

In [14]:
# Sample sentence for exercise 7, wrapped across lines for readability.
text = (
    "I bet the robot couldn't beat the other bot with a bat, "
    "but instead it bit me."
)

In [15]:
# Raw string avoids the invalid "\w" escape in a normal string literal.
# `+` requires at least one letter between "b" and "t" (the task asks for
# "some letter(s)"); the previous `{,1000}` bound was arbitrary and also
# accepted a bare "bt". The class is limited to letters, as the task states.
pattern = r'b[a-zA-Z]+t'
strings = re.findall(pattern, text)
print(strings)

['bet', 'bot', 'beat', 'bot', 'bat', 'but', 'bit']


### 8. Use a regular expression to find and extract all words that contain either "ea" or "eo" in them.

In [16]:
# Sample paragraph for exercise 8, wrapped across lines for readability.
text = (
    "During many of the peaks and troughs of history, "
    "the people living it didn't fully realize what was unfolding. "
    "But we all know we're navigating breathtaking history: "
    "Nearly every day could be — maybe will be — a book."
)


In [17]:
# Match whole words: optional leading word characters, then "ea" or "eo", then
# the rest of the word. This keeps trailing spaces and punctuation out of the
# results and also finds words that begin with the pair or end the text.
re.findall(r'\b\w*e[ao]\w*', text)

['peaks', 'people', 'realize', 'breathtaking', 'Nearly']

### 9. Use a regular expression to find and extract all the capitalized words in the text below individually.

In [18]:
# Sample sentence shared by exercises 9 and 10.
text = "Teddy Roosevelt and Abraham Lincoln walk into a bar."

In [19]:
# An uppercase letter at a word boundary followed by the rest of the word.
# No trailing space is captured, and a capitalized word directly before
# punctuation or the end of the text is still found.
re.findall(r'\b[A-Z]\w*', text)

['Teddy', 'Roosevelt', 'Abraham', 'Lincoln']

### 10. Use a regular expression to find and extract all the sets of consecutive capitalized words in the text above.

In [20]:
# A capitalized word followed by one or more further capitalized words, each
# separated by a single space. The space is mandatory (an optional space lets
# a CamelCase word such as "McDonald" count as two words), and a run of three
# or more capitalized words is returned as one match.
pattern = r'[A-Z][a-z]+(?: [A-Z][a-z]+)+'
re.findall(pattern, text)

['Teddy Roosevelt', 'Abraham Lincoln']

### 11. Use a regular expression to find and extract all the quotes from the text below.

*Hint: This one is a little more complex than the single quote example in the lesson because there are multiple quotes in the text.*

In [21]:
# Sample text for exercise 11, wrapped across lines for readability.
text = (
    'Roosevelt says to Lincoln, '
    '"I will bet you $50 I can get the bartender to give me a free drink." '
    'Lincoln says, "I am in!"'
)


In [22]:
# Lazy match between a pair of double quotes, so each quotation is returned
# as its own item instead of one span from the first quote to the last.
quote_pattern = re.compile(r'".*?"')
quote_pattern.findall(text)


['"I will bet you $50 I can get the bartender to give me a free drink."',
 '"I am in!"']

### 12. Use a regular expression to find and extract all the numbers from the text below.

In [23]:
# Sample text for exercise 12, wrapped across lines for readability.
text = (
    "There were 30 students in the class. "
    "Of the 30 students, 14 were male and 16 were female. "
    "Only 10 students got A's on the exam."
)


In [24]:
# Collect every run of digits, then print them with a label.
numbers = re.findall(r'\d+', text)
print("nums:", numbers)

nums: ['30', '30', '14', '16', '10']


### 13. Use a regular expression to find and extract all the social security numbers from the text below.

In [25]:
# Sample text shared by exercises 13-15; the triple-quoted literal starts and
# ends with a newline, and each record sits on its own line.
text = """
Henry's social security number is 876-93-2289 and his phone number is (847)789-0984.
Darlene's social security number is 098-32-5295 and her phone number is (987)222-0901.
"""

In [26]:
# Three digits, two digits, four digits, separated by dashes.
[found.group() for found in re.finditer(r'\d{3}-\d{2}-\d{4}', text)]

['876-93-2289', '098-32-5295']

In [27]:
# Alternative patterns for the same task:

In [28]:
# Looser variant: accepts any run of digits in each of the three
# dash-separated groups rather than fixed 3-2-4 lengths.
ssn_loose = re.compile(r'\d+?-\d+-\d+')
ssn_loose.findall(text)

['876-93-2289', '098-32-5295']

In [29]:
# Digits only: `\w` would also accept letters and underscores, so a token such
# as "12-ab-cd" would have been reported as a social security number.
# The lookarounds reject a 3-2-4 group that is embedded in a longer run of
# digits or dashes.
re.findall(r'(?<![\d-])\d{3}-\d{2}-\d{4}(?![\d-])', text)

['876-93-2289', '098-32-5295']

### 14. Use a regular expression to find and extract all the phone numbers from the text above.

In [30]:
# Spell out the (NNN)NNN-NNNN layout. The greedy `.*` ran from "(" to the last
# digit on the line, which only worked because the phone number is the last
# thing on each line of this text.
re.findall(r'\(\d{3}\)\d{3}-\d{4}', text)

['(847)789-0984', '(987)222-0901']

### 15. Use a regular expression to find and extract all the formatted numbers (both social security and phone) from the text above.

In [31]:
# Either a social security number (NNN-NN-NNNN) or a phone number
# ((NNN)NNN-NNNN). Both layouts are spelled out so nothing after the number,
# such as the period that ends the sentence, is captured.
re.findall(r'\d{3}-\d{2}-\d{4}|\(\d{3}\)\d{3}-\d{4}', text)

['876-93-2289', '(847)789-0984', '098-32-5295', '(987)222-0901']

In [32]:
# Alternative pattern for the same task:

In [33]:
# Build the combined pattern from two named parts. Each part fixes the exact
# digit counts, so the match no longer depends on greedy `.*` stopping at the
# last digit of the line.
ssn_pattern = r'\d{3}-\d{2}-\d{4}'
phone_pattern = r'\(\d{3}\)\d{3}-\d{4}'
re.findall('|'.join([ssn_pattern, phone_pattern]), text)

['876-93-2289', '(847)789-0984', '098-32-5295', '(987)222-0901']