# Regular Expressions

Like a good espresso, regular expressions require care and precision to avoid either a watery mess or bitter sludge.

We will not have time to cover everything about **regexes** here, but this introduction should help you feel comfortable poking around and using them in your own code.

In [5]:
# Eight lines of verse used as the running example in this notebook.
# The text contains typographic quotes/apostrophes (“ ” ’) and the non-ASCII
# letters ï and œ. The literal starts and ends with a newline, which the
# later cells remove with .strip() before splitting into lines.
sample_text = """
“Ye kings and warriors! may your vows be crown’d,
And Troy’s proud walls lie level with the ground.
May Jove restore you when your toils are o’er
Safe to the pleasures of your native shore.
But, oh! relieve a wretched parent’s pain,
And give Chryseïs to these arms again;
If mercy fail, yet let my presents move,
And dread avenging Phœbus, son of Jove.”
"""

In [6]:
# Split every line of the passage on whitespace; punctuation stays attached
# to the neighbouring word (e.g. 'warriors!').
list(map(str.split, sample_text.strip().splitlines()))

[['“Ye', 'kings', 'and', 'warriors!', 'may', 'your', 'vows', 'be', 'crown’d,'],
 ['And', 'Troy’s', 'proud', 'walls', 'lie', 'level', 'with', 'the', 'ground.'],
 ['May', 'Jove', 'restore', 'you', 'when', 'your', 'toils', 'are', 'o’er'],
 ['Safe', 'to', 'the', 'pleasures', 'of', 'your', 'native', 'shore.'],
 ['But,', 'oh!', 'relieve', 'a', 'wretched', 'parent’s', 'pain,'],
 ['And', 'give', 'Chryseïs', 'to', 'these', 'arms', 'again;'],
 ['If', 'mercy', 'fail,', 'yet', 'let', 'my', 'presents', 'move,'],
 ['And', 'dread', 'avenging', 'Phœbus,', 'son', 'of', 'Jove.”']]

In [7]:
# Explicit-loop spelling of the whitespace tokenization: one list of
# tokens per line of the passage, collected in order.
words_in_lines = []
for verse in sample_text.strip().splitlines():
    tokens = verse.split()
    words_in_lines.append(tokens)

words_in_lines

[['“Ye', 'kings', 'and', 'warriors!', 'may', 'your', 'vows', 'be', 'crown’d,'],
 ['And', 'Troy’s', 'proud', 'walls', 'lie', 'level', 'with', 'the', 'ground.'],
 ['May', 'Jove', 'restore', 'you', 'when', 'your', 'toils', 'are', 'o’er'],
 ['Safe', 'to', 'the', 'pleasures', 'of', 'your', 'native', 'shore.'],
 ['But,', 'oh!', 'relieve', 'a', 'wretched', 'parent’s', 'pain,'],
 ['And', 'give', 'Chryseïs', 'to', 'these', 'arms', 'again;'],
 ['If', 'mercy', 'fail,', 'yet', 'let', 'my', 'presents', 'move,'],
 ['And', 'dread', 'avenging', 'Phœbus,', 'son', 'of', 'Jove.”']]

In [8]:
# Keep the per-line whitespace tokens in `lines` for the cells that follow.
lines = [verse.split() for verse in sample_text.strip().splitlines()]
lines

[['“Ye', 'kings', 'and', 'warriors!', 'may', 'your', 'vows', 'be', 'crown’d,'],
 ['And', 'Troy’s', 'proud', 'walls', 'lie', 'level', 'with', 'the', 'ground.'],
 ['May', 'Jove', 'restore', 'you', 'when', 'your', 'toils', 'are', 'o’er'],
 ['Safe', 'to', 'the', 'pleasures', 'of', 'your', 'native', 'shore.'],
 ['But,', 'oh!', 'relieve', 'a', 'wretched', 'parent’s', 'pain,'],
 ['And', 'give', 'Chryseïs', 'to', 'these', 'arms', 'again;'],
 ['If', 'mercy', 'fail,', 'yet', 'let', 'my', 'presents', 'move,'],
 ['And', 'dread', 'avenging', 'Phœbus,', 'son', 'of', 'Jove.”']]

In [13]:
# Count the tokens in `lines` that are exactly "ground".
# NOTE(review): whitespace-split tokens keep punctuation ('ground.'), so the
# result is 1 only when `lines` holds the regex-tokenized words -- presumably
# this cell was executed after the regex cell; confirm the intended order.
n_ground = sum(1 for row in lines for token in row if token == "ground")

n_ground

1

In [10]:
import re

# Quick check of re.split on a toy string: split on runs of whitespace.
print(re.split(r"\s+", "abc def"))

# \w+ matches runs of word characters only, so punctuation is dropped and
# apostrophes split a word in two (“crown’d” -> 'crown', 'd').
word_regex = re.compile(r"\w+")
lines = [word_regex.findall(verse) for verse in sample_text.strip().splitlines()]
lines

['abc', 'def']


[['Ye', 'kings', 'and', 'warriors', 'may', 'your', 'vows', 'be', 'crown', 'd'],
 ['And',
  'Troy',
  's',
  'proud',
  'walls',
  'lie',
  'level',
  'with',
  'the',
  'ground'],
 ['May', 'Jove', 'restore', 'you', 'when', 'your', 'toils', 'are', 'o', 'er'],
 ['Safe', 'to', 'the', 'pleasures', 'of', 'your', 'native', 'shore'],
 ['But', 'oh', 'relieve', 'a', 'wretched', 'parent', 's', 'pain'],
 ['And', 'give', 'Chryseïs', 'to', 'these', 'arms', 'again'],
 ['If', 'mercy', 'fail', 'yet', 'let', 'my', 'presents', 'move'],
 ['And', 'dread', 'avenging', 'Phœbus', 'son', 'of', 'Jove']]

In [11]:
test_list = ["abc", "def", "ghi"]
for text in test_list:        # each element is a plain string
    for char in text:         # iterating a string yields 1-character strings
        for a in char:        # a 1-character string yields itself again
            print(a)
            for b in a:       # ...and again, so every letter prints twice
                print(b)

a
a
b
b
c
c
d
d
e
e
f
f
g
g
h
h
i
i


In [12]:
## Homework

## Using the code above as a guide, get the counts for _every_ word in "sample_text". Use the for loops as an example if lists are too confusing.

In [23]:
# The homework asks for the count of *every* word, so tally all tokens of
# `lines` in a single pass instead of writing one loop per word.
# `word_counts[w]` is the number of times token `w` occurs (0 if absent),
# including fragments such as 's' or 'd' produced by the tokenizer.
# NOTE(review): the per-word cells below repeat the same loop; each of
# their results can be read straight from `word_counts`.
from collections import Counter

word_counts = Counter(word for line in lines for word in line)

# Kept for compatibility with the original cell: the count for "Ye" alone.
n_Ye = word_counts["Ye"]

n_Ye

1

In [24]:
# Tally the tokens in `lines` that exactly equal "kings".
n_kings = sum(1 for row in lines for token in row if token == "kings")
n_kings

1

In [25]:
# Tally the tokens in `lines` that exactly equal lowercase "and";
# the comparison is case-sensitive, so "And" is not included.
n_and = sum(1 for row in lines for token in row if token == "and")

n_and

1

In [27]:
# Tally the tokens in `lines` that exactly equal "warriors".
n_warriors = sum(1 for row in lines for token in row if token == "warriors")

n_warriors

1

In [28]:
# Tally the tokens in `lines` that exactly equal lowercase "may";
# the comparison is case-sensitive, so "May" is not included.
n_may = sum(1 for row in lines for token in row if token == "may")

n_may

1

In [26]:
# Tally the tokens in `lines` that exactly equal "your".
n_your = sum(1 for row in lines for token in row if token == "your")

n_your

3

In [30]:
# Tally the tokens in `lines` that exactly equal "vows".
n_vows = sum(1 for row in lines for token in row if token == "vows")

n_vows

1

In [31]:
# Tally the tokens in `lines` that exactly equal "be".
n_be = sum(1 for row in lines for token in row if token == "be")

n_be

1

In [34]:
# Tally the tokens in `lines` that exactly equal "crown" -- the fragment
# the \w+ tokenizer yields from “crown’d”.
n_crown = sum(1 for row in lines for token in row if token == "crown")

n_crown

1

In [36]:
# Tally the tokens in `lines` that exactly equal capitalized "And";
# the comparison is case-sensitive, so "and" is not included.
n_And = sum(1 for row in lines for token in row if token == "And")

n_And

3

In [37]:
# Tally the tokens in `lines` that exactly equal "Troy" -- the fragment
# the \w+ tokenizer yields from “Troy’s”.
n_Troy = sum(1 for row in lines for token in row if token == "Troy")

n_Troy

1

In [38]:
# Tally the tokens in `lines` that exactly equal "proud".
n_proud = sum(1 for row in lines for token in row if token == "proud")

n_proud

1

In [39]:
# Tally the tokens in `lines` that exactly equal "walls".
n_walls = sum(1 for row in lines for token in row if token == "walls")

n_walls

1

In [40]:
# Tally the tokens in `lines` that exactly equal "lie".
n_lie = sum(1 for row in lines for token in row if token == "lie")

n_lie

1

In [41]:
# Tally the tokens in `lines` that exactly equal "level".
n_level = sum(1 for row in lines for token in row if token == "level")

n_level

1

In [42]:
# Tally the tokens in `lines` that exactly equal "with".
n_with = sum(1 for row in lines for token in row if token == "with")

n_with

1

In [43]:
# Tally the tokens in `lines` that exactly equal "the".
n_the = sum(1 for row in lines for token in row if token == "the")

n_the

2

In [44]:
# Tally the tokens in `lines` that exactly equal capitalized "May";
# the comparison is case-sensitive, so "may" is not included.
n_May = sum(1 for row in lines for token in row if token == "May")

n_May

1

In [45]:
# Tally the tokens in `lines` that exactly equal "Jove".
n_Jove = sum(1 for row in lines for token in row if token == "Jove")

n_Jove

2

In [46]:
# Tally the tokens in `lines` that exactly equal "restore".
n_restore = sum(1 for row in lines for token in row if token == "restore")

n_restore

1

In [47]:
# Tally the tokens in `lines` that exactly equal "you"
# (whole-token match, so "your" is not included).
n_you = sum(1 for row in lines for token in row if token == "you")

n_you

1

In [48]:
# Tally the tokens in `lines` that exactly equal "when".
n_when = sum(1 for row in lines for token in row if token == "when")

n_when

1

In [49]:
# Tally the tokens in `lines` that exactly equal "toils".
n_toils = sum(1 for row in lines for token in row if token == "toils")

n_toils

1

In [50]:
# Tally the tokens in `lines` that exactly equal "are".
n_are = sum(1 for row in lines for token in row if token == "are")

n_are

1

In [52]:
# Tally the tokens in `lines` that exactly equal "Safe".
n_Safe = sum(1 for row in lines for token in row if token == "Safe")

n_Safe

1

In [53]:
# Tally the tokens in `lines` that exactly equal "to".
n_to = sum(1 for row in lines for token in row if token == "to")

n_to

2

In [54]:
# Tally the tokens in `lines` that exactly equal "pleasures".
n_pleasures = sum(1 for row in lines for token in row if token == "pleasures")

n_pleasures

1

In [55]:
# Tally the tokens in `lines` that exactly equal "of".
n_of = sum(1 for row in lines for token in row if token == "of")

n_of

2

In [56]:
# Tally the tokens in `lines` that exactly equal "native".
n_native = sum(1 for row in lines for token in row if token == "native")

n_native

1

In [57]:
# Tally the tokens in `lines` that exactly equal "shore".
n_shore = sum(1 for row in lines for token in row if token == "shore")

n_shore

1

In [58]:
# Tally the tokens in `lines` that exactly equal "But".
n_But = sum(1 for row in lines for token in row if token == "But")

n_But

1

In [59]:
# Tally the tokens in `lines` that exactly equal "oh".
n_oh = sum(1 for row in lines for token in row if token == "oh")

n_oh

1

In [60]:
# Tally the tokens in `lines` that exactly equal "relieve".
n_relieve = sum(1 for row in lines for token in row if token == "relieve")

n_relieve

1

In [61]:
# Tally the tokens in `lines` that exactly equal the one-letter word "a"
# (whole-token match, not the letter inside other words).
n_a = sum(1 for row in lines for token in row if token == "a")

n_a

1

In [62]:
# Tally the tokens in `lines` that exactly equal "wretched".
n_wretched = sum(1 for row in lines for token in row if token == "wretched")

n_wretched

1

In [63]:
# Tally the tokens in `lines` that exactly equal "parent" -- the fragment
# the \w+ tokenizer yields from “parent’s”.
n_parent = sum(1 for row in lines for token in row if token == "parent")

n_parent

1

In [64]:
# Tally the tokens in `lines` that exactly equal "pain".
n_pain = sum(1 for row in lines for token in row if token == "pain")

n_pain

1

In [65]:
# Tally the tokens in `lines` that exactly equal "give".
n_give = sum(1 for row in lines for token in row if token == "give")

n_give

1

In [67]:
# Tally the tokens in `lines` that exactly equal "Chryseïs".
# The variable name is plain ASCII, but the compared string keeps the "ï".
n_Chryseis = sum(1 for row in lines for token in row if token == "Chryseïs")

n_Chryseis

1

In [69]:
# Tally the tokens in `lines` that exactly equal "these".
n_these = sum(1 for row in lines for token in row if token == "these")

n_these

1

In [70]:
# Tally the tokens in `lines` that exactly equal "arms".
n_arms = sum(1 for row in lines for token in row if token == "arms")

n_arms

1

In [71]:
# Tally the tokens in `lines` that exactly equal "again".
n_again = sum(1 for row in lines for token in row if token == "again")

n_again

1

In [72]:
# Tally the tokens in `lines` that exactly equal "If".
n_If = sum(1 for row in lines for token in row if token == "If")

n_If

1

In [73]:
# Tally the tokens in `lines` that exactly equal "mercy".
n_mercy = sum(1 for row in lines for token in row if token == "mercy")

n_mercy

1

In [74]:
# Tally the tokens in `lines` that exactly equal "fail".
n_fail = sum(1 for row in lines for token in row if token == "fail")

n_fail

1

In [76]:
# Tally the tokens in `lines` that exactly equal "yet".
n_yet = sum(1 for row in lines for token in row if token == "yet")

n_yet

1

In [79]:
# Tally the tokens in `lines` that exactly equal "let".
n_let = sum(1 for row in lines for token in row if token == "let")

n_let

1

In [82]:
# Tally the tokens in `lines` that exactly equal "my".
n_my = sum(1 for row in lines for token in row if token == "my")

n_my

1

In [83]:
# Tally the tokens in `lines` that exactly equal "presents".
n_presents = sum(1 for row in lines for token in row if token == "presents")

n_presents

1

In [84]:
# Tally the tokens in `lines` that exactly equal "move".
n_move = sum(1 for row in lines for token in row if token == "move")

n_move

1

In [85]:
# Tally the tokens in `lines` that exactly equal "dread".
n_dread = sum(1 for row in lines for token in row if token == "dread")

n_dread

1

In [86]:
# Tally the tokens in `lines` that exactly equal "avenging".
n_avenging = sum(1 for row in lines for token in row if token == "avenging")

n_avenging

1

In [87]:
# Tally the tokens in `lines` that exactly equal "Phœbus"
# (both the variable name and the compared string keep the "œ" ligature).
n_Phœbus = sum(1 for row in lines for token in row if token == "Phœbus")

n_Phœbus

1

In [88]:
# Tally the tokens in `lines` that exactly equal "son".
n_son = sum(1 for row in lines for token in row if token == "son")

n_son

1