In [1]:
import re

**\d: any numeric digit from 0 to 9**

**\D: any character that is not a numeric digit from 0 to 9**

**\w: any letter, numeric digit, or the underscore character (think of this as matching 'word' characters)**

**\W: any character that is not a letter, numeric digit, or the underscore character**

**\s: any space, tab, or newline character (think of this as matching 'space' characters)**

**\S: any character that is not a space, tab, or newline**

In [2]:
# Sample text for the regex demos. Each trailing backslash is a line
# continuation inside the triple-quoted string, so the lyrics form one long
# line wrapped in a leading and a trailing newline.
# NOTE(review): "6 golden rings" is traditionally "5 golden rings"; it is kept
# as-is here because the recorded findall output for this text shows '6 golden'.
lyrics = """
12 drummers drumming, 11 pipers piping, 10 lords a leaping, 9 ladies dancing, 8 maids a milking, \
7 swans a swimming, 6 geese a laying, 6 golden rings, 4 calling birds, 3 french hens, 2 turtle doves, \
and 1 partridge in a pear tree
"""

In [5]:
# One or more digits, then a single whitespace character, then one or more
# word characters (letters, digits, or underscore).
pattern = r'\d+\s\w+'
xmas_regex = re.compile(pattern)

In [6]:
# The pattern has no capture groups, so findall returns each full match
# (number + following word) as a string, in order of appearance.
xmas_regex.findall(lyrics)

['12 drummers',
 '11 pipers',
 '10 lords',
 '9 ladies',
 '8 maids',
 '7 swans',
 '6 geese',
 '6 golden',
 '4 calling',
 '3 french',
 '2 turtle',
 '1 partridge']

# Making our own character classes

In [7]:
# A character class matches any ONE of the characters listed between the
# brackets; here it matches the same text as '(a|e|i|o|u)'.
# Only lowercase vowels are listed, so uppercase vowels are not matched.
vowel_regex = re.compile(r'[aeiou]')
vowel_regex.findall('Robocop eats baby food. BABY FOOD.')

['o', 'o', 'o', 'e', 'a', 'a', 'o', 'o']

In [8]:
# Rebinds vowel_regex with both lowercase and uppercase vowels listed in the
# class, so the uppercase vowels in the sentence are now matched as well.
vowel_regex = re.compile(r'[aeiouAEIOU]')
vowel_regex.findall('Robocop eats baby food. BABY FOOD.')

['o', 'o', 'o', 'e', 'a', 'a', 'o', 'o', 'A', 'O', 'O']

In [9]:
# {2} requires exactly two consecutive characters from the class, so each
# match is a pair of adjacent vowels (in any mix of upper and lower case).
double_vowel_regex = re.compile(r'[aeiouAEIOU]{2}')
double_vowel_regex.findall('Robocop eats baby food. BABY FOOD.')

['ea', 'oo', 'OO']

# Use ^ as the first character inside the [] brackets to negate the class (match any character NOT listed)

In [10]:
# The leading ^ negates the class: this matches every single character that is
# NOT a vowel. Despite the name, that includes spaces and punctuation as well
# as consonants.
consonant_regex = re.compile(r'[^aeiouAEIOU]')
consonant_regex.findall('Robocop eats baby food. BABY FOOD.')

['R',
 'b',
 'c',
 'p',
 ' ',
 't',
 's',
 ' ',
 'b',
 'b',
 'y',
 ' ',
 'f',
 'd',
 '.',
 ' ',
 'B',
 'B',
 'Y',
 ' ',
 'F',
 'D',
 '.']