---
title: "Regular Expression By Example"
description: ""
tags: python, data_wrangling
URL: https://github.com/chrisalbon/notes
Licence: 
Creator: 
Meta: 

---

 <div>
    	<img src="./coco.png" style="float: left;height: 55px">
    	<div style="height: 75px;text-align: center; padding-top:5px">
        <h1>
      	Regular Expression By Example
        </h1>
        <p></p>
    	</div>
		</div> 

In [1]:
# Import regex
import re

In [2]:
# Sample sentence searched by the examples that follow
text = (
    'A flock of 120 quick brown foxes jumped over 30 lazy brown, bears.'
)

### ^ Matches beginning of line.

In [3]:
# '^' pins the match to the very start of the text
re.findall(r'^A', text)

['A']

### $ Matches end of line.

In [4]:
# '$' pins the match to the end of the text; the '.' consumes the last character
re.findall(r'bears.$', text)

['bears.']

### . Matches any single character except newline.

In [5]:
# Each '.' stands in for exactly one arbitrary character
re.findall(r'f..es', text)

['foxes']

### `[...]` Matches any single character in brackets.

In [6]:
# Collect every lower-case vowel, one match per character
re.findall(r'[aeiou]', text)

['o', 'o', 'u', 'i', 'o', 'o', 'e', 'u', 'e', 'o', 'e', 'a', 'o', 'e', 'a']

### `[^...]` Matches any single character not in brackets

In [7]:
# Collect every character that is NOT one of the lower-case vowels
re.findall(r'[^aeiou]', text)

['A',
 ' ',
 'f',
 'l',
 'c',
 'k',
 ' ',
 'f',
 ' ',
 '1',
 '2',
 '0',
 ' ',
 'q',
 'c',
 'k',
 ' ',
 'b',
 'r',
 'w',
 'n',
 ' ',
 'f',
 'x',
 's',
 ' ',
 'j',
 'm',
 'p',
 'd',
 ' ',
 'v',
 'r',
 ' ',
 '3',
 '0',
 ' ',
 'l',
 'z',
 'y',
 ' ',
 'b',
 'r',
 'w',
 'n',
 ',',
 ' ',
 'b',
 'r',
 's',
 '.']

### `a | b` Matches either a or b.

In [8]:
# Alternation: accept either a lower-case or an upper-case "a"
re.findall(r'a|A', text)

['A', 'a', 'a']

### `(re)` Groups regular expressions and remembers matched text.

In [9]:
# With a single group, findall returns the text captured by the group: 'foxes'
re.findall(r'(foxes)', text)

['foxes']

### `\w` Matches word characters.

In [10]:
# Find non-overlapping runs of five consecutive word characters.
# Raw string: '\w' is not a valid Python string escape, so a plain literal
# raises an invalid-escape warning on modern Python.
re.findall(r'\w\w\w\w\w', text)

['flock', 'quick', 'brown', 'foxes', 'jumpe', 'brown', 'bears']

### `\W` Matches nonword characters.

In [11]:
# Two adjacent non-word characters (here: the comma and the space after it).
# Raw string avoids the invalid '\W' string-escape warning.
re.findall(r'\W\W', text)

[', ']

### `\s` Matches whitespace. For ASCII text, equivalent to `[ \t\n\r\f\v]`.

In [12]:
# One match per whitespace character.
# Raw string avoids the invalid '\s' string-escape warning.
re.findall(r'\s', text)

[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']

### `\S` Matches nonwhitespace.

In [13]:
# Non-overlapping pairs of adjacent non-whitespace characters.
# Raw string avoids the invalid '\S' string-escape warning.
re.findall(r'\S\S', text)

['fl',
 'oc',
 'of',
 '12',
 'qu',
 'ic',
 'br',
 'ow',
 'fo',
 'xe',
 'ju',
 'mp',
 'ed',
 'ov',
 'er',
 '30',
 'la',
 'zy',
 'br',
 'ow',
 'n,',
 'be',
 'ar',
 's.']

### `\d` Matches digits. Equivalent to `[0-9]`.

In [14]:
# Three digits in a row.
# Raw string avoids the invalid '\d' string-escape warning.
re.findall(r'\d\d\d', text)

['120']

### `\D` Matches nondigits.

In [15]:
# Non-overlapping runs of five consecutive non-digit characters.
# Raw string avoids the invalid '\D' string-escape warning.
re.findall(r'\D\D\D\D\D', text)

['A flo',
 'ck of',
 ' quic',
 'k bro',
 'wn fo',
 'xes j',
 'umped',
 ' over',
 ' lazy',
 ' brow',
 'n, be']

### `\A` Matches beginning of string.

In [16]:
# '\A' anchors at the start of the string, then a literal 'A' must follow.
# Raw string avoids the invalid '\A' string-escape warning.
re.findall(r'\AA', text)

['A']

### `\Z` Matches only at the end of the string. Unlike `$`, Python's `\Z` does not match before a trailing newline.

In [17]:
# '\Z' anchors at the very end of the string.
# Raw string avoids the invalid '\Z' string-escape warning.
re.findall(r'bears.\Z', text)

['bears.']

### `\b` Matches a word boundary (the empty string at the start or end of a word).

In [19]:
# A raw string is required here: in a plain literal '\b' is the backspace
# character, so the regex engine would never see a word-boundary assertion.
# Matches an f, o, x, e or s that begins a word.
re.findall(r'\b[foxes]', text)

['f', 'o', 'f', 'o']

### `\n`, `\t`, etc. Matches newlines, carriage returns, tabs, etc.

In [20]:
# The sample text is a single line, so no newline character is found
re.findall('\n', text)

[]

### `[Pp]ython` Match "Python" or "python"

In [21]:
# The bracket accepts either case for the first letter only
re.findall(r'[Ff]oxes', 'foxes Foxes Doxes')

['foxes', 'Foxes']

### `[0-9]` Match any digit; same as `[0123456789]`

In [22]:
# One match per digit character in the sample text
re.findall(r'[0-9]', text)

['1', '2', '0', '3', '0']

### `[a-z]` Match any lowercase ASCII letter

In [23]:
# The upper-case 'F' and the space fall outside the a-z range
re.findall(r'[a-z]', 'foxes Foxes')

['f', 'o', 'x', 'e', 's', 'o', 'x', 'e', 's']

### `[A-Z]` Match any uppercase ASCII letter

In [24]:
# Only the capital 'F' falls inside the A-Z range
re.findall(r'[A-Z]', 'foxes Foxes')

['F']

### `[a-zA-Z0-9]` Match any of the above

In [25]:
# Several ranges can share one bracket; only the space is left out
re.findall(r'[a-zA-Z0-9]', 'foxes Foxes')

['f', 'o', 'x', 'e', 's', 'F', 'o', 'x', 'e', 's']

### `[^aeiou]` Match anything other than a lowercase vowel

In [26]:
# A leading '^' inside the bracket negates the set
re.findall(r'[^aeiou]', 'foxes Foxes')

['f', 'x', 's', ' ', 'F', 'x', 's']

### `[^0-9]` Match anything other than a digit

In [27]:
# The input has no digits, so every character (including the space) matches
re.findall(r'[^0-9]', 'foxes Foxes')

['f', 'o', 'x', 'e', 's', ' ', 'F', 'o', 'x', 'e', 's']

### `ruby?` Match "rub" or "ruby": the y is optional

In [28]:
# '?' makes the trailing 's' optional; the match is case-sensitive
re.findall(r'foxes?', 'foxes Foxes')

['foxes']

### `ruby*` Match "rub" plus 0 or more ys

In [29]:
# 'o' followed by zero or more 'x' characters
re.findall(r'ox*', 'foxes Foxes')

['ox', 'ox']

### `ruby+` Match "rub" plus 1 or more ys

In [30]:
# 'o' followed by at least one 'x'
re.findall(r'ox+', 'foxes Foxes')

['ox', 'ox']

### `\d{3}` Match exactly 3 digits

In [31]:
# Exactly three digits in a row.
# Raw string avoids the invalid '\d' string-escape warning.
re.findall(r'\d{3}', text)

['120']

### `\d{3,}` Match 3 or more digits

In [32]:
# Two or more digits in a row (no upper bound).
# Raw string avoids the invalid '\d' string-escape warning.
re.findall(r'\d{2,}', text)

['120', '30']

### `\d{3,5}` Match 3, 4, or 5 digits

In [33]:
# Between two and three digits in a row.
# Raw string avoids the invalid '\d' string-escape warning.
re.findall(r'\d{2,3}', text)

['120', '30']

### `^Python` Match "Python" at the start of a string or internal line

In [34]:
# The text begins with 'A', so the anchored pattern matches once
re.findall(r'^A', text)

['A']

### `Python$` Match "Python" at the end of a string or line

In [35]:
# 'bears' plus one more character, anchored to the end of the text
re.findall(r'bears.$', text)

['bears.']

### `\APython` Match "Python" at the start of a string

In [36]:
# 'A' only when it is the first character of the whole string.
# Raw string avoids the invalid '\A' string-escape warning.
re.findall(r'\AA', text)

['A']

### `Python\Z` Match "Python" at the end of a string

In [37]:
# 'bears' plus one more character, only at the very end of the whole string.
# Raw string avoids the invalid '\Z' string-escape warning.
re.findall(r'bears.\Z', text)

['bears.']

### `Python(?=!)` Match "Python", if followed by an exclamation point

In [38]:
# Lookahead: 'bears' must be followed by a character, which is not consumed
re.findall(r'bears(?=.)', text)

['bears']

### `Python(?!!)` Match "Python", if not followed by an exclamation point

In [39]:
# Negative lookahead: skip any 'foxes' that is immediately followed by '!'
re.findall(r'foxes(?!!)', 'foxes foxes!')

['foxes']

### `python|perl` Match "python" or "perl"

In [40]:
# Alternatives are tried left to right, so plain 'foxes' wins both times
re.findall(r'foxes|foxes!', 'foxes foxes!')

['foxes', 'foxes']

### `rub(y|le)` Match "ruby" or "ruble"

In [41]:
# Only the text captured by the group is returned, not the whole match
re.findall(r'fox(es!)', 'foxes foxes!')

['es!']

### `Python(!+|\?)` "Python" followed by one or more ! or one ?

In [42]:
# The group captures just the '!' that follows 'foxes'
re.findall(r'foxes(!)', 'foxes foxes!')

['!']