# Python Regular Expression (regex)
A [regular expression](https://en.wikipedia.org/wiki/Regular_expression) (sometimes called a rational expression) is a sequence of characters that defines a search pattern.
## String searching

In [1]:
# Two-line sample text that the later cells search through
string = "6 sick hicks nick\nsix slick bricks with sticks."
# Candidate substrings to look for
ptn1, ptn2, ptn3 = "sick", "pick", "nick"
print(string)

# Plain substring test: `in` yields True when the substring occurs in the text
print(ptn1 in string, ptn2 in string, sep="\n")

6 sick hicks nick
six slick bricks with sticks.
True
False


## Regular Expression
### Simple search

In [2]:
# Regular Expression
import re

## pattern matching
### re.search(pattern, string) scans the string and returns a Match object for
### the first place the pattern matches, or None when it matches nowhere.
print(re.search(ptn1, string))  # "sick" occurs -> Match object
print(re.search(ptn2, string))  # "pick" does not occur -> None

<re.Match object; span=(2, 6), match='sick'>
None


### Multiple patterns search

In [3]:
## multiple patterns matching ("sick" or "pick")
### The r prefix makes this a raw string literal: backslashes are passed to the
### regex engine unchanged. [sp] is a character set matching either "s" or "p".
ptn = r"[sp]ick"
print(re.search(ptn, string))

<re.Match object; span=(2, 6), match='sick'>


### More patterns

In [4]:
## more patterns: character ranges inside [...]
### [a-z]: any one lowercase ASCII letter
print(re.search(r"[a-z]ick", string))
### [A-Z]: any one uppercase ASCII letter (the text has none before "ick" -> None)
print(re.search(r"[A-Z]ick", string))
### [0-9a-z]: any one digit or lowercase ASCII letter
print(re.search(r"[0-9a-z]ick", string))

<re.Match object; span=(2, 6), match='sick'>
None
<re.Match object; span=(2, 6), match='sick'>


### Digit search

In [5]:
## matching digit
### \d: any decimal digit
print(re.search(r"\d", string))
### \D: any character that is NOT a decimal digit
print(re.search(r"\Dick", string))

<re.Match object; span=(0, 1), match='6'>
<re.Match object; span=(2, 6), match='sick'>


### Space search

In [6]:
## matching space
### \s: any whitespace character, i.e. [ \t\n\r\f\v] (the plain space included)
print(re.search(r"\s", string))
### \S: opposite of \s, any non-whitespace character
print(re.search(r"\S", string))

<re.Match object; span=(1, 2), match=' '>
<re.Match object; span=(0, 1), match='6'>


### Digit, Alphabet and "_"

In [7]:
## matching any digit, alphabet and "_"
### \w: any word character; for ASCII text this is [a-zA-Z0-9_]
print(re.search(r"\wick", string))
### \W: any non-word character, i.e. anything other than [a-zA-Z0-9_]
### Every "ick" in the text follows a letter, so \Wick finds nothing.
print(re.search(r"\Wick", string))
### Here \W consumes the space in front of "sick", so the match is " sick".
print(re.search(r"\Wsick", string))

<re.Match object; span=(2, 6), match='sick'>
None
<re.Match object; span=(1, 6), match=' sick'>


### Empty string

In [8]:
## matching empty string
### \b: empty string, but only at the start or end of a word (word boundary)
print(re.search(r"\bsick\b", string))
### "hick" only occurs inside "hicks", so there is no boundary after it -> None
print(re.search(r"\bhick\b", string))
### \B: empty string, but only where there is NO word boundary
### In the text, " sick " has "6" before its leading space (a boundary) -> None
print(re.search(r"\B sick \B", string))
### With two spaces on each side, both \B positions lie between two spaces
print(re.search(r"\B sick \B", "  sick  "))

<re.Match object; span=(2, 6), match='sick'>
None
None
<re.Match object; span=(1, 7), match=' sick '>


### Special Character

In [9]:
## matching special character
### \\: match a literal backslash
### The text has no backslash ("\n" in its literal is a newline character) -> None
print(re.search(r"\\", string))
### "\\n." is the three characters: backslash, "n", "."
print(re.search(r"\\", "\\n."))
### .: match any single character except a newline
print(re.search(r".ick", string))
### \.: match a literal "."
print(re.search(r"\.", "\\n."))

None
<re.Match object; span=(0, 1), match='\\'>
<re.Match object; span=(2, 6), match='sick'>
<re.Match object; span=(2, 3), match='.'>


### Start and end of line

In [10]:
## match line beginning or ending
### ^: match at the beginning of the string (at every line start only with re.M)
print(re.search(r"^6", string))
### "six" starts the second line, not the string -> None without re.M
print(re.search(r"^six", string))
### $: match at the end of the string or just before a final newline
### (at every line end only with re.M)
print(re.search(r"\.$", string))

<re.Match object; span=(0, 1), match='6'>
None
<re.Match object; span=(46, 47), match='.'>


### Multiple Line search

In [11]:
## Multi-line matching
# By default ^ anchors only at the very start of the string, so "six" (which
# begins the second line) is not found.
print(re.search(r"^six", string))
# re.MULTILINE (the long name of re.M) lets ^ also match right after each newline.
print(re.search(r"^six", string, re.MULTILINE))

None
<re.Match object; span=(18, 21), match='six'>


## Quantifiers
### 0 or 1

In [12]:
## ?: the preceding item may or may not occur (0 or 1 times)
print(re.search(r"s(l)?ick", string))  # "l" absent: matches "sick"
### string[10:] drops the first 10 characters (and with them "sick"), so the
### first match is now "slick"; the span is relative to the sliced string.
print(re.search(r"s(l)?ick", string[10:]))

<re.Match object; span=(2, 6), match='sick'>
<re.Match object; span=(12, 17), match='slick'>


### 0 or more

In [13]:
## * : the preceding item occurs 0 or more times
print(re.search(r"ab*", "a"))      # zero "b"s still match
print(re.search(r"ab*", "abbbb"))  # greedy: takes all four "b"s

<re.Match object; span=(0, 1), match='a'>
<re.Match object; span=(0, 5), match='abbbb'>


### 1 or more

In [14]:
## + : the preceding item occurs 1 or more times
print(re.search(r"ab+", "a"))       # no "b" at all -> None
print(re.search(r"ab+", "abbbbb"))  # greedy: takes all five "b"s

None
<re.Match object; span=(0, 6), match='abbbbb'>


### n to m times

In [15]:
## {n,m} : the preceding item occurs n to m times
### Write it without a space: "{n, m}" is treated as literal text, not a quantifier.
print(re.search(r"ab{2,10}", "a"))       # fewer than 2 "b"s -> None
print(re.search(r"ab{2,10}", "abbbbb"))  # 5 "b"s is within 2..10
print(re.search(r"ab{8,10}", "abbbbb"))  # 5 "b"s is fewer than 8 -> None

None
<re.Match object; span=(0, 6), match='abbbbb'>
None


## Grouping and alternation
### group

In [16]:
## Capture groups
# Each (...) in the pattern captures the part of the text it matched:
# group 1 is the run of digits, group 2 is everything after "Date: ".
match = re.search(r"(\d+), Date: (.+)", "ID: 021523, Date: Feb/12/2017")
# group(0) is the whole match; groups() yields the captures in order.
print(match.group(0), *match.groups(), sep="\n")

021523, Date: Feb/12/2017
021523
Feb/12/2017


### a or b

In [17]:
## | : or (alternation)
### When the pattern has one capture group, findall returns only what the
### group captured, here the single letter before "ick".
print(re.findall(r"(s|h)ick", string))
### With the whole alternation inside the group, the full words are returned.
print(re.findall(r"(sick|hick)", string))

['s', 'h']
['sick', 'hick']


## Find all

In [18]:
## findall
### Unlike re.search (first match only), re.findall returns a list of all
### non-overlapping matches, in the order found.
print(re.findall(r".ick", string))

['sick', 'hick', 'nick', 'lick', 'rick', 'tick']


## Replace

In [19]:
## re.sub() replace
### re.sub(pattern, repl, string) returns a new string with every match of the
### pattern replaced by repl; `string` itself is left unchanged.
print(re.sub(r"6", "six", string))

six sick hicks nick
six slick bricks with sticks.


## Split

In [20]:
## re.split()
# Split on every literal "." or whitespace character. The pattern is written as
# a raw string so that \s reaches the regex engine as-is; in a normal literal
# "\s" is an invalid escape sequence and triggers a warning on recent Pythons.
# The final '' in the result comes from the "." at the very end of the text.
print(re.split(r"[.\s]", string))

['6', 'sick', 'hicks', 'nick', 'six', 'slick', 'bricks', 'with', 'sticks', '']


## Compile

In [21]:
## compile
### re.compile builds a reusable pattern object whose methods (search, findall,
### ...) take the text directly, without repeating the pattern.
compiled_re = re.compile(r"[psn]ick")
print(compiled_re.search(string))
print(compiled_re.findall(string))

<re.Match object; span=(2, 6), match='sick'>
['sick', 'nick']


__You can find more on this [cheat sheet](http://www.cbs.dtu.dk/courses/27610/regular-expressions-cheat-sheet-v2.pdf)__