# Imports

In [1]:
import re
import pandas

# Notebook-wide pandas display settings: allow very long cell text and
# cap how many columns / rows are rendered before pandas truncates.
pandas.set_option('display.max_colwidth', 5000)
pandas.set_option('display.max_columns', 10)
pandas.set_option('display.max_rows', 200)

# Legend

In [87]:
data = [
    ['<b>.</b>', 'match any single character (letter, digit, whitespace, everything), except . itself. - <b>needs ecaping with \</b>'],
    ['<b>\.</b>', 'match any single character (letter, digit, whitespace, everything) and the dot'],
    ['<b>\d</b>', 'match any digit'],
    ['<b>\d+</b>', 'string has at least one digit'],
    ['<b>\d\d\d</b>', 'exactly any three digits'],
    ['<b>[cmf]</b>', 'match only c, m, or f and nothing else'],
    ['<b>[^cmf]</b>', 'match any except c, m, or f'],
    ['<b>[0-6]</b>', 'match any digit from 0 to 6'],
    ['<b>[^a-c]</b>', 'match any character except from a to c'],
    ['<b>\w or [A-Za-z0-9_]</b>', 'match any characters or digits, no special characters'],
    ['<b>ab?c</b>', 'match either "abc" or "ac" because b is optional, except ? - <b>needs escaping with \</b>'],
    ['<b>\' \'</b>', 'match space'],
    ['<b>\\r</b>', 'match carriage return'],
    ['<b>\\t</b>', 'match tab'],
    ['<b>\\n</b>', 'match new line'],
    ['<b>\s</b>', 'match any whitespace character'],
    ['<b>^Today</b>', 'match string beginning with Today'],
    ['<b>;$</b>', 'ends with a semicolon'],
]

legend = pandas.DataFrame(data, columns=['Filter', 'Explanation'], index = None)
legend.style.hide_index()

Filter,Explanation
.,"match any single character (letter, digit, whitespace, everything), except . itself. Needs ecaping with \"
\.,"match any single character (letter, digit, whitespace, everything) and the dot"
\d,match any digit
\d+,string has at least one digit
\d\d\d,exactly any three digits
[cmf],"match only c, m, or f and nothing else"
[^cmf],"match any except c, m, or f"
[0-6],match any digit from 0 to 6
[^a-c],match any character except from a to c
\w or [A-Za-z0-9_],"match any characters or digits, no special characters"


# Functions

In [54]:
def testMatch(result):
    if result is None:
        print('Not a match.')
    else:
        print('Found a match at start: ' + str(result.start()) + ' | end: ' + str(result.end()))

# Basic match 1

In [55]:
# Simplest case: look for one literal character inside a short string.
text, match = 'abcdefg', 'b'

# search() reports the first occurrence of the pattern anywhere in the text.
testMatch(re.compile(match).search(text))

Found a match at start: 1 | end: 2


# Find and gather all matching digits | \d

In [5]:
text = 'adsf23gfd'
# Raw string: in a normal literal '\d' is an invalid escape sequence that
# Python only tolerates with a warning; r'\d' passes the backslash to the
# regex engine explicitly.
match = r'\d'

# findall() returns every non-overlapping match as a list of strings.
re.findall(match, text)

['2', '3']

# Multiple matched string locations

In [6]:
text = 'Now I see trees everywhere.'

print("Text: " + text + "\n")

# The original first loop searched for 'r' but labelled its hits as 'e'.
# Driving both the pattern and the label from one variable keeps the printed
# letter in sync with what is actually searched.
for letter in ('r', 'h'):
    # finditer() yields one match object per occurrence, left to right.
    for hit in re.finditer(letter, text):
        print("Letter '{}' found at start: {} ending at: {}".format(letter, hit.start(), hit.end()))

Text: Now I see trees everywhere.

Letter 'e' found at start: 11 ending at: 12
Letter 'e' found at start: 19 ending at: 20
Letter 'e' found at start: 24 ending at: 25
Letter 'h' found at start: 22 ending at: 23


# Match anything except... | [^Now]

In [7]:
text = 'Now I see trees everywhere.'

# [^Now] is a negated character class: it matches the first character that is
# none of 'N', 'o' or 'w'.
testMatch(re.compile('[^Now]').search(text))

Found a match at start: 3 | end: 4


In [8]:
# Collect every character that is not a space, 'N', 'o', 'w' or 'e'.
re.compile('[^ Nowe]').findall(text)

['I', 's', 't', 'r', 's', 'v', 'r', 'y', 'h', 'r', '.']

# Match any letter, digit or underscore | \w or [A-Za-z0-9_]

In [9]:
# Double quotes avoid having to escape the apostrophe inside the text.
text = "68 hens 'n a lil piggeh in a yard kod dida Čake. Additionalz: Đ0_324$#"

# Raw string so that \w reaches the regex engine without relying on Python
# tolerating an invalid escape sequence in a normal literal.
testMatch(re.search(r'\w', text))

Found a match at start: 0 | end: 1


In [10]:
# Explicit ASCII spelling of a "word character" class. Note that for str
# patterns \w also accepts non-ASCII letters, which this class does not.
testMatch(re.compile('[A-Za-z0-9_]').search(text))

Found a match at start: 0 | end: 1


In [11]:
# Raw strings keep the literal backslash in the text and the \w in the
# pattern without using invalid escape sequences.
text = r'\.#$$ non chars muahahaha'
# The leading symbols and the space are not word characters, so the first
# match is further into the string.
testMatch(re.search(r'\w', text))

Found a match at start: 6 | end: 7


###### Exactly three such characters in a row

In [12]:
# Raw strings keep the literal backslash in the text and the \w escapes in
# the pattern without using invalid escape sequences.
text = r'\.#$$ n$on$ ch$ars muahahaha'
# Three word characters in a row; the '$' signs break up the earlier words.
testMatch(re.search(r'\w\w\w', text))

Found a match at start: 15 | end: 18


###### Another way

In [13]:
# {3} is the counted-repetition form of \w\w\w; raw string avoids the
# invalid '\w' escape in a normal literal.
testMatch(re.search(r'\w{3}', text))

Found a match at start: 15 | end: 18


# Match repetitions - amount from range

In [21]:
# {2,3} asks for between two and three consecutive repetitions of 'l'.
text, expression = 'the mexican dude went all jajajajaja', 'l{2,3}'

testMatch(re.compile(expression).search(text))

Found a match at start: 24 | end: 26


# Check for the presence of one or more digits | \d+

In [57]:
# Double quotes avoid having to escape the apostrophe inside the text.
text = "I'm 28 years and 4 months old."
# \d+ matches a run of one or more digits; raw string avoids the invalid
# '\d' escape in a normal literal.
expression = r'\d+'

# search() stops at the first run of digits it finds.
testMatch(re.search(expression, text))

Found a match at start: 4 | end: 6


##### Find all matching results

In [45]:
# Unlike search(), findall() returns every run matched by the expression.
re.compile(expression).findall(text)

['28', '4']