# String methods (are *not* enough)

In [2]:
# List the names of every attribute and method available on the built-in str type
dir(str)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'capitalize',
 'casefold',
 'center',
 'count',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'format_map',
 'index',
 'isalnum',
 'isalpha',
 'isascii',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'strip',
 'swapcase',
 'title',
 'translate',
 'upper',
 'zfill']


In [3]:
# str.index returns the starting index of the first occurrence of the substring
# (it raises ValueError when the substring is absent; str.find returns -1 instead)
'123foo456'.index('foo')

3

In [5]:
# Split the string at every occurrence of the separator 'foo'; the separator itself is dropped
'123foo456'.split('foo')

['123', '456']

In [6]:
# With no argument, strip() removes leading and trailing whitespace
# (spaces, tabs, newlines, ...); whitespace inside the string is kept
' 123 foo 456 '.strip()

'123 foo 456'

In [10]:
# If the optional 'count' argument is not given, every occurrence is replaced.
# The search text here is 'brown ' (with a trailing space), so the final
# 'brown?' does not match and is left unchanged.
'How now brown cow are brown?'.replace('brown ','green-')

'How now green-cow are brown?'

# Regular expressions

In [12]:
import re
# re.search(<regex>, <str>) returns a Match object (truthy) when the pattern
# is found anywhere in the string, and None (falsy) otherwise
s = '123foo456'
print("Found a match." if re.search('123', s) else "No match.")

Found a match.


In [52]:
# capturing matches: the Match object records where the match starts and ends,
# both positions as a (start, end) tuple, and the matched text itself
m = re.search('123', s)
print(m.start(), m.end(), m.span(), m.group(), sep='\n')

0
3
(0, 3)
123


In [53]:
# the third parameter (flags) allows us to
## make '.' match newlines as well (re.DOTALL)
## ignore case (re.IGNORECASE)
## make '^' and '$' match at the start/end of every line (re.MULTILINE)
## if you need multiple options, combine them with |, e.g. re.DOTALL | re.IGNORECASE
m = re.search('FOO',s)
print(m)
m = re.search('FOO',s,re.IGNORECASE | re.DOTALL)
print(m)

None
<re.Match object; span=(3, 6), match='foo'>


In [14]:
# more than one match
s = '123foo456foo789'
lst = re.findall('foo',s)
print(lst)
lst = re.finditer('foo',s)
[x for x in lst]
rs  = re.sub('foo',' ',s)
print(rs)
rs  = re.split(' ',rs)
print(rs)

['foo', 'foo']
123 456 789
['123', '456', '789']


# Let's get meta (Regular Expressions Do Much More)

The **search** function is not easy to understand.

In [50]:
import re
# pattern: a literal $, then two or more "digits followed by a comma" groups,
# then a final run of digits
m = re.search(
    r'\$((\d+,){2,}\d+)',
    "'That will be $1,000,000 he said...'",
)
# group(0) is the entire match and group(1) the first parenthesized subgroup.
# group(2) is the second parenthesized subgroup; when a group matches several
# times only its last match is accessible, so (\d+,) reports '000,'
print(m.group(0), m.group(1), m.group(2), sep='\n')

$1,000,000
1,000,000
000,


In [42]:
# Use a raw string so that \$ reaches the regex engine exactly as written; in a
# normal string literal '\$' is an invalid escape sequence that Python warns about
m = re.search(r'\$',
		"'That will be $1,000,000 he said...'")

# The pattern has no parenthesized groups, so groups() is an empty tuple
m.groups()

()

In [37]:
# re.match only matches at the beginning of the string.
# Only the value of the last expression in a cell is displayed, so the first
# three group() calls below produce no visible output; their values are noted
# in the trailing comments.
m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
m.group(0)       # The entire match: 'Isaac Newton'
m.group(1)       # The first parenthesized subgroup: 'Isaac'
m.group(2)       # The second parenthesized subgroup: 'Newton'
m.group(1, 2)    # Multiple arguments give us a tuple.

('Isaac', 'Newton')

In [54]:
import re
# Raw string so that \w is passed to the regex engine untouched ('\w' in a
# normal string literal is an invalid escape sequence that Python warns about).
# \w{2,4} matches runs of 2 to 4 word characters, so the lone 'b' is skipped.
re.findall(r'\w{2,4}', 'b be bee beee')

['be', 'bee', 'beee']

# Resources

- [Python Documentation](https://docs.python.org/3/howto/regex.html)
- [Real Python: Regular Expressions 1](https://realpython.com/regex-python/)
- [Real Python: Regular Expressions 2](https://realpython.com/regex-python-part-2/)
- [Data Camp RegEx Tutorial](https://www.datacamp.com/community/tutorials/python-regular-expression-tutorial)
- [Introduction to Regex](https://medium.com/better-programming/introduction-to-regex-8c18abdd4f70)
- [Understanding RegExes in Python](https://medium.com/better-programming/introduction-to-regex-8c18abdd4f70)
- [Demystifying RegExes in Python](https://medium.com/@snk.nitin/your-guide-to-using-regular-expressions-in-python-a7908b8e4b68)
- [Python RegExes](https://medium.com/@devopslearning/python-regular-expression-8ee28d35f3a7)
- [Mastering String Methods in Python](https://towardsdatascience.com/mastering-string-methods-in-python-456174ede911)