In [1]:
import re

The essential functionality is first compiling a pattern using `re.compile()` and then calling methods on the compiled pattern: `match()` (finds a match only at the beginning of the string, if one exists), `search()` (finds a match anywhere in the string but returns only the first one found), `findall()` (finds all the matches and returns them as a list), or `finditer()` (returns an iterator over all the matches, which can be used directly in a `for` loop).

In [83]:
# Sample sentence containing two dates written as month/day/year.
text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'
# One capture group per date component.
pat = re.compile(r'(\d+)/(\d+)/(\d+)')

# findall() returns one tuple of captured strings per date found.
for month, day, year in pat.findall(text):
    print(month, day, year)

11 27 2012
3 13 2013


In [86]:
# finditer() yields match objects one at a time; group(0) is the whole match.
for date_match in pat.finditer(text):
    print(date_match.group(0))

11/27/2012
3/13/2013


You have a string that you want to parse left to right into a stream of tokens

Input : text = 'foo = 23 + 42 * 10'

Output: tokens = [('NAME', 'foo'), ('EQ', '='), ('NUM', '23'), ('PLUS', '+'),
('NUM', '42'), ('TIMES', '*'), ('NUM', '10')]

In [57]:
# Sample expression that the following cells split into tokens.
text = 'foo = 23 + 42 * 10'

Define each token type as a regular expression with a named capture group, then join them into one master pattern:

In [58]:
# One named group per token type; the group name doubles as the token label.
NAME = r'(?P<NAME>[a-zA-Z][a-zA-Z_0-9]*)'
NUM = r'(?P<NUM>\d+)'
PLUS = r'(?P<PLUS>\+)'
TIMES = r'(?P<TIMES>\*)'
EQ = r'(?P<EQ>=)'
# A whitespace token, r'(?P<WS>\s+)', is left disabled here, so text that
# matches none of the alternatives (e.g. spaces) produces no token.

# Combine the alternatives into a single master pattern.
master_pattern = '|'.join((NAME, NUM, PLUS, TIMES, EQ))
pat = re.compile(master_pattern)
pat

re.compile(r'(?P<NAME>[a-zA-Z][a-zA-Z_0-9]*)|(?P<NUM>\d+)|(?P<PLUS>\+)|(?P<TIMES>\*)|(?P<EQ>=)',
           re.UNICODE)

In [59]:
# lastgroup is the name of the alternative that matched, i.e. the token type.
for match in pat.finditer(text):
    token_type = match.lastgroup
    print(token_type, ':', match.group(token_type))

NAME : foo
EQ : =
NUM : 23
PLUS : +
NUM : 42
TIMES : *
NUM : 10


In [63]:
from collections import namedtuple

# Created once at module level so that every token, from every call, is an
# instance of the same class.
Token = namedtuple('Token', ['type', 'value'])


def generate_tokens(pat, text):
    """Lazily yield a ``Token(type, value)`` for each match of ``pat`` in ``text``.

    Args:
        pat: Compiled pattern (or pattern string) whose alternatives are
            named groups; the name of the group that matched becomes the
            token type.
        text: The string to scan from left to right.

    Yields:
        Token: ``type`` is the match's ``lastgroup`` and ``value`` is the
        full matched text.
    """
    for m in re.finditer(pat, text):
        yield Token(m.lastgroup, m.group())

In [64]:
# Drain the generator, printing one token per line.
for token in generate_tokens(pat, text):
    print(token)

Token(type='NAME', value='foo')
Token(type='EQ', value='=')
Token(type='NUM', value='23')
Token(type='PLUS', value='+')
Token(type='NUM', value='42')
Token(type='TIMES', value='*')
Token(type='NUM', value='10')


### Align text:

In [74]:
text = "Hello World"
# Left-justify: pad with spaces on the right up to a total width of 40.
text.ljust(40)

'Hello World                             '

In [75]:
# Centre the string in a 40-character field, padding both sides with spaces.
text.center(40)

'              Hello World               '

In [76]:
# Format-spec form of left alignment: '<' left-aligns within a width of 40.
print(f"{text:<40}")

Hello World                             


In [77]:
# '>' right-aligns within a width of 40.
print(f"{text:>40}")

                             Hello World


In [70]:
# '^' centres within a width of 40.
print(f"{text:^40}")

              Hello World               


In [93]:
num = 1234566
# The ',' format option inserts a comma as the thousands separator.
print(f"{num:,}")

1,234,566


In [82]:
text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'
# Backreferences \3, \1, \2 reorder the three captured groups and join them
# with '-' instead of '/'.
date_pat = re.compile(r'(\d+)/(\d+)/(\d+)')
date_pat.sub(r'\3-\1-\2', text)


'Today is 2012-11-27. PyCon starts 2013-3-13.'

In [98]:
# Two small ascending lists used as sample input for the merge functions.
sorted_l1 = [1, 6, 11]
sorted_l2 = [5, 8, 10, 12]


def merge_it(sorted_l1, sorted_l2):
    """Merge two ascending-sorted lists into one new ascending-sorted list.

    Every element of both inputs appears in the result, so values present
    in both lists are kept twice (the same result as
    ``list(heapq.merge(sorted_l1, sorted_l2))``). Neither input is modified.

    Args:
        sorted_l1: First list, already sorted in ascending order.
        sorted_l2: Second list, already sorted in ascending order.

    Returns:
        list: All elements of both inputs in ascending order.
    """
    res = []
    i, j = 0, 0
    while i < len(sorted_l1) and j < len(sorted_l2):
        # `<=` takes from the first list on ties, which keeps the merge
        # stable and guarantees no element is dropped.
        if sorted_l1[i] <= sorted_l2[j]:
            res.append(sorted_l1[i])
            i += 1
        else:
            res.append(sorted_l2[j])
            j += 1

    # At most one input still has elements left; the other slice is empty.
    res.extend(sorted_l1[i:])
    res.extend(sorted_l2[j:])

    return res



In [104]:
%%timeit
merge_it(sorted_l1, sorted_l2)

4.02 μs ± 54.2 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [102]:
import heapq

def merge_it2(sorted_l1, sorted_l2):
    """Merge two sorted lists with ``heapq.merge`` and return the result as a list."""
    merged = heapq.merge(sorted_l1, sorted_l2)
    return list(merged)
    

In [103]:
%%timeit
merge_it2(sorted_l1, sorted_l2)

8.53 μs ± 436 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


### Logging

In [1]:
import logging

In [4]:
def main(log_file='/project/log/app.log', level=logging.WARNING):
    """Configure root logging to a file and emit one sample message per level.

    Args:
        log_file: Path of the file the messages are written to. The default
            is the location this example originally hard-coded; pass a
            different path to run the example elsewhere.
        level: Minimum severity that is written. With the default
            (``logging.WARNING``) the INFO and DEBUG calls below are
            filtered out.
    """
    # basicConfig() does nothing if the root logger already has handlers, so
    # restart the Python session (kernel) after changing these settings for
    # the new configuration to take effect.
    logging.basicConfig(
        filename=log_file,
        level=level,
        format='%(levelname)s:%(asctime)s:%(message)s'
    )
    # Variables (to make the calls that follow work)
    hostname = 'www.python.org'
    item = 'spam'
    filename = 'data.csv'
    mode = 'r'

    # Example logging calls (insert into your program).
    # Arguments are passed separately so the message is only formatted if
    # the record is actually emitted.
    logging.critical('Host %s unknown', hostname)
    logging.error("Couldn't find %r" , item)
    logging.warning('Feature is deprecated')
    logging.info('Opening file %r, mode=%r', filename, mode)
    logging.debug('Got here')


main()

You would like to add a logging capability to a library, but don’t want it to interfere with programs that don’t use logging. For libraries that want to perform logging, you should create a dedicated logger object and initially configure it with a `NullHandler`, as follows:

```
# somelib.py
import logging
# Dedicated logger for this library, named after the module via __name__.
log = logging.getLogger(__name__)
# Attach a do-nothing handler so the library itself sets up no log output;
# an application that wants these messages configures logging on its side.
log.addHandler(logging.NullHandler())
# Example function (for testing)
def func():
    """Emit one CRITICAL and one DEBUG message through the library logger."""
    log.critical('A Critical Error!')
    log.debug('A debug message')
```
