# Python Intermediate

![python](https://upload.wikimedia.org/wikipedia/commons/c/c3/Python-logo-notext.svg)

![one step](/files/images/onestep.jpg)

## Functional programming

![lambda](/files/images/lambda.png)

> It’s almost certainly true that functional programming is the next big thing

--- Uncle Bob Martin

In [None]:
# use function as parameter
def get_visit_ips(file_path, callback):
    """Apply ``callback`` to every line of a text file and collect the results.

    Args:
        file_path: Path of the file to read.
        callback: Callable invoked once per line; each line is passed as read,
            including its trailing newline if present.

    Returns:
        A list holding one ``callback`` result per line, in file order.
    """
    with open(file_path) as fp:
        return [callback(line) for line in fp]

In [None]:
# return function
def cache(func):
    """Return a memoizing wrapper around ``func``.

    Results are stored per distinct call (``attr``, the extra positional
    arguments and the keyword arguments), so two calls that share ``attr`` but
    differ in any other argument never receive each other's result.

    Args:
        func: Callable whose first positional argument is ``attr``.

    Returns:
        A function with the same call signature and metadata as ``func``.
    """
    import functools  # local import: this cell has no import section

    cached = {}

    @functools.wraps(func)
    def _func(attr, *args, **kwargs):
        # Keyword names are unique strings, so sorting never compares values.
        key = (attr, args, tuple(sorted(kwargs.items())))
        try:
            hash(key)
        except TypeError:
            # Unhashable arguments cannot be a dict key: compute without caching.
            return func(attr, *args, **kwargs)
        if key not in cached:
            cached[key] = func(attr, *args, **kwargs)
        return cached[key]

    return _func
        

In [None]:
# lambda
# An anonymous function that takes no arguments and returns True.
lambda : True

In [None]:
# One parameter: returns x raised to the power of 2.
lambda x: x ** 2

In [None]:
# Two parameters: returns the result of x + y.
lambda x, y: x + y

In [None]:
# map
# Applies the lambda to each of 0..9. NOTE: in Python 3 map() returns a lazy
# iterator; wrap the call in list(...) to see the squared values.
map(lambda x: x ** 2, range(10))

In [None]:
# reduce
from functools import reduce

# Folds 0..9 from left to right with addition: ((0 + 1) + 2) + ... + 9.
reduce(lambda x, y: x + y, range(10))

In [None]:
# filter
# Keeps the even numbers of 0..9. NOTE: in Python 3 filter() returns a lazy
# iterator; wrap the call in list(...) to see the kept values.
filter(lambda x: x % 2 == 0, range(10))

### Decorator

![Telecom Tree](/files/images/telecomtree.jpg)

In [None]:
class MyDict(dict):
    """A dict that exposes its largest value through the ``max`` attribute."""

    @property
    def max(self):
        """Largest of the stored values, recomputed on every access."""
        largest = max(self.values())
        return largest


MyDict(a=1, b=2, c=3).max

In [None]:
# cache wrapper
def cache(func):
    """Memoize ``func`` on its positional arguments.

    The first call with a given argument tuple runs ``func`` and stores the
    result; later calls with the same arguments print 'cache hit!' and return
    the stored result instead of calling ``func`` again.

    Args:
        func: Callable taking only hashable positional arguments.

    Returns:
        A wrapper that keeps the name and docstring of ``func``.
    """
    import functools  # local import: this cell has no import section

    cached = {}

    @functools.wraps(func)
    def _f(*args):
        if args in cached:
            print('cache hit!')
        else:
            cached[args] = func(*args)
        return cached[args]

    return _f

@cache
def sum_(*args):
    """Return the sum of all positional arguments."""
    return sum(args)

# The same arguments are passed twice: the second call is expected to be
# answered by the @cache decorator instead of being recomputed.
print(sum_(1,2,3))
print(sum_(1,2,3))

### One more step

In [None]:
# cache wrapper for function
import time

def cache(timeout):
    """Build a memoizing decorator whose entries expire after ``timeout`` seconds.

    Args:
        timeout: Maximum age, in seconds, of a stored result. An entry older
            than this is recomputed on the next call.

    Returns:
        A decorator. The decorated function caches results per tuple of
        (hashable) positional arguments, prints 'cache hit!' whenever a stored
        result is reused, and keeps the name and docstring of the original.
    """
    import functools  # local import: only ``time`` is imported by this cell

    def _wrapped(func):
        # Maps args -> (result, monotonic time at which it was stored).
        cached = {}

        @functools.wraps(func)
        def _f(*args):
            entry = cached.get(args)
            # The monotonic clock never jumps when the system time is changed,
            # so an entry's age cannot come out negative or inflated.
            if entry is None or (time.monotonic() - entry[1]) > timeout:
                result = func(*args)
                cached[args] = (result, time.monotonic())
            else:
                print('cache hit!')
                result = entry[0]
            return result
        return _f
    return _wrapped

@cache(2)
def sum_(*nums):
    """Return the sum of all positional arguments."""
    return sum(nums)

# Two calls in quick succession: the second one falls inside the 2-second
# window given to @cache and is expected to reuse the stored result.
print(sum_(1,2,3))
print(sum_(1,2,3))
from time import sleep
# Wait for the timeout so that the stored result should have expired.
sleep(2)
print(sum_(1,2,3))

### functools

>Tools for working with functions and callable objects

In [None]:
# functools.partial
import functools


def echo(name, city, country):
    """Print a one-line statement of where ``name`` lives."""
    sentence = f'{name} live in {city}, {country}'
    print(sentence)


# Pre-bind the location so that only the name remains to be supplied.
fn = functools.partial(
    echo,
    city='Hangzhou',
    country='China',
)

fn('Tom and Jerry')

In [None]:
# functools.wraps
import functools

def before_deco(f):
    """Decorate ``f`` so that 'before' is printed ahead of every call."""

    def _announce_then_call(*args, **kwargs):
        print('before')
        outcome = f(*args, **kwargs)
        return outcome

    # Copy the metadata of f (name, docstring, ...) onto the wrapper.
    return functools.wraps(f)(_announce_then_call)

@before_deco
def test(name):
    """Print ``name``."""
    print(name)

# before_deco applies functools.wraps, so the decorated function still
# reports its own name here instead of the inner wrapper's name.
print(test.__name__)
test('hello')

### Practice

In [None]:
# implement a to_int function that converts hex bytes data to an integer
# eg:
#     to_int(b'\xef')  ==> 239
#     to_int(b'\xef\x01')  ==> 61185
# NOTE: builtin function ord can return the integer ordinal of a one-character string


## Unit Testing

In [None]:
# Unit Testing
import unittest

def to_int(data):
    """Convert bytes to the unsigned integer they encode.

    The first byte is the most significant one (big-endian), e.g.
    ``b'\\xef\\x01'`` is ``0xEF01``.

    Args:
        data: A bytes-like object.

    Returns:
        The non-negative integer value; ``0`` for empty input.
    """
    return int.from_bytes(data, 'big')

class TestToInt(unittest.TestCase):
    """Unit tests for to_int."""

    def test_to_int_with_one_char_string(self):
        # A single byte is expected to map to its own value.
        self.assertEqual(to_int(b'\x01'), 1)
    
    def test_to_int_with_two_chars_string(self):
        # 0xEF01 == 61185, i.e. the first byte is the most significant one.
        self.assertEqual(to_int(b'\xef\x01'), 61185)

# Collect every test method of the class into a suite and run it with the
# text runner.
suite = unittest.TestLoader().loadTestsFromTestCase(TestToInt)
unittest.TextTestRunner().run(suite)

In [None]:
# mock
import time
import unittest
from unittest import mock


def delay_print(msg, delay):
    """Sleep for ``delay`` seconds, then print ``msg``."""
    time.sleep(delay)
    print(msg)


class TestDelayPrint(unittest.TestCase):
    """Tests for delay_print that never really sleep."""

    def test_delay_print_empty_string(self):
        # Replace time.sleep only while the call under test runs; the real
        # function is restored on leaving the with-block, so code executed
        # afterwards is unaffected.
        with mock.patch.object(time, 'sleep') as fake_sleep:
            delay_print('', 5)
        # The requested delay must have been forwarded to time.sleep.
        fake_sleep.assert_called_once_with(5)


suite = unittest.TestLoader().loadTestsFromTestCase(TestDelayPrint)
unittest.TextTestRunner().run(suite)

### Write UT friendly code

```python
from .rpc import Server

class Master(object):
    def __init__(self, port):
        # The Server is constructed right here, so a unit test cannot
        # substitute a fake one without patching the import.
        self._server = Server(port)
    
    # ...
        
master = Master(12234)
```

```python
from .rpc import Server

class Master(object):
    def __init__(self, server):
        # The server object is handed in by the caller, so a unit test can
        # pass a stub instead of a real Server.
        self._server = server
    
    # ...
    
master = Master(Server(12234))
```

### Practice

> Write unit tests (UT) for the to_int function

## Introspection

![introspection](/files/images/introspection.jpg)

In [None]:
# dir
import os

# dir() returns the list of attribute names of the object it is given,
# here the os module.
print(dir(os))

In [None]:
# type
s = 'hello world'

# type() returns the class of the object it is given.
print(type(s))

> Everything is an object in Python.

In [None]:
a = 5
# The class of the value 5 ...
print(type(a))
# ... and the class of that class: classes are objects too.
print(type(type(a)))

In [None]:
# hasattr
class DynamicObject(object):
    """Object whose attributes ``a``, ``b`` and ``c`` are computed on demand."""

    def __getattr__(self, attr):
        """Return the upper-cased name for ``a``/``b``/``c``.

        Python calls this hook only after normal attribute lookup has failed.

        Raises:
            AttributeError: For any other name; the message names the
                missing attribute.
        """
        if attr in ('a', 'b', 'c'):
            return attr.upper()
        raise AttributeError(
            f'{type(self).__name__!r} object has no attribute {attr!r}')

obj = DynamicObject()

# hasattr() reports False when the attribute lookup raises AttributeError.
print(hasattr(obj, 'a'))
print(hasattr(obj, 'd'))


### Practice

In [None]:
# 1. implement a bash wrapper, so that I can call bash command like a class attribute
#
#     bash = BashWrapper()
#     bash.ping('10.69.69.124')
#     bash.ls('-l', '~')
#
# Write your code here

In [None]:
# one more step...
# 2. what about a bash wrapper module, so that I can call bash command like a module import
#     from bashwrapper import ping
#     ping('10.69.69.124')
#     from bashwrapper import ls
#     ls('-l', '~')

## Parallel Handling

![run](/files/images/run.png)

In [None]:
# fetch content size from a series of web sites
# "import urllib" alone does not load the urllib.request submodule.
import urllib.request

urls = ['https://www.baidu.com',
        'https://www.google.com',
        'https://www.facebook.com']

# One site after the other: each download starts only when the previous one
# has finished.
for url in urls:
    # The with-block closes the HTTP response once its body has been read.
    with urllib.request.urlopen(url) as resp:
        print(f'{url} - {len(resp.read())}')

In [None]:
# introduce thread
from threading import Thread
# "import urllib" alone does not load the urllib.request submodule.
import urllib.request

urls = ['https://www.baidu.com',
        'https://www.google.com',
        'https://www.facebook.com']

class UrlFetchThread(Thread):
    """Thread that downloads one URL and prints the size of its body."""

    def __init__(self, url, *args):
        """Remember ``url``; extra positional arguments go to ``Thread``."""
        super(UrlFetchThread, self).__init__(*args)
        self._url = url

    def run(self):
        """Fetch the URL and print '<url> - <number of bytes>'."""
        with urllib.request.urlopen(self._url) as resp:
            print(f'{self._url} - {len(resp.read())}')

# A real list is needed because the threads are iterated twice below.
threads = [UrlFetchThread(url) for url in urls]
# Start every thread before waiting for any of them; joining right after each
# start would run the downloads one by one.
for t in threads:
    t.start()
for t in threads:
    t.join()

In [None]:
# introduce multi process
from multiprocessing import Process
# "import urllib" alone does not load the urllib.request submodule.
import urllib.request

urls = ['https://www.baidu.com',
        'https://www.google.com',
        'https://www.facebook.com']

class UrlFetchProcess(Process):
    """Process that downloads one URL and prints the size of its body."""

    def __init__(self, url, *args):
        """Remember ``url``; extra positional arguments go to ``Process``."""
        super(UrlFetchProcess, self).__init__(*args)
        self._url = url

    def run(self):
        """Fetch the URL and print '<url> - <number of bytes>'."""
        with urllib.request.urlopen(self._url) as resp:
            print(f'{self._url} - {len(resp.read())}')

# Guard the launch so that a child process re-importing this module does not
# start processes of its own.
if __name__ == '__main__':
    # A real list is needed because the processes are iterated twice below.
    processes = [UrlFetchProcess(url) for url in urls]
    # Start every process before waiting for any of them; joining right after
    # each start would run the downloads one by one.
    for p in processes:
        p.start()
    for p in processes:
        p.join()

In [None]:
# use Pool
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
# fetch_content needs the urllib.request submodule, which must be imported
# explicitly.
import urllib.request

urls = ['https://www.baidu.com',
        'https://www.google.com',
        'https://www.facebook.com']

def fetch_content(url):
    """Download ``url`` and print '<url> - <number of bytes>'."""
    with urllib.request.urlopen(url) as resp:
        print(f'{url} - {len(resp.read())}')

# Guard the launch so that a worker process re-importing this module does not
# create pools of its own.
if __name__ == '__main__':
    # Process-based pool: map() blocks until every URL has been handled.
    pool = Pool()
    pool.map(fetch_content, urls)
    pool.close()
    pool.join()

    # -----------------------------------------
    # Same API, but the workers are threads instead of processes.
    thread_pool = ThreadPool()
    thread_pool.map(fetch_content, urls)
    thread_pool.close()
    thread_pool.join()

In [None]:
# introduce aiohttp
import asyncio  # required by asyncio.gather / asyncio.run below

import aiohttp

urls = ['https://www.baidu.com',
        'https://www.google.com',
        'https://www.facebook.com']

async def fetch_content(session, url):
    """Fetch ``url`` through ``session`` and print '<url> - <text length>'."""
    async with session.get(url) as response:
        # Length of the decoded text, not of the raw bytes.
        length = len(await response.text())
        print(f'{url} - {length}')

async def main():
    """Fetch all URLs concurrently over one shared client session."""
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(*[fetch_content(session, url) for url in urls])

# NOTE: asyncio.run() raises RuntimeError when an event loop is already
# running in this thread; in that situation use "await main()" instead.
asyncio.run(main())

In [None]:
# Queue
from multiprocessing import Process, Queue


def f(q, num):
    """Put the message ``[num, None, 'hello']`` on the queue ``q``."""
    message = [num, None, 'hello']
    q.put(message)


if __name__ == '__main__':
    q = Queue()
    p1 = Process(target=f, args=(q, 12))
    p2 = Process(target=f, args=(q, 24))
    for worker in (p1, p2):
        worker.start()
    # One message is read per worker.
    for _ in (p1, p2):
        print(q.get())
    for worker in (p1, p2):
        worker.join()

### Practice

In [None]:
# (after class) implement a FAST ftp downloader 
# eg: download ftp://hztdltev01.china.nsn-net.net/esa_data/


## Web Development

![Python Web](/files/images/pyweb.png)

In [None]:
# http.server (the Python 3 home of BaseHTTPServer and SimpleHTTPServer)
# command-line equivalent: python -m http.server 8080
from http.server import HTTPServer, SimpleHTTPRequestHandler

# '0.0.0.0' binds every network interface; this server listens on port 8282.
server = HTTPServer(('0.0.0.0', 8282), SimpleHTTPRequestHandler)

# Blocks here and keeps handling requests until the process is interrupted.
server.serve_forever()

In [None]:
# with micro framework bottle.py
from bottle import route, run, template


@route('/hello/<name>')
def index(name):
    """Render a bold greeting for the ``name`` taken from the URL path."""
    page = '<b>Hello {{name}}</b>!'
    return template(page, name=name)


run(host='localhost', port=8181)

### Practice

In [None]:
# implement a simple REST service for user operation using Python, 
#   you can store the user info into memory, DB or files.
#   the return data should be in JSON format, 
#     GET/POST/DELETE method should be supported
# Example:
#     GET  /api/users  ==> 
#         ['tom', 'jerry', 'lily']
#     POST  /api/users  ['james', 'terry']  ==>  
#         ['tom', 'jerry', 'lily', 'james', 'terry']
#     DELETE  /api/users/james  ==>  
#         ['tom', 'jerry', 'lily', 'terry']
from bottle import get, post, delete, response, run
import json


@get('/api/users')
def users():
    """Answer GET /api/users with a fixed JSON list of user names."""
    names = ['tom', 'jerry', 'lily']
    response.set_header('Content-Type', 'application/json')
    return json.dumps(names)


run(host='localhost', port=11111)

## A byte of pandas

![pandas](/files/images/panda.jpg)

In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns

In [None]:
# a simplest example
from pandas import DataFrame
# A single-column frame holding the values 1..10 with the default index.
data = DataFrame([1,2,3,4,5,6,7,8,9,10])
# Draw the column as a line plot.
data.plot()

In [None]:
# read data from nginx access log
from pandas import DataFrame
import re

# One capture group per DataFrame column built below, in this order:
#   src, date (the text between [ ]), method, dst, protocol, code, length, agent.
# The two \S+ fields after src and the quoted field before agent are matched
# but not captured.
_ptn = re.compile(r'(\S+) \S+ \S+ \[(.+)\] "(\S+) (\S+) (\S+)" (\d+) (\d+) ".*" "(.*)"')
def _parse_line(line):
    """Return the captured fields of one log line as a tuple of strings.

    NOTE: ``search`` returns None for a line that does not match the pattern,
    in which case ``.groups()`` raises AttributeError.
    """
    return _ptn.search(line).groups()

# The file is consumed line by line; every parsed tuple becomes one row.
with open('access_10000.log') as f:
    data = DataFrame(
        map(_parse_line, f),
        columns=['src', 'date', 'method', 'dst', 'protocol', 'code', 'length', 'agent'])

In [None]:
# using data
data # all columns and rows
data[:10] # first 10 rows
data['src'] # only src column
data[['src', 'agent']] # src and agent columns
# 'code' is converted with astype(int) before the numeric comparison
data[data['code'].astype(int) >= 400] # rows whose code is 400 or greater

In [None]:
# groupby
# Count the rows per src value, order the counts from largest to smallest and
# keep the first ten.
data.groupby('src').size().sort_values(ascending=False)[:10] # top 10 visitors

In [None]:
# or
# value_counts() counts the occurrences of each src value and returns them in
# descending order by default.
data['src'].value_counts()[:10] # top 10 visitors

In [None]:
# use time series
# 1. convert string date to datetime
from dateutil.parser import parse

def convert_date(raw_date):
    """Parse a raw date string after turning its first ':' into a space."""
    # Only the first colon is replaced; the remaining ones are left untouched.
    normalized = raw_date.replace(':', ' ', 1)
    return parse(normalized)


data['date'] = data['date'].apply(convert_date)

In [None]:
# 2. datetime series
from pandas import Series
from datetime import datetime

# Days of January 2011 used as the index; two of them appear twice.
dates = [datetime(2011, 1, day) for day in (2, 2, 4, 4, 5, 6)]
# One random value per index entry.
ts = Series(np.random.randn(len(dates)), index=dates)
print(ts)
# Select by a date written as a string.
ts['2011/1/2']

In [None]:
# groupby with timeseries, and plot
# Number of rows per distinct date value (the result of size() is a Series,
# despite the name df).
df = data.groupby('date').size()
# Running total of those counts, drawn on a 12 x 6 inch figure.
df.cumsum().plot(figsize=(12, 6))
# df.plot(figsize=(12,6), style='o')

### Practice

In [None]:
# based on access_10000.log,
# 1. list the top 5 web browsers
# 2. plot it

## Reference

* http://www.diveintopython.net/power_of_introspection/
* https://docs.python.org/3/library/inspect.html
* https://docs.python.org/3/howto/functional.html
* https://en.wikipedia.org/wiki/Functional_programming
* http://butunclebob.com/files/downloads/Prime%20Factors%20Kata.ppt
* https://blog.8thlight.com/uncle-bob/2013/05/27/TheTransformationPriorityPremise.html
* https://wiki.python.org/moin/Generators
* https://docs.python.org/3/library/threading.html
* https://docs.python.org/3/library/multiprocessing.html
* http://bottlepy.org/docs/dev/index.html
* https://docs.python.org/3/library/http.html

## Q & A