In [5]:
# 看过了基础的python，向中级的python语法的学习
# 22 尽量用辅助类来维护程序的状态，而不要用字典和元组
from collections import namedtuple
Grade=namedtuple('Grade',('score','weight'))

class Subject(object):
    def __init__(self):
        self._grades=[]
    def report_grades(self,score,weight):
        self._grades.append(Grade(score,weight))
    def average_grade(self):
        total,total_weight=0,0
        for grade in self._grades:
            total+=grade.score*grade.weight
            total_weight+=grade.weight
        return total/total_weight
    
class Student(object):
    def __init__(self):
        self._subjects=dict()
    def subject(self,name):
        if name not in self._subjects:
            self._subjects[name]=Subject()
        return self._subjects[name]
    def average_grade(self):
        total,count=0,0
        for subject in self._subjects.values():
            total+=subject.average_grade()
            count+=1
        return total/count

class Gradebook(object):
    def __init__(self):
        self._students=dict()
        
    def student(self,name):
        if name not in self._students:
            self._students[name]=Student()
        return self._students[name]
    
book=Gradebook()
albert=book.student('Albert Einstein')
math=albert.subject('Math')
math.report_grade(80,0.10)
# ...
print(albert.average_grade())
>>>
81.5        

In [None]:
# 23 简单的借口应该接收函数，而不是类的实例

def increment_with_report(current,increments):
    add_count=0
    def missing():
        nolocal added_count # Stateful closure
        added_count+=1
        return 0
    result=defaultdict(missing,current)
    for key,amount in increments:
        result[key]+=amount
    return result,added_count

class BetterCountMissing(object):
    def __init__(self):
        self.added=0
    def __call__(self):
        self.added+=1
        return 0
    
counter=BetterCountMissing()
result=defaultDict(counter,current);
for key,amount in increments:
    result[key]+=amount
assert counter.added=2



In [1]:
from threading import Thread
import os
# 24 以@classmethod形式的多态去通用地构建对象
class InputData(object):
    def read(self):
        raise NotImplementedError
        
class PathInputData(InputData):
    def __init__(self,path):
        super().__init__()
        self.path=path
        
    def read(self):
        return open(self.path).read()
    
class Worker(object):
    def __init__(self,input_data):
        self.input_data=input_data
        self.result=None
        
    def map(self):
        raise NotImplementedError
        
    def reduce(self,other):
        raise NotImplementedError

class LineCountWorker(Worker):
    def map(self):
        data=self.input_data.read()
        self.result=data.count('\n')
        
    def reduce(self,other):
        self.result+=other.result
        
        
def create_workers(input_list):
    workers=[]
    for input_data in input_list:
        workers.append(LineCountWorker(input_data))
    return workers


def execute(workers):
    threads=[Thread(target=w.map) for w in workers]
    for thread in threads: thread.start()
    for thread in threads: thread.join()
        
    first,rest= workers[0],workers[1:]
    for work in rest:
        first.reduce(worker)
    return first.result


def mapreduce(data_dir)
    inputs=generate_inputs(data_dir)
    workers=create_workers(inputs)
    return execute(workers)


from tempfile import TemporaryDirectory
 
def write_test_file(tmpdir):
    #...

with TemporaryDirectory() as tmpdir:
    write_test_files(tmpdir)
    result=mapreduce(tmpdir)
    
print('There are',result,'lines')    
    

class GenericInputData(object):
    def read(self):
        raise NotImplementedError
        
    @classmethod
    def generate_inputs(cls,config):
        raise NotImplementedError
        
class GenericInputData(object):
    def read(self):
        raise NotImplementedError
        
    @classmethod
    def generate_inputs(cls,config):
        raise NotImplementedError
        
        
class PathInputData(GenericInputData):
    def read(self):
        return open(self.path).read()
    @classmethod
    def generate_inputs(cls,config):
        data_dir=config['data_dir']
        for name in os.listdir(data_dir):
            yield cls(os.path.join(data_dir,name))
        
class GenericWorker(object):
    def map(self):
        raise NotImplementedError
    def reduce(self,other):
        raise NotImplementedError
        
    @classmethod
    def create_workers(cls,input_class,config):
        workers=[]
        for input_data in input_class.generate_inputs(config):
            workers.append(cls(input_data))
        return workers
    
class LineCountWorker(GenericWorker):
    #...
    
def mapreduce(worker_class,input_class,config):
    workers=worker_class.create_workers(input_class,config)
    return execute(workers)

with TemporaryDirectory() as tmpdir:
    write_test_files(tmpdir):
    config={'data_dir':tmpdir}
    result=mapreduce(LineCountWorker,PathInputData,config)
    

In [None]:
# 25 用super初始化父类
class MyBaseClass(object):
    def __init__(self,value):
        self.value=value
class MyChildClass(MyBaseClass):
    def __init__(self):
        MyBaseClass.__init__(self,5)
        
        
# 如果子类受到了多重继承的影响（通常应该避免这种）      

class TimesTwo(object):
    def __init__(self):
        self.value*=2
        
class PlusFive(object):
    def __init__(self):
        self.value+=5
        
class OneWay(MyBaseClass,TimesTwo,PlusFive):
    def __init__(self,value):
        MyBaseClass.__init__(self,value)
        TimesTwo.__init__(self)
        PlusFive.__init__(self)
        
        
# python3 则没有这些问题，因为它提供了一种不带参数的super调用方式。
class Explicit(MyBaseClass)：
    def __init__(self,value):
        super(__class__,self).__init__(value*2)
        
class Implicit(MyBaseClass):
    def __init__(self,value):
        super().__init__(value*2)
        
assert Explicit(10).value== Implicit(10).value

#由于python3程序可以在方法中通过__class__变量准确地引用当前类。

In [None]:
# 26 只有使用mix-in组件制作工具类时进行多重继承

# 了解

In [1]:
# 28 继承collections.abc以实现自定义的容器类型
class FrequencyList(list):
    def __init__(self,members):
        super().__init__(members)
    def frequency(self):
        counts={}
        for item in self:
            counts.setdefault(item,0)
            counts[item]+=1
        return counts
    
foo=FrequencyList(['a','b','a','c','b','a','d'])
print('Length is',len(foo))
foo.pop()
print('After pop:',repr(foo))
print('Frequency:',foo.frequency())


Length is 7
After pop: ['a', 'b', 'a', 'c', 'b', 'a']
Frequency: {'a': 3, 'b': 2, 'c': 1}


In [None]:
# 第29条：用纯属性取代get 和set方法
class VoltageResistance(Resistor):
    def __init__(self,ohms):
        super().__init__(ohms)
        self._voltage=0
        
    @property
    def voltage(self):
        return self._voltage
    
    @voltage.setter
    def voltage(self,voltage):
        self._voltage=voltage
        self.current=self._voltage/self.ohms
        
# @property的最大缺点在于：和属性相关的方法，只能在子类里面共享，而与之无关的其他类，则无法
#而与之无关的其他类，则无法服用同一份实现代码。


class LazyDB(object):
    def __init__(self):
        self.exists=5
    def __getattr__(self,name):
        value='Value for %s'%name
        setattr(self,name,value)
        return value
    
    
data=LazyDB()
print('Before:',data.__dict__)
print('foo:',data.foo)
print('After:',data.__dict__)
        
# 31 用描述符来改写需要复用的@property方法

class Homework(object):
    def __init__(self):
        self._grade=0
        
    @propery
    def grade(self):
        return self._grade
    
    @grade.setter
    def grade(self,value):
        if not (0<=value<=100):
            raise ValueError('Grade must be between 0 and 100')
        self._grade=value
        
galileo=Homework()
galileo.grade=95


class Grade(object)：
    def __get__(*args,**kwargs):
        #...
        
    def __set__(*args,**kwargs)：
        #...
        
class Exam(object):
    #Class attributes
    math_grade=Grade()
    writing_grade=Grade()
    science_grade=Grade()
    
    
exam=Exam()
exam.writing_grade=40

        

In [None]:
class Grade(object):
    def __init__(self):
        self._value=0
        
    def __get__(self,instance,instance_type):
        return self._value
    def __set__(self,instance,value):
        if not (0 <=value <=100):
            raise ValueError('Grade must be between 0 and 100')
        self._value=value
        
        
first_exam=Exam()
first_exam.writing_grade=82
first_exam.science_grade=99
print('Writing',first_exam.writing_grade)
print('Science',first_exam.science_grade)

In [3]:
# 36 用subprocess模块管理子进程
import subprocess

proc=subprocess.Popen(['echo','Hello from the child!'],stdout=subprocess.PIPE)
out,err=proc.communicate()
print(type(out))
print(type(out.decode('utf-8')))

<class 'bytes'>
<class 'str'>


In [4]:
proc=subprocess.Popen(['sleep','0.3'])
while proc.poll() is None:
    print('Working...')
    # some time-consuming work here
    
print('Exit statue',proc.poll())

Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...

Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...
Working...

In [11]:
from time import time
def run_sleep(period):
    proc=subprocess.Popen(['sleep',str(period)])
    return proc

start=time()
procs=[]
for _ in range(10):
    proc=run_sleep(0.1)
    procs.append(proc)
    
    
for proc in procs:
    proc.communicate()

end=time()
print('Finished in %.3f seconds'%(end-start))

Finished in 0.146 seconds


In [13]:
proc=run_sleep(10)
try:
    proc.communicate(timeout=0.1)
except subprocess.TimeoutExpired:
    proc.terminate()
    proc.wait()

In [None]:
# 多线程是的程序看上去好像能够在同一时间做许多事情。
# 多线程的第二理由，是处理阻塞式的I/o操作，

In [15]:
import select

def slow_systemcall():
    select.select([],[],[],0.1)
    
start=time()
for _ in range(5):
    slow_systemcall()
end=time()
print('Took %.3f seconds'%(end-start))

Took 0.514 seconds


In [16]:
from threading import Thread
start=time()
threads=[]
for _ in range(5):
    thread=Thread(target=slow_systemcall)
    thread.start()
    threads.append(thread)
    
for thread in threads:
    thread.join()
    
end=time()
print('Took %.3f seconds'%(end-start))

Took 0.103 seconds


In [22]:
# 38 在线程中使用lock来防止数据竞争
from threading import Lock

class Counter(object):
    def __init__(self):
        self.count=0
        
    def increment(self,offset):
        self.count+=offset

class LockingCounter(object):
    def __init__(self):
        self.lock=Lock()
        self.count=0
    def increment(self,offset):
        with self.lock:
            self.count+=offset


def worker(sensor_index,how_many,counter):
    for _ in range(how_many):
        counter.increment(1)

def run_threads(func,how_many,counter):
    threads=[]
    for i in range(5):
        args=(i,how_many,counter)
        thread=Thread(target=func,args=args)
        threads.append(thread)
        thread.start()
    for thread in threads:
        thread.join()
        
how_many=10**5
counter=LockingCounter()

print(counter.__dict__)
run_threads(worker,how_many,counter)
print('Counter should be %d ,found %d'% (5*how_many,counter.count))

{'lock': <unlocked _thread.lock object at 0x10fff85f8>, 'count': 0}
Counter should be 500000 ,found 500000


In [None]:
#39 用Queue 来协调各线程之间的工作
# 生产者和消费者

Class MyQueue(object):
    def __init__(self):
        self.items=deque()
        self.lock=Lock()
        
    def put(self,item):
        with self.lock:
            self.items.append(item)
            
    def get(self):
        with self.lock:
            return self.items.popleft()

In [23]:
from queue import Queue
queue=Queue()

def consumer():
    print('Consumer waiting')
    queue.get()
    print('Consumer done')
    
thread=Thread(target=consumer)
thread.start()

print('Producer putting')
queue.put(object())
thread.join()
print('Producer done')

Consumer waiting
Producer putting
Consumer done
Producer done


In [None]:
# 40 考虑用协程来并发地运行多个函数
def my_coroutine():
    while True:
        received=yield
        print('Received',received)
        
it=my_coroutine()
next(it)
it.send('First')
it.send('Second')


In [35]:
# 41 考虑用concurrent.futures来实现真正的平行计算
# 我们可以试着通过内置的concurrent.futures模块，来利用另外一个名叫multiprocessing的内置模块

from concurrent.futures import ThreadPoolExecutor
from random import random

# def gcd(pair):
#     a,b=pair
#     low=min(a,b)
#     for i in range(low,0,-1)
#         if a%i==0 and b%i==0:
#             return i

       
#for (randint(100000000,9999999),randint(100000000,9999999)) in range(4):
    
    

#print(numbers)

85262051.19190511


In [None]:
start=time()
pool=ThreadPoolExecutor(max_workers=2)
results=list(pool.map(gcd,numbers))
end=time()
print('Took %.3f seconds'%(end-start))

In [None]:
start=time()
pool=ProcessPoolExecutor(max_workers=2)
results=list(pool.map(gcd,numbers))
end=time()
print('Took%.3f second'%(end-start))
# 对于某些较为孤立，且数据利用率较高的任务来说，这套方案非常合适。

In [3]:
# 42 用functools.wraps定义函数修饰器

def trace(func):
    def wrapper(*args,**kwargs):
        result=func(*args,**kwargs)
        print('%s(%r,%r)->%r'%(func.__name__,args,kwargs,result))
        return result
    return wrapper


@trace
def fibonacci(n):
    """Return the n-th Fibonacci number"""
    if n in (0,1):
        return n
    return (fibonacc(n-2)+fibonacci(n-1)
 
            

fibonacci=trace(fibonacci)
            

SyntaxError: invalid syntax (<ipython-input-3-653fec796dc1>, line 20)

In [None]:
def trace(func):
    @wraps(func)
    def wrapper(*args,**kwargs):
        #...
    return wrapper

@trace
def ifbonacci(n):
    #...
    

In [None]:
# 43 考虑以contextlib和with语句来改写可服用的try/finnally代码
lock=Lock()
with lock:
    print('lock is help')
    
    
lock.acquire()
try:
    print('Lock is held')
finally:
    lock.release()
    
# 使用带有目标的with语句
# 传给with语句的那个情景管理器，本身就可以

# 可以用with语句来改写try/finally块中的逻辑，以便提升复用程度，并使代码更加整洁
# 内置的contextlib模块提供了名叫contextmanager的装饰器
# 情境管理器可以通过yield语句向with语句返回一个值

In [None]:
# 44 用copyreg实现可靠的pickle操作
import pickle


with open('test.db','wb') as f:
        pickle.dump(stat,f)
        
with open('test.db','rb') as f
        pickle.load(f)
    
    
    
        

In [None]:
# 45 应该使用datetime模块来处理本地时间，而不是用time模块

from datetime import datetime,timezone

now=datetime(2014,8,10,18,18,30)


In [4]:
# 46 使用内置算法和数据结构

# cllections 模块中的deque类，是一中双向队列(double-ended queue,双端队列)
# 从该队列的头部或尾部插入或移除一个元素。

from collections import deque

fifo=deque()
fifo.append(1)
x=fifo.popleft()

In [5]:
from collections import OrderedDict
a=OrderedDict()
a['foo']=1
a['bar']=2
b=OrderedDict()
b['foo']='red'
b['bar']='blue'

for value1,value2 in zip(a.values(),b.values()):
    print(value1,value2)

1 red
2 blue


In [None]:
from collections import defaultdict

# 这使得该字典能够以0为默认值

stats=defaultdict(int)
stats['my_counter']+=1


In [None]:
# 二分查找
# 在list上面使用index方法来搜索某个元素，所耗的时间会与列表的长度呈线性比例

# bisect 模块中的bisect_left等函数，提供了高效的而二分折版搜索算法，能够在一系列
# 排列好顺序的元素之中搜寻某个值，

from bisect import bisect_left

i=bisect_left(x,991234)


In [None]:
#itertools 函数分为三大类
# itertools函数分为三大类
# - 

In [8]:
# 47 在重视精度的场合，应该使用decimal

from decimal import Decimal 

rate=1.45
seconds=3*60+42
cost=rate*seconds/60
print(cost)

round(cost)

rate=Decimal('1.45')
seconds=Decimal('222')
cost=rate*seconds/Decimal('60')
print(cost)

5.364999999999999
5.365
