# collections

Python 内置的数据类型有 list, tuple, dict, str 等；collections 模块在此基础上还提供了多个高性能的容器类型，本文介绍其中 5 个：

* Counter: 计数器
* OrderedDict: 有序字典
* defaultdict: 带有默认值的字典
* namedtuple: 生成可以通过属性访问元素内容的 tuple 子类
* deque: 双端队列，能够在队列两端添加或删除元素


In [6]:
import collections
from collections import Counter
from collections import OrderedDict
from collections import defaultdict
from collections import namedtuple
from collections import deque
collections?

Type:        module
String form: <module 'collections' from 'd:\\software\\Anaconda3\\envs\\ai\\lib\\collections\\__init__.py'>
File:        d:\software\anaconda3\envs\ai\lib\collections\__init__.py
Docstring:
This module implements specialized container datatypes providing
alternatives to Python's general purpose built-in containers, dict,
list, set, and tuple.

* namedtuple   factory function for creating tuple subclasses with named fields
* deque        list-like container with fast appends and pops on either end
* ChainMap     dict-like class for creating a single view of multiple mappings
* Counter      dict subclass for counting hashable objects
* OrderedDict  dict subclass that remembers the order entries were added
* defaultdict  dict subclass that calls a factory function to supply missing values
* UserDict     wrapper around dictionary objects for easier dict subclassing
* UserList     wrapper around list objects for easier list subclassing
* UserString   wrapper around string objects for easier string subclassing

In [11]:
# Create a tuple subclass called 'Point' with two named fields, x and y.
Point = namedtuple('Point',['x','y'])
p = Point(1,2.3)
# The fields are read by attribute name instead of by position.
[p.x, p.y]

[1, 2.3]

In [21]:
a = [1,2,3]
print(a)
print(a.pop())
print(a)
a.append('hh')
print(a)

[1, 2, 3]
3
[1, 2]
[1, 2, 'hh']


In [29]:
# Start from a deque holding 1..5 and show it before any change.
b = deque([1,2,3,4,5])
print(b)
# append() adds on the right end, appendleft() adds on the left end.
b.append('x')
b.appendleft('y')
b

deque([1, 2, 3, 4, 5])


deque(['y', 1, 2, 3, 4, 5, 'x'])

In [31]:
from collections import defaultdict
# The factory passed to defaultdict supplies the value for keys that are missing.
d = defaultdict(lambda:'N/A')
d['k1'] = 23
# 'k2' was never assigned, so the lookup yields the factory's result.
d['k2']

'N/A'

In [35]:
from collections import OrderedDict
d = OrderedDict()
d['k1'] = 23
d['k3'] = 32
d['k2'] = 22
d

OrderedDict([('k1', 23), ('k3', 32), ('k2', 22)])

In [106]:
# 先进先出
from collections import OrderedDict
class LastUpdatedOrderedDict(OrderedDict):
    """Capacity-bounded ordered dict that evicts its oldest entry when full.

    Assigning a new key to a full mapping first removes the entry at the
    front (first in, first out).  Re-assigning an existing key moves that
    key to the end.  Every assignment prints what happened, prefixed with
    'remove:', 'set:' or 'add:'.
    """

    def __init__(self, capacity):
        """Create an empty mapping that holds at most ``capacity`` entries.

        Args:
            capacity: maximum number of entries kept at any time.

        Raises:
            ValueError: if ``capacity`` is smaller than 1.  Such a mapping
                could never store anything: the first assignment would try
                to evict from an empty dict and fail with ``KeyError``.
        """
        if capacity < 1:
            raise ValueError('capacity must be at least 1')
        # Initialise the OrderedDict machinery before adding our own state.
        super().__init__()
        self._capacity = capacity

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``, evicting the oldest entry if needed.

        Args:
            key: key to add or update.
            value: value to associate with ``key``.
        """
        key_present = key in self
        # Updating an existing key does not grow the mapping, so that key is
        # left out of the size that is compared against the capacity.
        if len(self) - key_present >= self._capacity:
            oldest = self.popitem(last=False)
            print('remove:', oldest)
        if key_present:
            # Drop the old entry so the re-inserted key lands at the end.
            self.pop(key)
            print('set:', (key, value))
        else:
            print('add:', (key, value))
        # Delegate the actual storage to OrderedDict.
        super().__setitem__(key, value)

In [107]:
# Exercise the capacity-3 mapping: fill it, then push two more keys so the
# oldest entries are evicted.
c = LastUpdatedOrderedDict(3)
print(c)
c['key1'] = 12
c['key2'] = 21
c['key3'] = 34
print(c)
c['key4'] = 55
# Calling __setitem__ directly is the same operation as c['key5'] = 1.
c.__setitem__('key5',1)
print(c)

LastUpdatedOrderedDict()
add: ('key1', 12)
add: ('key2', 21)
add: ('key3', 34)
LastUpdatedOrderedDict([('key1', 12), ('key2', 21), ('key3', 34)])
remove: ('key1', 12)
add: ('key4', 55)
remove: ('key2', 21)
add: ('key5', 1)
LastUpdatedOrderedDict([('key3', 34), ('key4', 55), ('key5', 1)])


In [108]:
from collections import Counter
# Counter accepts any iterable of hashable items, so the string is passed
# directly; this tallies each character exactly like incrementing c[ch] in a
# hand-written loop, and keeps the same first-seen order.
c = Counter('hello world!')
c

Counter({'h': 1,
         'e': 1,
         'l': 3,
         'o': 2,
         ' ': 1,
         'w': 1,
         'r': 1,
         'd': 1,
         '!': 1})

# itertools, operator

In [77]:
import itertools
import operator

In [93]:
operator.getitem?

[1;31mDocstring:[0m getitem(a, b) -- Same as a[b].
[1;31mType:[0m      builtin_function_or_method


In [94]:
operator.attrgetter?

Init signature: operator.attrgetter(self, /, *args, **kwargs)
[1;31mDocstring:[0m     
attrgetter(attr, ...) --> attrgetter object

Return a callable object that fetches the given attribute(s) from its operand.
After f = attrgetter('name'), the call f(r) returns r.name.
After g = attrgetter('name', 'date'), the call g(r) returns (r.name, r.date).
After h = attrgetter('name.first', 'name.last'), the call h(r) returns
(r.name.first, r.name.last).
[1;31mFile:[0m           d:\software\anaconda3\envs\ai\lib\operator.py
[1;31mType:[0m           type


# base64

In [120]:
# Base64 represents arbitrary binary data using an alphabet of 64 characters.
# It is a binary-to-text encoding, commonly used to carry small amounts of
# binary data in URLs, cookies and web pages.
import base64
cache = base64.b64encode(b'binary\x00s\xb7\x1d\xfb\xef\xff')
print(cache)
base64.b64decode(cache)
# Base64 is a table-lookup encoding, not encryption; using a custom alphabet
# does not make it one.
# It suits short payloads such as certificate signatures or cookie contents.

b'YmluYXJ5AHO3Hfvv/w=='


b'binary\x00s\xb7\x1d\xfb\xef\xff'

In [130]:
n = 1024009
# Manual approach: mask and shift out the highest of the four bytes of n.
# NOTE(review): b1 is not used again in this cell.
b1 = chr((n & 0xff000000) >> 24)
import struct
struct.pack('>I',n)
# '>' selects big-endian byte order (network order); 'I' is a 4-byte unsigned integer.


b'\x00\x0f\xa0\t'

# hashlib, hmac

HMAC 是一种基于密钥的消息认证码算法，全称是 keyed-Hash Message Authentication Code。HMAC 利用哈希算法，以一个密钥和一个消息作为输入，生成一个消息摘要（认证码）作为输出。由于计算时混入了密钥，HMAC 可以有效防止针对 MD5 等普通哈希的彩虹表攻击：例如，若只把常见密码的 MD5 值存入数据库，就可能被反向查表破解。

In [8]:
import hmac
hmac?

[1;31mType:[0m        module
[1;31mString form:[0m <module 'hmac' from 'd:\\software\\Anaconda3\\envs\\ai\\lib\\hmac.py'>
[1;31mFile:[0m        d:\software\anaconda3\envs\ai\lib\hmac.py
[1;31mDocstring:[0m  
HMAC (Keyed-Hashing for Message Authentication) Python module.

Implements the HMAC algorithm as described by RFC 2104.


In [39]:
import hashlib
md5 = hashlib.md5()
md5.update(b'how to use md5 in python hashlib?')
md5.hexdigest()
# MD5 is fast; it produces a 128-bit digest, written as 32 hexadecimal digits.

'd26a53750bc40b38b65a520292f69306'

In [44]:
sha1 = hashlib.sha1()
# The message is fed to the hash object in two separate update() calls.
sha1.update('how to use'.encode('utf-8'))
sha1.update('hash in python'.encode('utf-8'))
sha1.hexdigest()
# SHA-1 produces a 160-bit digest, written as 40 hexadecimal digits.

'a3753dbb0887493b5c194a81f897e3f03c817c05'

In [56]:
sha1 = hashlib.sha1()
sha1.update('how to usehash in python'.encode('utf-8'))
sha1.hexdigest()

'a3753dbb0887493b5c194a81f897e3f03c817c05'

In [60]:
'sdsd'.encode('utf8')

b'sdsd'

In [62]:
'sssssaefwefa是'.encode('utf8')

b'sssssaefwefa\xe6\x98\xaf'

# time, datetime, calendar

`datetime`是对`time`的封装

In [83]:
# NOTE(review): 'calender' is misspelled -- the standard-library module is
# named 'calendar'. As written, this import fails with the
# ModuleNotFoundError recorded below this cell.
import time, calender
import datetime
datetime?

ModuleNotFoundError: No module named 'calender'

In [20]:
datetime.datetime.now()

datetime.datetime(2018, 11, 30, 7, 0, 29, 743161)

In [23]:
datetime.datetime.utcnow()

datetime.datetime(2018, 11, 29, 23, 1, 4, 569364)

In [38]:
a = datetime.datetime.now()
a.weekday(), a.year, a.month

(4, 2018, 11)

# argparse, click

**Argparse** 是一个用于命令行选项与参数解析的内置模块。
**Click** 是 **Flask** 的开发团队 **Pallets** 的另一款开源项目，它是用于快速创建命令行的第三方模块。我们知道，Python 内置了一个 **Argparse** 的标准库用于创建命令行，但使用起来有些繁琐。

**Click 相比于 Argparse，就好比 requests 相比于 urllib**。


In [5]:
import argparse, click
click?

[1;31mType:[0m        module
[1;31mString form:[0m <module 'click' from 'd:\\software\\Anaconda3\\envs\\ai\\lib\\site-packages\\click\\__init__.py'>
[1;31mFile:[0m        d:\software\anaconda3\envs\ai\lib\site-packages\click\__init__.py
[1;31mDocstring:[0m  
click
~~~~~

Click is a simple Python module inspired by the stdlib optparse to make
writing command line scripts fun. Unlike other modules, it's based
around a simple API that does not come with too much magic and is
composable.

:copyright: © 2014 by the Pallets team.
:license: BSD, see LICENSE.rst for more details.


# urllib, requests

In [7]:
import urllib
import requests
urllib?
requests?

[1;31mType:[0m        module
[1;31mString form:[0m <module 'requests' from 'd:\\software\\Anaconda3\\envs\\ai\\lib\\site-packages\\requests\\__init__.py'>
[1;31mFile:[0m        d:\software\anaconda3\envs\ai\lib\site-packages\requests\__init__.py
[1;31mDocstring:[0m  
Requests HTTP Library
~~~~~~~~~~~~~~~~~~~~~

Requests is an HTTP library, written in Python, for human beings. Basic GET
usage:

   >>> import requests
   >>> r = requests.get('https://www.python.org')
   >>> r.status_code
   200
   >>> 'Python is a programming language' in r.content
   True

... or POST:

   >>> payload = dict(key1='value1', key2='value2')
   >>> r = requests.post('https://httpbin.org/post', data=payload)
   >>> print(r.text)
   {
     ...
     "form": {
       "key2": "value2",
       "key1": "value1"
     },
     ...
   }

The other HTTP methods are supported - see `requests.api`. Full documentation
is at <http://python-requests.org>.

:copyright: (c) 2017 by Kenneth Reitz.
:license: Apache 2.0, see LICENSE for more details.

In [10]:
# Import the submodule explicitly: a bare `import urllib` does not load
# urllib.request, so the attribute lookup below would only succeed if some
# other import had already loaded it as a side effect.
import urllib.request

# Fetch the page; the context manager closes the HTTP response afterwards.
with urllib.request.urlopen('http://python.org/') as response:
    html = response.read()

In [18]:
print('sds\nsfd')

sds
sfd
