> 鏈接在此: [link](https://python-cookbook-3rd-edition.readthedocs.io/zh_CN/latest/c01/p01_unpack_sequence_into_separate_variables.html)

#### unpack

In [172]:
# most common
x,y = 1,2 
x,y 

# split string 
x,y,z = 'XYZ'
x,y,z

# unpack -> unpack
info, (y,m), week = ['date',(2018,7),'Wed.']
info, (y,m), week

# skip
_, name, age, _ = ['shit','Alex',20,'shiiit']
name,age

# skip (batch)
_, name, *_, age = ['shit','Alice','what','the',19]
name,age

(1, 2)

('X', 'Y', 'Z')

('date', (2018, 7), 'Wed.')

('Alex', 20)

('Alice', 19)

In [173]:
# [1,2,3]
first, *middle, last = ['Start',1,2,3,'End']

# [100,120,80]
name, gender, *scores = ['Alex','male',100,120,80]

# 100.0
def avg_the_scores(info=None):
    """Return the mean of the scores that follow the leading name in *info*.

    info: a sequence shaped like ``[name, score, score, ...]``.  When it is
        omitted, the sample record ``['alex', 100, 110, 90]`` is used.

    Raises ValueError if *info* is empty (there is no name to unpack) and
    ZeroDivisionError if it holds a name but no scores.
    """
    # Build the sample record per call: a list literal in the signature is
    # created once and shared by every call.
    if info is None:
        info = ['alex', 100, 110, 90]

    # first element is the name, everything after it is a score
    _name, *scores = info
    return sum(scores) / len(scores)

In [174]:
line = 'root:*:0:0:System Administrator:/var/root:/bin/sh'

# split -> filter -> done!
uname, *other, home, shell = line.split(':')
uname, home, shell

('root', '/var/root', '/bin/sh')

#### deque - fast ins/del

In [175]:
from collections import deque

In [176]:
''' fixed size '''

q = deque(maxlen=3)

q.append(1)     
q.extend([2,3]) 

# normal 
q

# the deque is full: appending on the right drops the oldest item on the left
q.append(4)
q

# all gone, [2,3]
q.extend([5,6])
q

' fixed size '

deque([1, 2, 3])

deque([2, 3, 4])

deque([4, 5, 6])

In [177]:
''' deque VS list '''

# insert/delete at the LEFT end:
#   list  ->  O(N)
#   deque ->  O(1)   (at the right end both are O(1))

dq = deque()

dq.extend([1,2,3])

dq.appendleft(0) # left   ( extend is also available )
dq.append(4)     # right   

dq

dq.popleft()     # left 
dq.pop()         # right 

' deque VS list '

deque([0, 1, 2, 3, 4])

0

4

#### heapq - largest/smallest

In [178]:
import heapq 

In [179]:
nums = [1,8,12,39,12,0,-12,21]

# basic 
heapq.nlargest(3,nums)
heapq.nsmallest(3,nums)

# btw, if u want the ONLY largest/smallest 
# use max()/min() instead !

# specifying key 
high_dimen = [
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'ACME', 'shares': 75, 'price': 115.65},
]

heapq.nlargest(
    2,                         
    high_dimen,                # the container 
    key=lambda x: x['price']   # list[elem-idx]: list[elem-idx]['key']
)

[39, 21, 12]

[-12, 0, 1]

[{'name': 'ACME', 'shares': 75, 'price': 115.65},
 {'name': 'IBM', 'shares': 100, 'price': 91.1}]

In [180]:
nums = [ 8, 2, 23, 7, -4, 18, 23, 42, 37]

# transform into a 'heap' (堆)
heapq.heapify(nums)
nums

# heap[0] is always the smallest item (the heap property)
# heappop() removes and returns it in only O(log N) time
heapq.heappop(nums)
nums 

for i in range(3):
    
    # get the smallest N element 
    a = heapq.heappop(nums)
    
    print('smallest:',a,'... for now.')

[-4, 2, 18, 7, 8, 23, 23, 42, 37]

-4

[2, 7, 18, 37, 8, 23, 23, 42]

smallest: 2 ... for now.
smallest: 7 ... for now.
smallest: 8 ... for now.


In [181]:
# USE IT RIGHT 

nums = [ 8, 2, 23, 7, -4, 18, 23, 42, 37]

if 'you-want-only-one':
    max(nums), min(nums)

if 'the-items-you-want-is-nearly-the-total':
    sorted(nums)[:3], sorted(nums)[-3:]
    
if 'your-need-is-not-one-of-them':
    'think again', 'and use heapq!'

(42, -4)

([-4, 2, 7], [23, 37, 42])

('think again', 'and use heapq!')

#### heapq - priority

In [182]:
import heapq 

class PriorityQueue:
    """Queue that always pops the item with the highest priority.

    Items pushed with equal priority come back out in the order they
    were pushed.
    """

    def __init__(self):
        self._queue = []   # plain list, kept in heap order by heapq
        self._index = 0    # running push counter, used as a tie-breaker

    def push(self, item, priority):
        """Add *item* to the queue with the given *priority*."""
        # heapq pops the smallest entry first, so the priority is negated
        # to make the largest one surface; the counter sits in between so
        # entries with equal priority never fall through to comparing items.
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the highest-priority item."""
        _neg_priority, _order, item = heapq.heappop(self._queue)
        return item
    
class Item:
    """Simple wrapper around a name; it defines no ordering of its own."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return f'Item({self.name!r})'

In [183]:
# push/pop

q = PriorityQueue()

q.push(Item('fork'),1)
q.push(Item('star'),1)

q.push(Item('commit'),4)
q.push(Item('pull-request'),5)

# always pop the highest priority element 
q.pop()
q.pop()

# equal priorities pop in insertion order (earliest first)
q.pop()

Item('pull-request')

Item('commit')

Item('fork')

In [184]:
# cmp

try:
    
    # missing info
    #   so it can't cmp with each other
    a = Item('foo')
    b = Item('bar')
    c = None             # placeholder
    
    # bad guy
    a < b 
    
except Exception as err:
    
    # different priorities are fine: the first tuple element decides
    a = (1,Item('foo'))
    b = (5,Item('bar'))  
    c = None             # placeholder
    
    # but if both have the same priority, the comparison falls through
    #   to the Items themselves and a TypeError is still raised
    pass

finally:
    
    # (pri, idx, item)
    a = (1,0,Item('foo'))
    b = (5,1,Item('fuc'))
    c = (1,2,Item('bar'))
    
a < b
a < c 

True

True

#### defaultdict - mapping multiple values

In [185]:
# quick tips:
#   list  --  order preserved 
#   set   --  no duplicates & not ordered 

In [186]:
from collections import defaultdict 

tips = ''' 
    Initializing as XXX, u're able use the methods of XXX.

    That is:
        list  -- append, extend etc. (IT IS list!!)
        set   -- add, update    etc.
        
    The tips are applied to all the methods down below.
'''


def dic_mulval_normal():
    """Multi-value dict built by hand: every value is a list created up front."""

    d1 = {'a': [1, 2, 3], 'b': [4, 5]}

    # the value is a plain list, so the list methods apply directly
    values_of_a = d1['a']
    values_of_a.append(4)
    values_of_a.extend([5, 6])


def dic_mulval_setdefault():
    """Multi-value dict via setdefault(): the list is created on first use of a key."""

    d2 = {}

    # 'a' is missing -> [] is stored under it and returned, then appended to
    first = d2.setdefault('a', [])
    first.append(1)

    # 'a' exists now -> the stored list comes back and the default is ignored
    again = d2.setdefault('a', [])
    again.extend([2, 3])
    

def dic_mulval_collc_defaultdict():
    """Multi-value dict via defaultdict(list): missing keys start as an empty list."""

    d = defaultdict(list)

    # merely looking a key up creates it (value: [])
    for key in ('a', 'what'):
        d[key]

    values_of_a = d['a']
    values_of_a.append(1)
    values_of_a.extend([2, 3])

In [187]:
# add existing vals to dict 

existing_vals  = { 'a' : [1, 2], 'b' : [3, 4] }.items()  
dict_to_be_add = defaultdict(list)

for key,value in existing_vals:
    dict_to_be_add[key].extend(value)
    
dict_to_be_add

defaultdict(list, {'a': [1, 2], 'b': [3, 4]})

#### OrderedDict - dict ordering

In [188]:
from collections import OrderedDict
import json 

d = OrderedDict()

d['foo'] = "It is"
d['bar'] = "a dict"
d['oop'] = "after all"

# Loop it as a normal dict (IT IS)
for k in d:
    print(k, ':', d[k])
    
# JSON will be nice :)
json.dumps(d)

# btw
tips = """
    The size of 'OrderedDict' is twice of the normal dict!
    In human words, 
        it'll consume much MORE memory than normal dict.
"""

foo : It is
bar : a dict
oop : after all


'{"foo": "It is", "bar": "a dict", "oop": "after all"}'

#### dict - k,v / max,min

In [189]:
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}

In [190]:
# 'evolution' (sort of XD)

# key only
min(prices)

# value only  (what, what the fuck XD)
min(prices.values())

# the corresponding key, and val (hmm)
min(
    prices, key=lambda k: prices[k]
)

prices[
    min(
        prices, key=lambda k: prices[k]
    )
]

'AAPL'

10.75

'FB'

10.75

In [191]:
# a better way to find the extremes (zip)

min_price_zipped = min(
    zip(prices.values(), prices.keys())
)

min_price_sorted = sorted(
    zip(prices.values(), prices.keys())
)

# min(Them) is fine 
min_price_zipped

# or you could sort it and pick the first (min)
min_price_sorted[0]


# ----- BUT ----- 

same_val_wtf = {'AAA':200,'GGG':200}

min(
    zip(
        same_val_wtf.values(),  # cmp values first, but "not" in this case 
        same_val_wtf.keys(),    # same val? continue to cmp key, key does the sort!
    )
)

(10.75, 'FB')

(10.75, 'FB')

(200, 'AAA')

#### dict - find the similarities

In [192]:
a = {
    'w': 1,
    'x': 2,
    'z': 3
}

b = {
    'x': 10,
    'y': 20,
    'z': 3
}


# in common 
a.keys() & b.keys()

# a has
# b doesn't 
a.keys() - b.keys()

# k,v all the same 
a.items() & b.items()

{'x', 'z'}

{'w'}

{('z', 3)}

In [193]:
# remove specific keys - dict comprehension 

{ 
    # cleaner and more readable
    k:a[k] for k in a.keys() - {'z'} 
}

{'x': 2, 'w': 1}

#### del duplicates while keeping order

In [194]:
def dedupe(items):
    ''' dedupe: drop repeated items, keeping first-seen order '''

    already_seen = set()

    for candidate in items:

        # a repeat: skip it
        if candidate in already_seen:
            continue

        yield candidate               # hand the new item to the caller (generator)
        already_seen.add(candidate)   # then remember it; items must be hashable

            
a_list = [1,2,2,3,7,2,18] 
a_dict = [ {'x':1, 'y':2}, {'x':1, 'y':2}, {'x':3, 'y':4}]


list(dedupe(a_list))

try:
    list(dedupe(a_dict))
except TypeError as err:
    err

[1, 2, 3, 7, 18]

TypeError("unhashable type: 'dict'")

In [195]:
def dedupe_advance(items, key=None):
    """Yield the items of *items* in order, skipping repeats.

    key: optional one-argument function mapping an item to the hashable
        value used to decide whether it has been seen; when None the
        item itself is used.
    """
    fingerprints = set()

    for item in items:

        if key is None:
            mark = item
        else:
            mark = key(item)

        if mark in fingerprints:
            continue

        yield item
        fingerprints.add(mark)

            
a_list = [1,2,2,3,7,2,18] 
a_dict = [ 
    {'x':1, 'y':2}, 
    {'x':1, 'y':2}, 
    {'x':1, 'y':3},
    {'x':3, 'y':4}
]


# just like old times            
list(dedupe_advance(a_list))

# for dict 
list(dedupe_advance(
    a_dict,
    key=lambda d: (d['x'])         # remove once the key is the same 
))

list(dedupe_advance(
    a_dict,
    key=lambda d: (d['x'],d['y'])  # remove if key,val both the same 
))


[1, 2, 3, 7, 18]

[{'x': 1, 'y': 2}, {'x': 3, 'y': 4}]

[{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 3, 'y': 4}]

In [196]:
# the dedupe() could also
#   be used to remove the duplicating line (file)

with open('test.txt','r') as f:
    for line in dedupe(f):
        print(line,end='')  # 666 is actually '666\n' (at least the 1st line XD)

666
111
222


#### slice - naming slice

In [197]:
a = "######0123456789012345678901234567890123456789012345678901234567890"

# just like a[6:17]
# but created as an object 
pick_natural_num = slice(6,17,1)

# readability!
a[pick_natural_num]

# it's even got Attributes!
pick_natural_num.indices
pick_natural_num.start, pick_natural_num.stop, pick_natural_num.step

'01234567890'

<function slice.indices>

(6, 17, 1)

In [198]:
a = 'Hello'

# deliberately longer than the string: stop is 50, len(a) is 5
obj = slice(0, 50, 1)

obj                       # slice
obj.indices(len(a))       # tuple (start, stop, step) clipped to len(a)

slice(
    *obj.indices(len(a))  # turn the clipped indices back into a slice
)

# actual usage: walk exactly the positions the slice selects
for i in range(*obj.indices(len(a))):
    print(a[i],end='\t')  # index the same string the indices were computed for

slice(0, 50, 1)

(0, 5, 1)

slice(0, 5, 1)

H	e	l	l	o	

#### Counter - most common elements

In [199]:
words = [
    'look','eyes','the', 'eyes', 'the', 'eyes',
    'eyes', "don't", 'under', 'not', 'around', 'the'
]

In [200]:
from collections import Counter 

word_cnt = Counter(words)

word_cnt
word_cnt.most_common(2)

def hey():
    """Call the three dict-style view methods on word_cnt; the results are discarded."""
    counts = word_cnt
    counts.items()    # (key, count) pairs
    counts.keys()     # keys only
    counts.values()   # counts only

# and these 
word_cnt['not'], word_cnt['eyes']  # 1,4 

# add counting results 
more_words = ['crap','not','eyes'] # 2,5
for w in more_words:
    word_cnt[w] += 1 
    
# or using update 
word_cnt.update(more_words)        # 3,6 -- add again! 
     
word_cnt['not'], word_cnt['eyes']

Counter({'look': 1,
         'eyes': 4,
         'the': 3,
         "don't": 1,
         'under': 1,
         'not': 1,
         'around': 1})

[('eyes', 4), ('the', 3)]

(1, 4)

(3, 6)

In [201]:
word      = words       # above 
word_more = more_words  # above 

a = Counter(word)
b = Counter(word_more)

# You're able to do arithmetic on it!!
a + b 
a - b
b - a
a & b

Counter({'look': 1,
         'eyes': 5,
         'the': 3,
         "don't": 1,
         'under': 1,
         'not': 2,
         'around': 1,
         'crap': 1})

Counter({'look': 1, 'eyes': 3, 'the': 3, "don't": 1, 'under': 1, 'around': 1})

Counter({'crap': 1})

Counter({'eyes': 1, 'not': 1})

#### itemgetter - sort dict by giving key

In [202]:
# Basics: itemgetter(i, ...) builds a callable that fetches those positions

from operator import itemgetter  # required by the two assignments below

func_pick_one  = itemgetter(0)     # one index    -> the element itself
func_pick_more = itemgetter(0,-1)  # many indices -> a tuple of elements

func_pick_one(
    [1,2,3,4]
)

func_pick_more(
    [1,2,3,4]
)

1

(1, 4)

In [203]:
rows = [
    {'fname': 'Brian', 'uid': 1003},
    {'fname': 'David', 'uid': 1004},
    {'fname': 'John', 'uid': 1001},
    {'fname': 'Big', 'uid': 1002}
]

In [204]:
from operator import itemgetter

# this one is faster than 'lambda'
by_fname_ig = sorted(rows, key=itemgetter('uid'))
by_fname_ig

by_fname_lm = sorted(rows, key=lambda r: r['uid'])
by_fname_lm

# also, there's max/min
min(rows, key=itemgetter('uid'))
max(rows, key=itemgetter('uid'))

[{'fname': 'John', 'uid': 1001},
 {'fname': 'Big', 'uid': 1002},
 {'fname': 'Brian', 'uid': 1003},
 {'fname': 'David', 'uid': 1004}]

[{'fname': 'John', 'uid': 1001},
 {'fname': 'Big', 'uid': 1002},
 {'fname': 'Brian', 'uid': 1003},
 {'fname': 'David', 'uid': 1004}]

{'fname': 'John', 'uid': 1001}

{'fname': 'David', 'uid': 1004}

#### attrgetter - sort 'cannot be sorted'

In [243]:
from operator import attrgetter

# attrgetter basics (run the cell that defines User / users first)

dict_pick_one  = attrgetter('user_id')
dict_pick_more = attrgetter('user_id','name')

[dict_pick_one(i) for i in users ]  # each elem call its 'User(xx).user_id'
[ dict_pick_more(i) for i in users] # more than one, '.user_id' & '.name' 

[40, 12, 27]

[(40, 'xxx'), (12, 'xxx'), (27, 'xxx')]

In [244]:
class User:
    """Plain record holding a user_id and a name; it defines no ordering of its own."""

    def __init__(self, user_id, name='xxx'):
        self.name = name
        self.user_id = user_id

    def __repr__(self):
        return f'User({self.user_id})'
    
    
users = [User(40), User(12), User(27)]

In [245]:
sorted(
    users,
    key=attrgetter('user_id') # still, this one is faster than 'lambda'
)

sorted(
    users, 
    key=lambda u: u.user_id   # User - (self) - user_id
)


# and.. max/min are supported 

min(
    users,
    key=attrgetter('user_id')
)

max(
    users,
    key=attrgetter('user_id')
)

[User(12), User(27), User(40)]

[User(12), User(27), User(40)]

User(12)

User(40)