# 元组

元组拆包的一个常用场景就是遍历由元组或列表组成的序列

In [2]:
# Each element of seq is a 3-tuple, so the loop target can unpack it
# directly into three names on every iteration.
seq = [(1,2,3),(4,5,6),(7,8,9)]
for a,b,c in seq:
    print('a = {0},b = {1},c = {2}'.format(a,b,c))

a = 1,b = 2,c = 3
a = 4,b = 5,c = 6
a = 7,b = 8,c = 9


`*rest` 语法用于在拆包时收集剩余的任意数量的元素（函数签名中也用同样的语法来获取任意长度的位置参数列表）

In [3]:
values = 1,2,3,4,5
a,b,*rest = values

In [4]:
a,b

(1, 2)

In [5]:
rest

[3, 4, 5]

如果不需要剩余的元素，按惯例可以用下划线 `_` 作为变量名，表示这些值会被丢弃

In [6]:
a,b, *_ = values

In [7]:
a,b

(1, 2)

# 列表

pop：特定位置元素移除并返回

In [8]:
b_list = ['a','python','interesting','!']
b_list.pop(2)

'interesting'

In [9]:
b_list

['a', 'python', '!']

remove:删除特定元素

In [11]:
b_list.remove('!')
b_list

['a', 'python']

通过extend构建大型列表比+快

列表内部排序：


In [1]:
a = [7,2,5,1,3]
a.sort()

In [2]:
a

[1, 2, 3, 5, 7]

sort的一些选项：

In [4]:
b = ['saw','small','python','try']
b.sort(key = len)
b

['saw', 'try', 'small', 'python']

**二分搜索和已排序列表的维护**

内建的bisect模块实现了二分搜索和已排序列表的插入。bisect.bisect会找到元素应当被插入的位置（插入后序列仍保持有序），而bisect.insort将元素插入到相应位置。

In [5]:
import bisect
c=[1,2,2,2,3,4,7]

In [6]:
bisect.bisect(c,2)

4

In [7]:
bisect.bisect(c,5)

6

In [8]:
bisect.insort(c,6)

In [9]:
c

[1, 2, 2, 2, 3, 4, 6, 7]

bisect并不会检查列表是否已经排序

**内建序列函数**

enumerate：在遍历一个序列的同时追踪当前元素的索引，每次迭代返回一个（index，value）元组

In [11]:
some_list = ['foo','bar','baz']
mapping = {}
# enumerate yields (index, value) pairs; store each value's position
# so it can be looked up by value later.
for i, v in enumerate(some_list):
    mapping[v] = i
mapping

{'foo': 0, 'bar': 1, 'baz': 2}

sorted：返回一个根据任意序列中的元素新建的已排序列表

In [12]:
sorted([7,1,2,6,0,3,2])

[0, 1, 2, 2, 3, 6, 7]

In [13]:
sorted('python learning')

[' ', 'a', 'e', 'g', 'h', 'i', 'l', 'n', 'n', 'n', 'o', 'p', 'r', 't', 'y']

zip:将列表、元组或其他序列的元素配对，返回一个由元组构成的迭代器（Python 3中zip是惰性的，需要用list()才能得到列表）

In [14]:
seq1 = ['i','try','to']
seq2 = ['learing','data','mining']
zipped = zip(seq1,seq2)

In [15]:
zipped

<zip at 0x260a51938c8>

In [16]:
list(zipped)

[('i', 'learing'), ('try', 'data'), ('to', 'mining')]

zip可以处理任意数量的序列，生成结果的长度由最短的序列决定

In [17]:
seq3 = [False,True]
list(zip(seq1,seq2,seq3))

[('i', 'learing', False), ('try', 'data', True)]

zip的常用场景为同时遍历多个序列，有时候会和enumerate同时使用

In [18]:
# zip pairs the elements of seq1 and seq2; enumerate adds a running index,
# so each iteration unpacks to i and the pair (a, b).
for i,(a,b) in enumerate(zip(seq1,seq2)):
    print('{0}:{1},{2}'.format(i,a,b))

0:i,learing
1:try,data
2:to,mining


已配对序列拆分：

In [19]:
pitchers = [('i','try'),('to','learn'),('data','analysis')]
first,second = zip(*pitchers)

In [20]:
first

('i', 'to', 'data')

In [21]:
second

('try', 'learn', 'analysis')

reversed:倒序排列

In [22]:
list(reversed(range(10)))

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

# 字典

用del或pop方法删除值

In [23]:
d={'a':'some value','b':[1,2,3,4],7:'an interger',5:'sv','dummy':'another val'}

In [25]:
del d[5]

In [26]:
d

{'a': 'some value',
 'b': [1, 2, 3, 4],
 7: 'an interger',
 'dummy': 'another val'}

In [27]:
ret = d.pop('dummy')

In [28]:
ret

'another val'

In [29]:
d

{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an interger'}

keys、values方法：分别返回字典的键、值（Python 3中返回的是视图对象，可用list()转换为列表）

In [30]:
list(d.keys())

['a', 'b', 7]

In [31]:
list(d.values())

['some value', [1, 2, 3, 4], 'an interger']

update:两字典合并

In [32]:
d.update({1:'i',2:'beautiful'})

In [33]:
d

{'a': 'some value',
 'b': [1, 2, 3, 4],
 7: 'an interger',
 1: 'i',
 2: 'beautiful'}

dict函数可以接受一个由2-元组（键值对）组成的序列作为参数来创建字典

In [34]:
mapping = dict(zip(range(5),reversed(range(5))))

In [35]:
mapping

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

字典默认值：dict.get(key,default_value)在键不存在时返回默认值；下面的例子使用setdefault，它在键不存在时先把默认值存入字典再返回该值

In [37]:
by_letter = {}
words = ['apple','bat','bar','atom','book']
# Group the words by their first letter.
for word in words:
    letter = word[0]
    # setdefault returns the list stored under letter, inserting an empty
    # list first if the key is not present yet.
    by_letter.setdefault(letter,[]).append(word)

In [38]:
by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

同样功能：defaultdict

In [39]:
from collections import defaultdict

In [40]:
# defaultdict(list) creates an empty list the first time a missing key is
# accessed, so no explicit setdefault call is needed.
by_letter = defaultdict(list)
for word in words:
    by_letter[word[0]].append(word)

In [41]:
by_letter

defaultdict(list, {'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']})

字典的键必须是不可变对象,可哈希化

In [42]:
hash('string')

-8014342494828246780

# 集合

集合运算

![image.png](attachment:image.png)

In [43]:
a = {1,2,3,4,5}
b = {3,4,5,6,7,8}

In [44]:
a.union(b)

{1, 2, 3, 4, 5, 6, 7, 8}

In [45]:
a|b

{1, 2, 3, 4, 5, 6, 7, 8}

In [46]:
a.intersection(b)

{3, 4, 5}

In [47]:
a&b

{3, 4, 5}

集合的元素也必须是不可变的，如果想包含列表型的元素，必须先转换为元组

In [48]:
my_data = [1,2,3,4]
my_set = {tuple(my_data)}

In [49]:
my_set

{(1, 2, 3, 4)}

In [50]:
a_set = {1,2,3,4,5}
{1,2,3}.issubset(a_set)

True

## 列表、集合、字典的推导式

[expr for val in collection if condition]

{key-expr : value-expr for value in collection if condition}

In [51]:
strings = ['b','balbal','ganbadie','ilovexeer']

In [52]:
[x.upper() for x in strings]

['B', 'BALBAL', 'GANBADIE', 'ILOVEXEER']

{expr for val in collection if condition}

In [53]:
{len(x) for x in strings}

{1, 6, 8, 9}

In [54]:
set(map(len, strings))

{1, 6, 8, 9}

In [55]:
loc_mapping = {val:index for index, val in enumerate(strings)}

In [56]:
loc_mapping

{'b': 0, 'balbal': 1, 'ganbadie': 2, 'ilovexeer': 3}

嵌套列表推导式

In [58]:
all_data = [['ho','yyg','fr','kjw','xxy'],['cr','xeer','lk','jpf','ytc']]

以下两种写法等价

In [59]:
# Collect every name containing at least two 'e' characters, one inner
# list at a time.
names_of_interest = []
for names in all_data:
    enough_es = [name for name in names if name.count('e') >=2]
    names_of_interest.extend(enough_es)

In [60]:
names_of_interest

['xeer']

In [62]:
result = [name for names in all_data for name in names if name.count('e')>=2]

In [63]:
result

['xeer']

for表达式的顺序与写嵌套for循环来替代列表推导式的顺序一致

In [64]:
some_tuples = [(1,2,3),(4,5,6),(7,8,9)]
[[x for x in tup] for tup in some_tuples]

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

# 函数

函数是对象

In [65]:
states = ['  Alabama','Georgia!','Georgia','georgia','FlOrIda','south    carolina##','west balaa?']

In [68]:
import re
def clean_strings(strings):
    """Return a cleaned copy of every string in *strings*.

    Each string is stripped of surrounding whitespace, has every
    '!', '#' and '?' character removed, and is then title-cased.
    The input sequence is not modified; a new list is returned.
    """
    return [
        re.sub('[!#?]', '', raw.strip()).title()
        for raw in strings
    ]

In [69]:
clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South    Carolina',
 'West Balaa']

In [70]:
def remove_punctuation(value):
    """Return *value* with every '!', '#' and '?' character removed."""
    without_marks = re.sub('[!#?]', '', value)
    return without_marks


# Cleaning steps in the order they are applied: trim whitespace, drop the
# punctuation marks, then title-case the result.
clean_ops = [
    str.strip,
    remove_punctuation,
    str.title,
]

def clean_strings(strings, ops):
    """Apply every callable in *ops*, in order, to each string in *strings*.

    Each callable receives the result of the previous one. A new list
    holding the fully transformed values is returned.
    """
    def _run_pipeline(text):
        # Feed the value through each operation in sequence.
        for op in ops:
            text = op(text)
        return text

    return [_run_pipeline(item) for item in strings]

In [71]:
clean_strings(states,clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South    Carolina',
 'West Balaa']

In [72]:
# map applies remove_punctuation to each element of states lazily; only the
# punctuation is removed, so whitespace and letter case are left as they are.
for x in map(remove_punctuation, states):
    print(x)

  Alabama
Georgia
Georgia
georgia
FlOrIda
south    carolina
west balaa


## 匿名函数