# 查找列表中频率最高的值

In [1]:
a = [1, 2, 3, 1, 2, 3, 2, 2, 4, 5, 1]
# Rank every distinct value by how many times it occurs in the list and
# print the one with the highest count.
distinct_values = set(a)
most_frequent = max(distinct_values, key=a.count)
print(most_frequent)

2


In [2]:
from collections import Counter
# Tally the occurrences of each element of `a`; `cnt` is reused afterwards.
cnt = Counter(a)
# most_common(1) yields a one-element list holding a (value, count) pair.
top_entry = cnt.most_common(1)
print(top_entry)

[(2, 4)]


In [3]:
# Print every distinct value next to its number of occurrences.
for value, occurrences in cnt.items():
    print(value, occurrences)

1 3
2 4
3 2
4 1
5 1


# 建立m*n维的list数组

In [4]:
m, n = 2, 3
# m rows of n zeros; each row is built separately, so the rows are
# distinct list objects.
[[0 for _ in range(n)] for _ in range(m)]

[[0, 0, 0], [0, 0, 0]]

# n开m次根号取整

In [5]:
def int_root(value, degree):
    """Return the integer part of the `degree`-th root of `value`.

    `value ** (1 / degree)` is computed in floating point and can land just
    below an exact root (for example 125 ** (1 / 3) is 4.999...), so plain
    truncation with int() may be one too small. The float result is used
    only as a starting estimate and is then corrected with exact integer
    arithmetic.

    Args:
        value: non-negative integer small enough to be converted to float.
        degree: positive integer root degree.

    Raises:
        ValueError: if `value` is negative or `degree` is less than 1.
    """
    if value < 0 or degree < 1:
        raise ValueError('int_root requires value >= 0 and degree >= 1')
    root = int(round(value ** (1.0 / degree)))
    # Step down while the estimate overshoots, then up while it undershoots.
    while root ** degree > value:
        root -= 1
    while (root + 1) ** degree <= value:
        root += 1
    return root


n, m = 81, 4
print(int_root(n, m))

3


# 执行hql脚本

In [6]:
path = 'hdfs://data/test/'
date = '20180808'

# HQL template; the {hdfs_path} and {date} placeholders are filled in below.
sql_template = """
INSERT OVERWRITE DIRECTORY '{hdfs_path}'
row format delimited
fields terminated by '\\t'
SELECT
    id
FROM
    aweme_i18n_dump.ies_item_dict
WHERE
    date = '{date}'
"""

# Placeholder name -> value used to render the template.
sql_para = dict(hdfs_path=path, date=date)
sql = sql_template.format_map(sql_para)
print(sql)


INSERT OVERWRITE DIRECTORY 'hdfs://data/test/'
row format delimited
fields terminated by '\t'
SELECT
    id
FROM
    aweme_i18n_dump.ies_item_dict
WHERE
    date = '20180808'



# list数据类型转换 

In [7]:
a = [1, 2, 3]
# int -> str, then str -> float, converting element by element.
b = [str(number) for number in a]
c = [float(text) for text in b]
for template, values in (('a: %s', a), ('b: %s', b), ('c: %s', c)):
    print(template % values)

a: [1, 2, 3]
b: ['1', '2', '3']
c: [1.0, 2.0, 3.0]


# 根据条件筛选数据

## 列表

In [8]:
from random import randint
# Ten random integers between -10 and 10.
a = [randint(-10, 10) for _ in range(10)]
print(a)
# Approach 1: the filter() builtin with a predicate.
kept_by_filter = list(filter(lambda value: value >= 0, a))
print(kept_by_filter)
# Approach 2: a list comprehension with a condition.
kept_by_comprehension = [value for value in a if value >= 0]
print(kept_by_comprehension)

[0, 5, -4, -5, -4, 0, -5, -6, 3, 4]
[0, 5, 0, 3, 4]
[0, 5, 0, 3, 4]


In [9]:
# IPython `timeit` magic written without the leading `%`: measures how long
# the list-comprehension filter over `a` takes.
timeit [x for x in a if x >= 0]

694 ns ± 16.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [10]:
# IPython `timeit` magic written without the leading `%`: measures how long
# the filter()-based version of the same selection over `a` takes.
timeit list(filter(lambda x: x >= 0, a))

1.95 µs ± 65.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## 字典

In [11]:
from random import randint
# Keys 0..19, each mapped to a random integer between 60 and 100.
d = {key: randint(60, 100) for key in range(20)}
print(d)
# Dict comprehension: keep only the entries whose value is above 90.
above_90 = {key: value for key, value in d.items() if value > 90}
print(above_90)

{0: 70, 1: 63, 2: 80, 3: 98, 4: 85, 5: 90, 6: 62, 7: 97, 8: 92, 9: 82, 10: 76, 11: 95, 12: 72, 13: 75, 14: 85, 15: 75, 16: 84, 17: 76, 18: 84, 19: 86}
{3: 98, 7: 97, 8: 92, 11: 95}


## 集合

In [12]:
from random import randint
# Up to ten distinct random integers between -10 and 10 (duplicates collapse).
s = {randint(-10, 10) for _ in range(10)}
print(s)
# Set comprehension: keep only the non-negative members.
non_negative = {member for member in s if member >= 0}
print(non_negative)

{2, 5, 6, 8, 10, -7, -6, -4}
{2, 5, 6, 8, 10}


In [13]:
from collections import Counter
# Count how often each character occurs in the string.
ct = Counter()
ct.update('abracadabra')

# 多个字典的公共键

In [14]:
from random import randint
from random import sample
# Build three dicts the same way: 7 distinct keys sampled from 'abcdefgxyz',
# each mapped to a random integer between 1 and 4.
d1, d2, d3 = (
    {key: randint(1, 4) for key in sample('abcdefgxyz', 7)}
    for _ in range(3)
)
for template, mapping in (('d1: %s', d1), ('d2: %s', d2), ('d3: %s', d3)):
    print(template % mapping)

d1: {'b': 3, 'x': 2, 'd': 3, 'a': 2, 'z': 3, 'c': 4, 'f': 4}
d2: {'f': 2, 'b': 1, 'd': 4, 'x': 2, 'z': 3, 'g': 4, 'c': 1}
d3: {'g': 2, 'd': 4, 'y': 3, 'a': 3, 'x': 2, 'f': 4, 'e': 4}


In [15]:
from functools import reduce
# Intersect the key views pairwise, left to right; the result is the set of
# keys present in all three dicts.
reduce(lambda shared, keys: shared & keys, (mapping.keys() for mapping in (d1, d2, d3)))

{'d', 'f', 'x'}

# 字符串对齐

In [16]:
s = ['Beijing', 'Xian', 'Shanghai', 'Jinan', 'Haerbing']
# Pad every name to width 15 with '*': left-aligned, then right-aligned,
# then centered, with a blank line between the groups.
for position, pad in enumerate((str.ljust, str.rjust, str.center)):
    if position:
        print()
    for x in s:
        print(pad(x, 15, '*'))


# The format() builtin can align a single string x as well:
# format(x, '<15')  # left-aligned
# format(x, '>15')  # right-aligned
# format(x, '^15')  # centered

Beijing********
Xian***********
Shanghai*******
Jinan**********
Haerbing*******

********Beijing
***********Xian
*******Shanghai
**********Jinan
*******Haerbing

****Beijing****
******Xian*****
****Shanghai***
*****Jinan*****
****Haerbing***
