# 高效工具

## collections

### defaultdict 快速定义字典类型

#### 字典与整数的结合：defaultdict(int)

In [1]:
import collections

# Tally how often each element occurs.  defaultdict(int) supplies 0 for a
# key that is not present yet, so no explicit initialisation is needed.
ls = ["a", "b", "a", "b", "c", "a", "b", "c"]
di = collections.defaultdict(int)  # an instance whose default factory is int
for i in ls:
    di[i] = di[i] + 1  # the first lookup of a new key yields int() == 0
di  # bare last expression: the notebook displays its repr

defaultdict(int, {'a': 3, 'b': 3, 'c': 2})

In [2]:
# Importing the class by name allows writing defaultdict(...) directly,
# without the "collections." prefix.
from collections import defaultdict

# Tally the elements of `ls` (the list defined in an earlier cell).
di1 = defaultdict(int)  # missing keys start at int() == 0
for i in ls:
    di1[i] = di1[i] + 1
di1  # bare last expression: the notebook displays its repr

defaultdict(int, {'a': 3, 'b': 3, 'c': 2})

#### 字典与列表的结合：defaultdict(list)

In [3]:
# Group values by key.  defaultdict(list) creates an empty list the first
# time a key is looked up, so every key always has a list to extend.
key_value = [("a", 1), ("b", 2), ("a", 3), ("b", 4), ("c", 5)]
dl = defaultdict(list)

for key, value in key_value:
    dl[key] += [value]  # in-place extend of the list stored under `key`
dl  # bare last expression: the notebook displays its repr

defaultdict(list, {'a': [1, 3], 'b': [2, 4], 'c': [5]})

#### 字典与集合的结合：defaultdict(set)

In [4]:
# Collect the distinct values seen for each key.  defaultdict(set) creates
# an empty set the first time a key is looked up.
key_value = [("a", 1), ("b", 2), ("a", 3), ("b", 4), ("c", 5)]
ds = defaultdict(set)

for key, value in key_value:
    ds[key] |= {value}  # in-place union adds `value` to the key's set
ds  # bare last expression: the notebook displays its repr

defaultdict(set, {'a': {1, 3}, 'b': {2, 4}, 'c': {5}})

### Counter 计数器

In [6]:
from collections import Counter

# Counter is a dict subclass for counting hashable objects; feeding it a
# string counts every character.
string = "abcaabbccdee"
counter = Counter()
counter.update(string)  # same tally as passing the string to Counter(...)
counter  # bare last expression: the notebook displays its repr

Counter({'a': 3, 'b': 3, 'c': 3, 'e': 2, 'd': 1})

In [10]:
# Query the Counter built in the previous cell; one result per output line.
print(
    counter.most_common(2),  # the two most frequent characters with counts
    counter.most_common(),  # every character, most frequent first
    counter.keys(),  # the distinct characters
    counter.values(),  # the count of each character
    counter.items(),  # (character, count) pairs
    counter['a'],  # count of 'a'
    counter['z'],  # a character that never occurred yields 0, not KeyError
    sep="\n",
)

[('a', 3), ('b', 3)]
[('a', 3), ('b', 3), ('c', 3), ('e', 2), ('d', 1)]
dict_keys(['a', 'b', 'c', 'd', 'e'])
dict_values([3, 3, 3, 1, 2])
dict_items([('a', 3), ('b', 3), ('c', 3), ('d', 1), ('e', 2)])
3
0


In [None]:
# Adding two Counters sums the counts key by key.
counter1 = Counter({"a": 1, "b": 2})  # first operand
counter2 = Counter({"a": 2, "b": 1})  # second operand
counter3 = counter1 + counter2  # a new Counter; the operands are unchanged
print(counter3)  # Counter({'a': 3, 'b': 3})

Counter({'a': 3, 'b': 3})


Counter({'b': 2, 'a': 1})

### deque

- itertools
  - product
  - permutations
  - combinations
  - cycle
  - accumulate
  - groupby

In [None]:
from itertools import product, permutations, combinations, cycle, accumulate, groupby

# Cartesian product: every pairing of one item from each iterable.
print([*product([1, 2], ["a", "b"])])  # [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')]

# Permutations of length 2: ordered selections without repeating an element.
print([*permutations([1, 2, 3], 2)])  # [(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)]

# Combinations of length 2: unordered selections.
print([*combinations([1, 2, 3], 2)])  # [(1, 2), (1, 3), (2, 3)]

# Endless repetition of a sequence; take the first two items only.
cycle_list = cycle([1, 2, 3])
print(next(cycle_list), next(cycle_list))  # 1 2

# Running totals.
print([*accumulate([1, 2, 3, 4])])  # [1, 3, 6, 10]

# Grouping.  groupby only merges *adjacent* items with equal keys, so the
# records are sorted by the same key before being grouped.
data = [
    {"name": "Alice", "age": 30},
    {"name": "Bob", "age": 25},
    {"name": "Charlie", "age": 25},
]
grouped = groupby(
    sorted(data, key=lambda person: person["age"]),
    key=lambda person: person["age"],
)
for k, v in grouped:
    # Prints the age-25 group (Bob, Charlie) first, then the age-30 group (Alice).
    print(k, [*v])

## functools

### lru_cache

### partial

## heapq

## bisect

## math, statistics 数学工具

In [None]:
import math
import statistics

# A few handy numeric helpers; one result per output line.
print(
    math.gcd(48, 18),  # 6   greatest common divisor
    math.factorial(5),  # 120
    math.isqrt(10),  # 3   integer square root (floor)
    math.comb(5, 2),  # 10  ways to choose 2 items out of 5
    statistics.median([1, 3, 3, 6, 7, 8, 9]),  # 6   middle value
    sep="\n",
)

## pandas 数据表格

## re, textwrap 字符串处理

In [14]:
import re
import textwrap

# Regular expressions: find and replace every run of digits.
text = "hello 123 world 456"
print(re.findall(pattern=r"\d+", string=text))  # ['123', '456']
print(re.sub(pattern=r"\d+", repl="X", string=text))  # 'hello X world X'

# textwrap.wrap splits a long string into lines of at most `width` characters.
wrapped_text = textwrap.wrap(
    "This is a very long sentence that needs wrapping.",
    width=10,
)
print(*wrapped_text, sep="\n")  # one wrapped line per output line

['123', '456']
hello X world X
This is a
very long
sentence
that needs
wrapping.
