In [1]:
import string

# 字符串常量
print(string.ascii_letters)  # abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
print(string.digits)         # 0123456789
print(string.punctuation)    # !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

# 字符串模板
template = string.Template('$name was born in $year')
print(template.substitute(name='Alice', year=1990))  # Alice was born in 1990

# 字符串格式化
print('Hello, {}!'.format('World'))  # 传统方式
print(f'Hello, {"World"}!')         # f-string (Python 3.6+)

abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
0123456789
!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
Alice was born in 1990
Hello, World!
Hello, World!


In [2]:
import re

# 匹配数字
text = "There are 123 apples and 456 oranges."
numbers = re.findall(r'\d+', text)
print(numbers)  # ['123', '456']

# 替换文本
new_text = re.sub(r'\d+', 'X', text)
print(new_text)  # There are X apples and X oranges.

# 验证电子邮件格式
def is_valid_email(email):
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return bool(re.match(pattern, email))

print(is_valid_email('test@example.com'))  # True
print(is_valid_email('invalid.email'))     # False

# 分组提取
text = "Date: 2023-08-15"
match = re.match(r'Date: (\d{4})-(\d{2})-(\d{2})', text)
if match:
    year, month, day = match.groups()
    print(f"Year: {year}, Month: {month}, Day: {day}")

['123', '456']
There are X apples and X oranges.
True
False
Year: 2023, Month: 08, Day: 15


In [3]:
from difflib import Differ, unified_diff, get_close_matches

# 比较文本差异
text1 = "Hello world\nPython is great\nGoodbye"
text2 = "Hello there\nPython is awesome\nGoodbye"

# 使用 Differ
d = Differ()
diff = list(d.compare(text1.splitlines(), text2.splitlines()))
print('\n'.join(diff))
"""
  Hello world
- Hello there
?        ^^^
+ Python is great
- Python is awesome
?              ^^^
  Goodbye
"""

# 使用 unified_diff
diff = unified_diff(text1.splitlines(), text2.splitlines(), lineterm='')
print('\n'.join(diff))
"""
--- 
+++ 
@@ -1,3 +1,3 @@
-Hello world
-Python is great
+Hello there
+Python is awesome
 Goodbye
"""

# 查找相似字符串
words = ['apple', 'ape', 'app', 'peach', 'puppy']
print(get_close_matches('appel', words))  # ['apple', 'ape', 'app']

- Hello world
- Python is great
+ Hello there
+ Python is awesome
  Goodbye
--- 
+++ 
@@ -1,3 +1,3 @@
-Hello world
-Python is great
+Hello there
+Python is awesome
 Goodbye
['apple', 'app', 'ape']


In [1]:
import unicodedata

# 字符名称
char = 'ß'
print(unicodedata.name(char))  # LATIN SMALL LETTER SHARP S

# 查找字符
print(unicodedata.lookup('LATIN SMALL LETTER SHARP S'))  # ß

# 规范化文本
s1 = 'café'
s2 = 'cafe\u0301'  # 使用组合字符
print(s1 == s2)  # False
print(unicodedata.normalize('NFC', s1) == unicodedata.normalize('NFC', s2))  # True

# 分类字符
for char in 'A1!あ':
    print(f"{char}: {unicodedata.category(char)}")
"""
A: Lu  # Letter, uppercase
1: Nd  # Number, decimal digit
!: Po  # Punctuation, other
あ: Lo  # Letter, other
"""

LATIN SMALL LETTER SHARP S
ß
False
True
A: Lu
1: Nd
!: Po
あ: Lo


'\nA: Lu  # Letter, uppercase\n1: Nd  # Number, decimal digit\n!: Po  # Punctuation, other\nあ: Lo  # Letter, other\n'