In [5]:
# import patool
# Download CBDB sqlite
# !git clone https://github.com/cbdb-project/cbdb_sqlite.git
import requests
import os
import warnings
import urllib3

# TLS certificate verification stays on by default because the downloaded
# archive is extracted and opened later. Set VERIFY_TLS to False only when the
# local network makes verification impossible (e.g. an intercepting proxy);
# the InsecureRequestWarning is silenced only in that case.
VERIFY_TLS = True
if not VERIFY_TLS:
    warnings.simplefilter('ignore', urllib3.exceptions.InsecureRequestWarning)

# Alternative mirror:
# url = "https://hf-mirror.com/datasets/cbdb/cbdb-sqlite/resolve/main/latest.7z?download=true"
url = "https://huggingface.co/datasets/cbdb/cbdb-sqlite/resolve/main/latest.7z?download=true"

os.makedirs("./cbdb_sqlite", exist_ok=True)
archive_path = "./cbdb_sqlite/latest.7z"
partial_path = archive_path + ".part"

# Stream the response to disk in chunks instead of holding the whole archive
# in memory, and fail loudly on HTTP errors so an error page is never saved
# as latest.7z. The timeout covers connecting and each wait for more data.
with requests.get(url, verify=VERIFY_TLS, stream=True, timeout=60) as r:
    r.raise_for_status()
    with open(partial_path, "wb") as f:
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            f.write(chunk)

# Only replace the previous archive once the download finished completely.
os.replace(partial_path, archive_path)

In [6]:
# import glob
import os
# Path of the downloaded archive.
latest_file = "./cbdb_sqlite/latest.7z"
print(latest_file)

# Database file name: the archive's file name up to its first dot, plus ".db".
archive_name = latest_file.rsplit("/", 1)[-1]
latest_db = archive_name.partition(".")[0] + ".db"
print(latest_db)

# Delete any existing file of that name in the working directory.
if os.path.isfile(latest_db):
    os.remove(latest_db)

./cbdb_sqlite/latest.7z
latest.db


In [7]:
import py7zr

# Archive to unpack and the target directory passed to extractall().
archive = './cbdb_sqlite/latest.7z'
# NOTE(review): an empty path appears to resolve to the current working
# directory (the next cell opens "latest.db" from there) - confirm.
output_dir = ''

# Extract everything. The open archive gets its own name so that `archive`
# keeps holding the path string instead of being rebound to a closed handle.
with py7zr.SevenZipFile(archive, 'r') as seven_zip:
    seven_zip.extractall(output_dir)

print("解压完成！")



解压完成！


In [9]:
import sqlite3
import pandas as pd
import os
import datetime
import re

# Open the SQLite database; the path is relative to the working directory.
database_file = "latest.db"

# sqlite3.connect() silently creates an empty database when the file does not
# exist, which would only surface later as a "no such table" error.
if not os.path.isfile(database_file):
    raise FileNotFoundError(f"SQLite database not found: {database_file}")
conn = sqlite3.connect(database_file)

# c_dy codes that are reported together under the '5 Dynasties' label.
five_dynasties_ids = [7, 34, 47, 48, 52, 49, 36, 75, 9, 8, 11, 38, 12, 13, 55, 10, 66]
# Rendered once and reused in both places of the query. The values are integer
# literals from the list above, so interpolating them into the SQL is safe.
five_dynasties_sql = ','.join(map(str, five_dynasties_ids))

# Count people per dynasty and per c_female value, folding the codes listed
# above into a single '5 Dynasties' group.
query = f"""
SELECT
    CASE
        WHEN bm.c_dy IN ({five_dynasties_sql})
             OR d.c_dynasty = 'Five Dynasties' THEN '5 Dynasties'
        ELSE d.c_dynasty
    END AS dynasty_name,
    bm.c_female AS gender,
    COUNT(*) AS count
FROM
    BIOG_MAIN bm
JOIN
    DYNASTIES d ON bm.c_dy = d.c_dy
WHERE
    d.c_dynasty IN ('Tang', 'Song', 'Liao', 'Jin', 'Yuan', 'Ming', 'Qing', 'Republic of China')
    OR bm.c_dy IN ({five_dynasties_sql})
GROUP BY
    dynasty_name, bm.c_female
"""

# Load the result into a DataFrame. The connection is released here even if
# the query fails; closing it again later is a harmless no-op.
try:
    df = pd.read_sql(query, conn)
finally:
    conn.close()

# Map the dynasty names returned by the query to the labels used in the output.
dynasty_mapping = {
    'Tang': 'Tang',
    'Song': 'Song',
    'Liao': 'Liao',
    'Jin': 'Jin',
    'Yuan': 'Yuan',
    'Ming': 'Ming',
    'Qing': 'Qing',
    '5 Dynasties': '5 Dynasties',
    'Republic of China': 'Minguo'
}

# Order in which the dynasties appear in the generated data.
dynasty_order = ['Tang', '5 Dynasties', 'Song', 'Liao', 'Jin', 'Yuan', 'Ming', 'Qing', 'Minguo']

# An ordered categorical makes sorting follow dynasty_order instead of
# alphabetical order.
df['dynasty_name'] = pd.Categorical(
    df['dynasty_name'].map(dynasty_mapping),
    categories=dynasty_order,
    ordered=True,
)

# Sort by dynasty and then by the numeric gender flag (still 0/1 at this
# point). Sorting on the dynasty alone uses a non-stable algorithm by default,
# so the male/female order inside a dynasty could differ between runs.
df = df.sort_values(['dynasty_name', 'gender'])

# Replace the numeric flag with its display label (0 -> '男', 1 -> '女').
df['gender'] = df['gender'].map({0: '男', 1: '女'})

# Render one JavaScript object literal per (dynasty, gender) row of df.
chart4_data = ',\n'.join(
    f'{{ label: "{dynasty}", gender: "{gender}", value: {int(count)} }}'
    for dynasty, gender, count in zip(df['dynasty_name'], df['gender'], df['count'])
)

# Read the current contents of script.js.
with open('script.js', 'r', encoding='utf-8') as file:
    js_code = file.read()

# Locate the previously generated Chart 4 data block. The end marker is
# searched for after the start marker so the two always belong together.
start_delimiter = 'const chart4Data = ['
end_delimiter = '];//Chart 4 Data'
start_index = js_code.find(start_delimiter)
end_index = js_code.find(end_delimiter, max(start_index, 0))

if start_index != -1 and end_index != -1:
    # Cut the old block out of the file.
    js_code = js_code[:start_index] + js_code[end_index + len(end_delimiter):]
elif start_index != -1 or end_index != -1:
    # str.find() returns -1 for a missing marker; slicing with that value
    # would silently corrupt script.js, so stop before writing anything.
    raise ValueError(
        "script.js contains only one of the Chart 4 Data markers "
        f"({start_delimiter!r} / {end_delimiter!r}); refusing to rewrite it."
    )
# If neither marker is present there is no old block to remove.

# Put the regenerated block at the top of the file. Leading newlines of the
# remainder are dropped so repeated runs do not pile up blank lines after it.
new_data = f'{start_delimiter}\n{chart4_data}\n{end_delimiter}'
updated_js_code = new_data + '\n' + js_code.lstrip('\n')

# Write the updated contents back to script.js.
with open('script.js', 'w', encoding='utf-8') as file:
    file.write(updated_js_code)

# Refresh the date stamps stored inside images/sex_count_date.svg.
svg_path = os.path.join(os.getcwd(), 'images', 'sex_count_date.svg')
with open(svg_path, 'r', encoding='utf-8') as svg_in:
    svg_content = svg_in.read()

# Today's date, formatted once for the Chinese stamp and once for the English one.
current_time = datetime.datetime.now().strftime('%Y年%m月%d日')
current_time_english = datetime.datetime.now().strftime('%B %d, %Y')

# Replace the Chinese stamp first, then the English stamp in that result.
chinese_stamp = re.compile(r'更新日期：\d+年\d+月\d+日')
english_stamp = re.compile(r'Updated on: \w+ \d+, \d+')
updated_svg_content = chinese_stamp.sub(f'更新日期：{current_time}', svg_content)
updated_svg_content = english_stamp.sub(f'Updated on: {current_time_english}', updated_svg_content)

with open(svg_path, 'w', encoding='utf-8') as svg_out:
    svg_out.write(updated_svg_content)

# Close the database connection and report completion.
conn.close()
print("script.js 和 sex_count_date.svg 文件已更新完成！")


script.js 和 sex_count_date.svg 文件已更新完成！
