In [2]:
# ---- Matplotlib: use a font that supports Hangul on macOS ----
import os
import matplotlib
from matplotlib import rcParams

# Prefer the Hangul-capable font that ships with macOS; fall back to
# NanumGothic when it has been installed separately.
apple_font = "/System/Library/Fonts/AppleSDGothicNeo.ttc"
nanum_font = "/Library/Fonts/NanumGothic.ttf"  # system-wide NanumGothic install
# Homebrew casks install fonts per-user (~/Library/Fonts) by default, so the
# command suggested in the warning below would otherwise go undetected.
nanum_user_font = os.path.expanduser("~/Library/Fonts/NanumGothic.ttf")

if os.path.exists(apple_font):
    rcParams["font.family"] = "Apple SD Gothic Neo"
elif os.path.exists(nanum_font) or os.path.exists(nanum_user_font):
    rcParams["font.family"] = "NanumGothic"
else:
    # No known Hangul-capable font file was found: leave the default font
    # family untouched and tell the user how to install NanumGothic.
    print("⚠️ 找不到可显示韩文的字体，建议：brew install --cask font-nanum")

# Draw minus signs as ASCII hyphen-minus instead of U+2212, so negative tick
# labels still render when the chosen font lacks the Unicode minus glyph.
rcParams["axes.unicode_minus"] = False

In [7]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import unicodedata as ud

def nfc(x):
    """Return ``x`` in Unicode NFC form when it is a string.

    Any non-string value (numbers, ``None``, ...) is handed back unchanged,
    which lets the helper be mapped over mixed-type data.
    """
    if not isinstance(x, str):
        return x
    return ud.normalize("NFC", x)

def parse_filename(filename: str) -> tuple:
    """Split a receipt filename into ``(amount, applicant, date, content)``.

    The extension is dropped and the remaining stem is NFC-normalized, then
    split on ``"_"`` and read as::

        <amount>_<applicant>_<YYYY>_<MM>_<DD>[_<HH>_<mm>][_<content ...>]

    Args:
        filename: Bare file name (not a path), e.g.
            ``"12000_Kim_2024_03_15_lunch.jpg"``.

    Returns:
        A 4-tuple:

        * ``amount`` (int): leading segment parsed as an integer, ``0`` when
          it is not a valid integer.
        * ``applicant`` (str): second segment, ``"Unknown"`` when it is
          missing or empty.
        * ``date`` (datetime | None): parsed from the segments after the
          applicant, preferring the date+time form over the date-only form;
          ``None`` when neither matches.
        * ``content`` (str): every segment after the date re-joined with
          ``"_"`` (everything after the applicant when no date was found),
          ``"Unknown"`` when nothing is left.
    """
    stem, _ = os.path.splitext(filename)
    # Normalize before splitting so decomposed (NFD) Hangul, as some file
    # systems report it, compares equal to the composed form everywhere else.
    stem = ud.normalize("NFC", stem)
    parts = stem.split("_")

    # str.split always yields at least one element, so parts[0] is safe; only
    # a non-numeric segment can fail here.
    try:
        amount = int(parts[0])
    except ValueError:
        amount = 0

    # Treat an empty segment ("5000__2024...") the same as a missing one.
    applicant = (parts[1] if len(parts) > 1 else "") or "Unknown"

    # Try the longer date+time layout first, then the date-only layout, and
    # remember where the date ends so the rest can be taken as the content.
    date = None
    date_end_index = 2  # no date found: content starts right after applicant
    for fmt, take in (("%Y_%m_%d_%H_%M", 5), ("%Y_%m_%d", 3)):
        try:
            date = datetime.strptime("_".join(parts[2:2 + take]), fmt)
        except ValueError:
            continue
        date_end_index = 2 + take
        break

    # Slicing past the end gives [], which joins to "" and falls back too.
    content = "_".join(parts[date_end_index:]) or "Unknown"

    return amount, applicant, date, content

def process_receipts(folder: str = "example_receipts") -> None:
    """Summarise the receipt files in ``folder`` as a spreadsheet and two charts.

    Every regular, non-hidden file in ``folder`` is parsed with
    ``parse_filename`` and becomes one row. Rows are ordered chronologically
    (undated receipts first, ties broken by file name) and a running total is
    computed in that order.

    Side effects (all written to the current working directory):

    * ``receipts_summary.xlsx`` - one row per receipt.
    * ``expense_by_date.png``   - bar chart of total spend per date.
    * ``expense_by_person.png`` - pie chart of spend share per applicant.

    When the folder holds no receipt files, a warning is printed and nothing
    is written.

    Args:
        folder: Directory containing the receipt files.

    Raises:
        OSError: If ``folder`` cannot be listed (propagated from
            ``os.listdir``).
    """
    columns = ["이름", "일자", "내용", "지출", "총 지출액", "비고", "영수증 경로"]

    # Collect (sort_key, row) pairs. os.listdir order is arbitrary, so the
    # listing is sorted first to make same-timestamp ordering reproducible.
    dated_rows = []
    for entry in sorted(os.listdir(folder)):
        if entry.startswith("."):
            continue
        # Use the raw name for the filesystem check; skip subdirectories.
        if not os.path.isfile(os.path.join(folder, entry)):
            continue

        # NFC-normalize both the file name and the recorded path.
        f_nfc = nfc(entry)
        path = nfc(os.path.join(folder, f_nfc))

        amount, name, date, content = parse_filename(f_nfc)
        row = {
            "이름": nfc(name),
            "일자": date.strftime("%Y-%m-%d") if date else "",
            "내용": nfc(content),
            "지출": amount,
            "총 지출액": None,
            "비고": "",
            "영수증 경로": path,
        }
        # Undated receipts sort before every dated one.
        sort_key = date if date is not None else datetime.min
        dated_rows.append((sort_key, row))

    if not dated_rows:
        # An empty frame has nothing to total or plot; bail out instead of
        # failing later on missing columns.
        print(f"⚠️ {folder} 中没有可处理的收据文件")
        return

    # list.sort is stable, so the full timestamp (including HH:MM) decides the
    # order and equal timestamps keep their file-name order.
    dated_rows.sort(key=lambda pair: pair[0])
    df = pd.DataFrame([row for _, row in dated_rows], columns=columns)
    df["총 지출액"] = df["지출"].cumsum()

    # Write the spreadsheet (.xlsx stores Hangul text natively).
    df.to_excel("receipts_summary.xlsx", index=False, engine="openpyxl")
    print("✅ receipts_summary.xlsx 已生成")

    # ---- Chart 1: spend per date ----
    # Aggregate first: plotting one bar per receipt would stack several bars
    # on the same x position whenever a date has more than one receipt.
    daily_totals = df.groupby("일자")["지출"].sum()
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.bar(daily_totals.index, daily_totals.values)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.set_xlabel("일자")
    ax.set_ylabel("지출 (₩)")
    ax.set_title("일자별 지출 내역")
    fig.tight_layout()
    fig.savefig("expense_by_date.png")
    plt.close(fig)

    # ---- Chart 2: spend share per applicant ----
    fig, ax = plt.subplots(figsize=(6, 6))
    df.groupby("이름")["지출"].sum().plot.pie(ax=ax, autopct="%1.1f%%")
    ax.set_title("신청자별 지출 비율")
    ax.set_ylabel("")
    fig.tight_layout()
    fig.savefig("expense_by_person.png")
    plt.close(fig)
    print("📈 图像已生成：expense_by_date.png, expense_by_person.png")

# Build the summary spreadsheet and charts for the bundled sample receipts.
process_receipts(folder="example_receipts")

✅ receipts_summary.xlsx 已生成
📈 图像已生成：expense_by_date.png, expense_by_person.png
