In [1]:
# Install dependencies: docxtpl (Word template rendering) and docx2pdf (Word-to-PDF conversion)
!pip install docxtpl
!pip install docx2pdf



In [2]:
import os
from pathlib import Path
import pandas as pd
from docxtpl import DocxTemplate
import platform
from docx2pdf import convert as docx2pdf_convert

# --- Configuration ---------------------------------------------------------

# Applicant name substituted into every generated document.
NAME = "Puwen Sun"

# Input files, given as relative paths (resolved against the working directory).
TEMPLATE_FILE = Path("template.docx")
UNIVERSITY_FILE = Path("university.xlsx")
RESEARCH_AREA_FILE = Path("research_area.xlsx")

# Output folder for the generated files; created up front if it is missing.
OUT_DIR = Path(r"D:\Quant\Homework\Homework_2\School_Application")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Step 1: load both Excel sheets (universities first, then research areas).
df_university, df_research_area = (
    pd.read_excel(excel_path)
    for excel_path in (UNIVERSITY_FILE, RESEARCH_AREA_FILE)
)

# Step 2: render one Word document for every (university, research area) pair.


def _cell_text(value):
    """Return a spreadsheet cell as stripped text; missing cells become ""."""
    # Empty Excel cells are loaded by pandas as NaN. Without this check,
    # str(NaN) would put the literal word "nan" into the rendered document
    # and into the output file name.
    if pd.isna(value):
        return ""
    return str(value).strip()


def _safe_filename_part(text):
    """Keep only letters, digits, spaces, underscores and hyphens of *text*."""
    return "".join(c for c in text if c.isalnum() or c in (" ", "_", "-")).strip()


# Columns of the research-area sheet that are passed to the template.
_RESEARCH_FIELDS = (
    "Research_Area",
    "research_area",
    "journal_1",
    "journal_2",
    "journal_3",
    "career",
    "skills",
)

# The research-area rows are the same for every university, so extract and
# clean them once instead of re-iterating the DataFrame inside the outer loop.
_research_contexts = [
    {field: _cell_text(row_r[field]) for field in _RESEARCH_FIELDS}
    for _, row_r in df_research_area.iterrows()
]

generated_files = []

for _, row_u in df_university.iterrows():
    university = _cell_text(row_u["university"])
    if not university:
        # A row without a university name cannot produce a meaningful document.
        print("[WARN] 跳过 university 列为空的行")
        continue

    for research_context in _research_contexts:
        # Reload the template every time so nothing from the previous
        # render carries over into the next document.
        tpl = DocxTemplate(str(TEMPLATE_FILE))

        # Build the template context: fixed name, current university, and
        # the fields of the current research-area row.
        context = {"name": NAME, "university": university, **research_context}

        # Render the template.
        tpl.render(context)

        # Build a file name from the sanitised university and research area.
        safe_uni = _safe_filename_part(university)
        safe_area = _safe_filename_part(research_context["research_area"])
        filename = f"SOP_{safe_uni}_{safe_area}.docx"
        output_path = OUT_DIR / filename

        # Save the Word file and remember where it was written.
        tpl.save(output_path)
        generated_files.append(output_path)
        print(f"[INFO] 生成 Word: {output_path}")

# Step 3: batch-convert the Word files to PDF.
# The conversion is attempted only when running on Windows; any other
# system just reports that the step is skipped.
if platform.system() != "Windows":
    print("[INFO] 非 Windows 系统，跳过 PDF 转换。")
else:
    print("\n[INFO] 开始批量转换 PDF ...")
    try:
        # Hand over the whole output folder so it is converted in one call.
        docx2pdf_convert(str(OUT_DIR))
        print("[INFO] ✅ 所有 PDF 转换完成！")
    except Exception as exc:
        # Best effort: a failed conversion is reported but does not abort,
        # because the Word files have already been written.
        print(f"[WARN] PDF 批量转换失败: {exc}")

# Final summary: how many Word files were produced and where they are.
word_count = len(generated_files)
print(f"\n✅ 已生成 {word_count} 份 Word 文件，保存在 {OUT_DIR}")

[INFO] 生成 Word: D:\Quant\Homework\Homework_2\School_Application\SOP_Harvard University_accounting.docx
[INFO] 生成 Word: D:\Quant\Homework\Homework_2\School_Application\SOP_Harvard University_economics.docx
[INFO] 生成 Word: D:\Quant\Homework\Homework_2\School_Application\SOP_Harvard University_finance.docx
[INFO] 生成 Word: D:\Quant\Homework\Homework_2\School_Application\SOP_Massachusetts Institute of Technology_accounting.docx
[INFO] 生成 Word: D:\Quant\Homework\Homework_2\School_Application\SOP_Massachusetts Institute of Technology_economics.docx
[INFO] 生成 Word: D:\Quant\Homework\Homework_2\School_Application\SOP_Massachusetts Institute of Technology_finance.docx
[INFO] 生成 Word: D:\Quant\Homework\Homework_2\School_Application\SOP_University of California-Berkeley_accounting.docx
[INFO] 生成 Word: D:\Quant\Homework\Homework_2\School_Application\SOP_University of California-Berkeley_economics.docx
[INFO] 生成 Word: D:\Quant\Homework\Homework_2\School_Application\SOP_University of California-Berkeley_finance.docx
[... remaining output truncated ...]

  0%|          | 0/90 [00:00<?, ?it/s]

[INFO] ✅ 所有 PDF 转换完成！

✅ 已生成 90 份 Word 文件，保存在 D:\Quant\Homework\Homework_2\School_Application
