In [1]:
pip install docxtpl docx2pdf python-docx pandas openpyxl

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting docx2pdf
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/53/4f/1155781308281e67f80b829738a29e5354e03664c62311f753056afc873b/docx2pdf-0.1.8-py3-none-any.whl (6.7 kB)
Installing collected packages: docx2pdf
Successfully installed docx2pdf-0.1.8
Note: you may need to restart the kernel to use updated packages.


In [2]:
from docxtpl import DocxTemplate
from docx2pdf import convert
import pandas as pd
import os

In [3]:
# Load the two Excel workbooks that drive document generation.
# universitylist.xlsx: the universities (read later via its "university" column).
university_df = pd.read_excel("universitylist.xlsx")
# detaillist.xlsx: per-research-area details
# (research_area / journals / skills / job).
detail_df = pd.read_excel("detaillist.xlsx")

In [4]:
# Flatten both DataFrames into plain Python containers that are easy to loop over.
# One string per university (30 entries in the current workbook).
university_list = university_df["university"].to_list()
# One dict per research-area row (3 rows in the current workbook).
detail_list = detail_df.to_dict(orient="records")

In [5]:
# Display the research-area records to sanity-check the Excel import.
detail_list

[{'research_area': 'economics',
  'journals': 'American Economic Review, Annals of Statistics and Econometrica',
  'job': 'quant researcher',
  'skills': 'Python, SQL, Math, PowerBI, Tableau'},
 {'research_area': 'finance',
  'journals': 'Journal of Finance, Journal of Financial Economics and Review of Financial Studies',
  'job': 'financial analyst',
  'skills': 'auditing, accounting, English'},
 {'research_area': 'accounting',
  'journals': 'Accounting Review, Journal of Accounting and Economics and Journal of Accounting Research',
  'job': 'accountant',
  'skills': 'project management, financial analysis, financial reporting'}]

In [6]:
# Display the university names to sanity-check the Excel import.
university_list

['Harvard University',
 'Massachusetts Institute of Technology',
 'University of California-Berkeley',
 'University of Chicago',
 'Paris School of Economics',
 'Princeton University',
 'Stanford University',
 'Yale University',
 'Toulouse School of Economics',
 'Oxford University',
 'Columbia University',
 'University of Cambridge',
 'Boston College',
 'University of Toronto',
 'University of Warwick',
 'Cornell University',
 'University of California-Davis',
 'University of Nottingham',
 'Johns Hopkins University',
 'University of California-Irvine',
 'University of Queensland',
 'University of Virginia',
 'Georgetown University',
 'University of Minnesota',
 'New York University',
 'University of California-Santa Barbara',
 'Washington University in St. Louis',
 'Pennsylvania State University',
 'University of Colorado',
 'University of California-Santa Cruz']

In [7]:
# Load the Word template a single time so the file is not re-read for every document.
# NOTE(review): the generation loop calls render() repeatedly on this one object;
# presumably that relies on docxtpl's multiple-rendering support — confirm the
# installed docxtpl version provides it.
doc = DocxTemplate("template.docx")

In [12]:
# Generate one personalised document per (research area, university) pair:
# render the Word template, save the result as .docx, then convert it to PDF.

# Root output folder, defined once. It is a raw string so that the "\a" in the
# path is kept literally instead of being read as the BEL escape character.
output_root = r"E:\aifianceHW2"
# Progress denominator derived from the data instead of a hard-coded 30.
total_universities = len(university_list)

# Outer loop: research areas; inner loop: universities.
for detail in detail_list:
    # Pull out the fields of the current research area.
    current_area = detail["research_area"]
    current_journals = detail["journals"]
    current_skills = detail["skills"]
    current_job = detail["job"]

    # Each research area gets its own sub-folder; create it if missing.
    area_output_dir = os.path.join(output_root, current_area)
    os.makedirs(area_output_dir, exist_ok=True)

    # Produce one document per university for this research area.
    for idx, university in enumerate(university_list, 1):
        # Replacement values; keys must match the template's {{ placeholders }}.
        context = {
            "research_area": current_area,
            "university": university,
            "journals": current_journals,
            "skills": current_skills,
            "job": current_job
        }

        # Render the template with the current values.
        # NOTE(review): `doc` is re-rendered on every iteration; this depends on
        # docxtpl's multiple-rendering support — confirm the installed version.
        doc.render(context)

        # Output file name: <University_Name>_<research_area>.docx (unique per pair).
        output_filename = f"{university.replace(' ', '_')}_{current_area}.docx"
        output_path = os.path.join(area_output_dir, output_filename)
        doc.save(output_path)

        # Convert the saved Word file to PDF. Only the extension is swapped, so a
        # ".docx" substring elsewhere in the path cannot be rewritten by accident.
        pdf_output_path = os.path.splitext(output_path)[0] + ".pdf"
        convert(output_path, pdf_output_path)

        # Progress line for the current research area.
        print(f"已生成 {idx}/{total_universities} | 研究领域：{current_area} | 大学：{university} | 路径：{output_path}")

print(f"\n批量生成完成！共生成 {len(detail_list) * len(university_list)} 份文书，保存路径：{output_root}")

已生成 1/30 | 研究领域：economics | 大学：Harvard University | 路径：E:\aifianceHW2\economics\Harvard_University_economics.docx
已生成 2/30 | 研究领域：economics | 大学：Massachusetts Institute of Technology | 路径：E:\aifianceHW2\economics\Massachusetts_Institute_of_Technology_economics.docx
已生成 3/30 | 研究领域：economics | 大学：University of California-Berkeley | 路径：E:\aifianceHW2\economics\University_of_California-Berkeley_economics.docx
已生成 4/30 | 研究领域：economics | 大学：University of Chicago | 路径：E:\aifianceHW2\economics\University_of_Chicago_economics.docx
已生成 5/30 | 研究领域：economics | 大学：Paris School of Economics | 路径：E:\aifianceHW2\economics\Paris_School_of_Economics_economics.docx
已生成 6/30 | 研究领域：economics | 大学：Princeton University | 路径：E:\aifianceHW2\economics\Princeton_University_economics.docx
已生成 7/30 | 研究领域：economics | 大学：Stanford University | 路径：E:\aifianceHW2\economics\Stanford_University_economics.docx
已生成 8/30 | 研究领域：economics | 大学：Yale University | 路径：E:\aifianceHW2\economics\Yale_University_economics.docx
