In [5]:
import os
import pandas as pd
from docxtpl import DocxTemplate
from pathlib import Path

# 1. Load the input tables --------------------------------------------------
# The university sheet labels its column 'university name'; rename it so the
# column matches the 'university' key placed in the template context below.
uni_df   = pd.read_excel('university.xlsx').rename(columns={'university name':'university'})
job_df   = pd.read_excel('job.xlsx')

# Drop rows that merely repeat the header text inside the data.
uni_df = uni_df[uni_df['university'] != 'university name'].reset_index(drop=True)
job_df = job_df[job_df['research_area'] != 'research_area'].reset_index(drop=True)

# 2. Create the output folder -----------------------------------------------
out_dir = Path('output_word')
out_dir.mkdir(exist_ok=True)

# 3. Template location ------------------------------------------------------
# Only the path is fixed here; the template itself is loaded once per
# document inside the loop (see the note there).
template_path = 'template.docx'


def _safe_filename_part(value):
    """Return ``str(value)`` with characters that are illegal in file names replaced by '_'.

    Replaces the characters Windows forbids in file names (< > : " / \\ | ? *)
    as well as ASCII control characters. Everything else is kept unchanged, so
    values that were already valid produce exactly the same file name as before.
    """
    forbidden = '<>:"/\\|?*'
    return ''.join(
        '_' if (ch in forbidden or ord(ch) < 32) else ch
        for ch in str(value)
    )


# 4. Nested loop: one document per (university, job) pair --------------------
for _, uni_row in uni_df.iterrows():
    university = uni_row['university']
    for _, job_row in job_df.iterrows():
        context = {
            'university'   : university,
            'research_area': job_row['research_area'],
            'journals'     : job_row['journals'],
            'skills'       : job_row['skills'],
            'career_goal'  : job_row['career_goal']
        }
        # Load a fresh template for every document. Re-rendering one shared
        # DocxTemplate object is only documented as supported by newer docxtpl
        # releases; on older ones the placeholders are consumed by the first
        # render and later files would repeat the first document's content.
        template = DocxTemplate(template_path)
        template.render(context)
        # Build the file name from sanitized values so a cell containing e.g.
        # '/' or ':' cannot break the save or escape the output folder.
        file_name = (
            f"SOP_{_safe_filename_part(university)}"
            f"_{_safe_filename_part(job_row['research_area'])}.docx"
        )
        file_path = out_dir / file_name
        template.save(file_path)

print(f'✅ 全部生成完成，共 {len(uni_df)*len(job_df)} 份 → {out_dir.absolute()}')

✅ 全部生成完成，共 90 份 → E:\quant\B_homework\output_word


In [7]:
# Sanity check: confirm docx2pdf is importable in this kernel and report
# which version is installed before attempting any conversion.
import os
import pathlib

import docx2pdf

print('docx2pdf 版本:', docx2pdf.__version__)

docx2pdf 版本: 0.1.8


In [None]:
from docx2pdf import convert
import pathlib, os

# Convert every generated Word document into a PDF with the same base name.
src_dir  = pathlib.Path('output_word')          # folder holding the .docx files
dst_dir  = pathlib.Path('output_pdf')           # folder that receives the PDFs
dst_dir.mkdir(exist_ok=True)

# Collect the documents to convert:
#  * skip Word's '~$...' owner/lock files, which match '*.docx' while a
#    document is open but are not real documents and would make the
#    conversion fail;
#  * sort so the processing order (and the progress log) is reproducible.
docx_files = sorted(
    path for path in src_dir.glob('*.docx')
    if not path.name.startswith('~$')
)

for docx_path in docx_files:
    pdf_path = dst_dir / f'{docx_path.stem}.pdf'    # same name, new suffix
    print(f'转换中: {docx_path.name} ...')
    # docx2pdf performs the actual conversion; paths are passed as strings.
    convert(str(docx_path), str(pdf_path))

print(f'✅ 全部转换完成 → {dst_dir.absolute()}')