# Tips:


- Nice use of `pd.merge`.

- The third cell can be split into multiple cells (multiple cells can be executed independently and easily debugged).

- The function `read_excel_data` could be rewritten as follows (more general):

```python
def read_excel_data(file_path):
    """Load a single Excel file into a DataFrame.

    Args:
        file_path: Location of the Excel file, passed straight to
            ``pd.read_excel``.

    Returns:
        The DataFrame produced by ``pd.read_excel``.
    """
    return pd.read_excel(file_path)
```


In [4]:
# 完整代码：大学申请文书生成器
import pandas as pd
import os
from docxtpl import DocxTemplate
import time

In [2]:
pip install docxtpl

Collecting docxtpl
  Downloading docxtpl-0.20.1-py3-none-any.whl.metadata (9.4 kB)
Collecting python-docx>=1.1.1 (from docxtpl)
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Collecting docxcompose (from docxtpl)
  Downloading docxcompose-1.4.0.tar.gz (20 kB)
  Preparing metadata (setup.py) ... done
Downloading docxtpl-0.20.1-py3-none-any.whl (20 kB)
Downloading python_docx-1.2.0-py3-none-any.whl (252 kB)
Building wheels for collected packages: docxcompose
[33m  DEPRECATION: Building 'docxcompose' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'docxcompose'. Discussion can be found at https://github.com/pypa/pip/issues/6334[0m[33m
  Building wheel for docxcompose 

In [5]:
# ---------------------- 步骤1：读取Excel数据 ----------------------
def read_excel_data(uni_excel_path, research_excel_path):
    """Load the university list and the research-area list from Excel.

    Both workbooks are read with the ``openpyxl`` engine, the university
    file first and the research file second.

    Args:
        uni_excel_path: Path of the Excel file holding the university list.
        research_excel_path: Path of the Excel file holding the research
            areas.

    Returns:
        A ``(uni_df, research_df)`` tuple of DataFrames, in that order.
    """
    loaded = [
        pd.read_excel(source, engine="openpyxl")
        for source in (uni_excel_path, research_excel_path)
    ]
    return loaded[0], loaded[1]

# ---------------------- 步骤2：生成Word文档（增加模式参数） ----------------------
def generate_word_documents(uni_df, research_df, template_path, output_dir, generate_mode="batch"):
    """Render one Word document per (university row, research row) pair.

    Every row of ``uni_df`` is combined with every row of ``research_df``
    (Cartesian product). Each combined row is rendered through the docxtpl
    template and saved as ``SOP_<university_name>_<research_area>.docx``
    inside ``output_dir``. The input DataFrames are left unmodified.

    Args:
        uni_df: DataFrame of universities.
        research_df: DataFrame of research areas.
            Together the two frames must supply the columns
            ``university_name``, ``program_name``, ``research_area``,
            ``journal1``, ``journal2``, ``journal3``, ``career_goal`` and
            ``skills``.
        template_path: Path of the ``.docx`` template given to
            ``DocxTemplate``.
        output_dir: Directory that receives the generated files; created
            if it does not exist.
        generate_mode: ``"single"`` renders only the first combination;
            any other value (default ``"batch"``) renders all of them.

    Returns:
        A ``(row_count, output_dir)`` tuple, where ``row_count`` is the
        number of combined rows that were rendered.

    Raises:
        KeyError: If a required column is missing from the combined rows.
    """
    # A cross merge builds the Cartesian product directly, so no helper key
    # column has to be written into the caller's DataFrames.
    combined_df = pd.merge(uni_df, research_df, how="cross")

    # head(1) keeps at most the first row and, unlike iloc[[0]], does not
    # raise when there are no combinations at all.
    if generate_mode == "single":
        combined_df = combined_df.head(1)

    # Load the template before touching the file system so a bad template
    # path fails without creating the output directory.
    template = DocxTemplate(template_path)
    os.makedirs(output_dir, exist_ok=True)

    context_keys = (
        "university_name",
        "program_name",
        "research_area",
        "journal1",
        "journal2",
        "journal3",
        "career_goal",
        "skills",
    )

    for row in combined_df.to_dict("records"):
        context = {key: row[key] for key in context_keys}
        template.render(context)

        # str() guards against non-string cells (e.g. empty Excel cells).
        university_part = str(row["university_name"]).replace(" ", "_")
        # NOTE(review): the name does not include program_name, so two rows
        # sharing university and research area overwrite each other.
        file_name = f"SOP_{university_part}_{row['research_area']}.docx"
        file_path = os.path.join(output_dir, file_name)
        template.save(file_path)
        print(f"生成Word：{file_name}")

    return len(combined_df), output_dir


In [15]:
def main():
    """Run the pipeline: read both Excel inputs, then generate the Word files.

    Input paths, the output directory and the generation mode are
    configured by the local variables below. Progress and the number of
    generated documents are printed.
    """
    # Generation mode: "batch" renders every combination, "single" only the
    # first one. Switch the mode here.
    generate_mode = "single"

    # 1. Input file paths (replace with your actual paths).
    uni_excel_path = "excel_list1.xlsx"
    research_excel_path = "excel_list2.xlsx"
    template_path = "SOP_template.docx"

    # 2. Output directory, placed under the user's home directory.
    base_dir = os.path.expanduser("~/HW_School_Application")
    word_output_dir = os.path.join(base_dir, "Word_Files")

    # 3. Run the steps.
    print("=== 步骤1：读取Excel数据 ===")
    uni_df, research_df = read_excel_data(uni_excel_path, research_excel_path)

    print("\n=== 步骤2：生成Word文档 ===")
    # The second return value (the output directory) is not needed here.
    doc_count, _ = generate_word_documents(
        uni_df, research_df, template_path, word_output_dir, generate_mode
    )
    print(f"共生成 {doc_count} 份Word文档")
    
    

In [13]:
# Run main() only when this code is executed as the top-level script.
if __name__ == "__main__":
    main()

=== 步骤1：读取Excel数据 ===

=== 步骤2：生成Word文档 ===
生成Word：SOP_Harvard_University_economics.docx
共生成 1 份Word文档
