In [14]:
import pandas as pd
from docxtpl import DocxTemplate
import os

def _safe_filename_part(value):
    """Return ``value`` as text that is safe to embed in a file name.

    The value is converted with ``str()`` first, so non-string cells (numbers,
    NaN) do not raise. Every character that is not alphanumeric, ``-`` or
    ``_`` (spaces included) is replaced by ``_``.
    """
    return "".join(c if c.isalnum() or c in "-_" else "_" for c in str(value))


def main():
    """Generate one statement-of-purpose .docx per (university, research area).

    Reads university names from the first column of ``universities.xlsx``
    (read without a header row) and research rows from ``research.xlsx``
    (columns ``research_area``, ``journal_1``..``journal_3`` and ``skill``).
    For each of the first 30 non-empty universities and each research row it
    renders ``template.docx`` and saves the result in the
    ``HW_School_Application`` directory, printing a progress line per file.
    Finally it asks on stdin whether the documents should be converted to PDF
    and, on ``y``, calls ``convert_to_pdf``.
    """
    # Create the output directory.
    output_dir = "HW_School_Application"
    os.makedirs(output_dir, exist_ok=True)

    # Read the Excel inputs. Blank cells are dropped so that no document is
    # generated for a university called "nan".
    universities = (
        pd.read_excel('universities.xlsx', header=None).iloc[:, 0].dropna().tolist()
    )
    research = pd.read_excel('research.xlsx')

    template_path = 'template.docx'

    # Known research areas and their programme names; anything else falls
    # back to "Master of <area>". Built once instead of once per document.
    program_mapping = {
        'ECON': 'Master of Economics',
        'FINANCE': 'Master of Finance',
        'SOC SCI': 'Master of Social Sciences',
    }

    # Generate the SOP documents.
    generated_count = 0

    for university in universities[:30]:
        for _, row in research.iterrows():
            area = row['research_area']
            journals = [row['journal_1'], row['journal_2'], row['journal_3']]
            skills_for_area = row['skill']

            program_name = program_mapping.get(area, f'Master of {area}')

            # Values substituted into the template placeholders.
            context = {
                'name': 'Xiong Junfeng',
                'university': university,
                'program': program_name,
                'research_area': area,
                'journals': ", ".join(str(journal) for journal in journals),
                'skills': skills_for_area
            }

            # Load a fresh template for every document. Depending on the
            # docxtpl version, a template object that was already rendered is
            # not reset, so re-rendering it would keep the first context and
            # every file would be a copy of the first one.
            template = DocxTemplate(template_path)
            template.render(context)

            # Build the output file name from the sanitised parts.
            safe_uni = _safe_filename_part(university)
            safe_area = _safe_filename_part(area)
            filename = f"SOP_{safe_uni}_{safe_area}.docx"

            # Save the document.
            output_path = os.path.join(output_dir, filename)
            template.save(output_path)
            generated_count += 1
            print(f"生成 [{generated_count}]: {filename}")

    print(f"\n生成完成！共创建 {generated_count} 份目的声明")

    # Ask whether the Word documents should also be converted to PDF.
    response = input("\n是否将Word文档转换为PDF？(y/n): ")
    if response.lower() == 'y':
        convert_to_pdf(output_dir)

def convert_to_pdf(output_dir):
    """Convert the generated Word documents in ``output_dir`` to PDF.

    Uses ``docx2pdf`` when it can be imported. If an ``ImportError`` is
    raised, falls back to automating Microsoft Word through ``win32com``.
    Failures are reported on stdout rather than raised, so the caller never
    sees an exception from the conversion step.

    Args:
        output_dir: Directory that contains the ``.docx`` files; the PDFs are
            written next to them.
    """
    try:
        from docx2pdf import convert
        print("正在转换Word文档为PDF...")
        convert(output_dir)
        print("PDF转换完成！")
    except ImportError:
        print("未安装docx2pdf库，正在尝试使用win32com方法...")
        try:
            _convert_with_word(output_dir)
            print("PDF转换完成！")
        except Exception as e:
            print(f"PDF转换失败: {e}")
            print("请安装docx2pdf库: pip install docx2pdf")
    except Exception as e:
        print(f"PDF转换出错: {e}")


def _convert_with_word(output_dir):
    """Save every ``.docx`` file in ``output_dir`` as PDF via Word automation.

    Raises whatever ``win32com`` or Word raises; the Word application is
    always shut down and every opened document is always closed, even when a
    conversion fails.
    """
    import win32com.client

    word = win32com.client.Dispatch("Word.Application")
    try:
        # Keep the Word window hidden while converting.
        word.Visible = False

        for filename in os.listdir(output_dir):
            stem, ext = os.path.splitext(filename)
            # Skip non-Word files and the "~$" lock files Word creates next
            # to documents that are currently open.
            if ext != ".docx" or filename.startswith("~$"):
                continue

            in_file = os.path.abspath(os.path.join(output_dir, filename))
            # Swap only the extension; str.replace would also rewrite a
            # ".docx" that appears in the middle of the name.
            out_file = os.path.abspath(os.path.join(output_dir, stem + ".pdf"))

            doc = word.Documents.Open(in_file)
            try:
                doc.SaveAs(out_file, FileFormat=17)  # 17 is the PDF format
            finally:
                doc.Close()
    finally:
        # Without this, a failed conversion leaves an invisible Word process
        # running in the background.
        word.Quit()

# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

生成 [1]: SOP_Harvard_University_Cambridge__Massachusetts__USA___ECON.docx
生成 [2]: SOP_Harvard_University_Cambridge__Massachusetts__USA___FINANCE.docx
生成 [3]: SOP_Harvard_University_Cambridge__Massachusetts__USA___SOC_SCI.docx
生成 [4]: SOP_Massachusetts_Institute_of_Technology__MIT__Cambridge__Massachusetts__USA___ECON.docx
生成 [5]: SOP_Massachusetts_Institute_of_Technology__MIT__Cambridge__Massachusetts__USA___FINANCE.docx
生成 [6]: SOP_Massachusetts_Institute_of_Technology__MIT__Cambridge__Massachusetts__USA___SOC_SCI.docx
生成 [7]: SOP_University_of_California-Berkeley_Berkeley__California__USA___ECON.docx
生成 [8]: SOP_University_of_California-Berkeley_Berkeley__California__USA___FINANCE.docx
生成 [9]: SOP_University_of_California-Berkeley_Berkeley__California__USA___SOC_SCI.docx
生成 [10]: SOP_University_of_Chicago_Chicago__Illinois__USA___ECON.docx
生成 [11]: SOP_University_of_Chicago_Chicago__Illinois__USA___FINANCE.docx
生成 [12]: SOP_University_of_Chicago_Chicago__Illinois__USA___SOC_SCI.docx
生

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=90.0), HTML(value='')))


PDF转换完成！
