# Tips

- Split the code into multiple cells, so that each cell can be executed independently and debugged easily.

- Load the template outside the loop to improve efficiency (load it once and reuse it). In this code, the template is reloaded on every iteration.

- Please give your file a meaningful name.

In [2]:
import pandas as pd
from docxtpl import DocxTemplate
import os
from pathlib import Path
import random

def _safe_filename_part(text):
    """Return *text* with spaces and file-name-hostile characters replaced by '_'.

    The replaced characters are the space plus the ones Windows rejects in
    file names (angle brackets, colon, double quote, both slashes, pipe,
    question mark and asterisk). *text* must be a ``str``.
    """
    unsafe = str.maketrans({ch: '_' for ch in ' <>:"/\\|?*'})
    return text.translate(unsafe)


def create_application_letters():
    """Generate one application letter per (university, research area) pair.

    The workbook ``data.xlsx`` is looked up on the user's Desktop, then in the
    current directory; if neither exists, sample data is written to the
    Desktop via ``create_sample_data``. The sheets ``universities``,
    ``research_areas`` and ``skills`` are read, and for every combination of
    university and research area ``template.docx`` (created via
    ``create_template_file`` when missing) is rendered with a randomly chosen
    skill and saved under ``~/HW_School_Application``.

    Returns:
        The output directory as a ``Path``, or ``None`` when the data could
        not be read, no usable skill is available, or the missing template
        could not be created.
    """
    # Create the output directory
    output_dir = Path.home() / "HW_School_Application"
    output_dir.mkdir(exist_ok=True)

    # Step 1: look for the data file on the Desktop
    desktop_path = Path.home() / "Desktop"
    data_file_path = desktop_path / "data.xlsx"

    # Fall back to the current directory, then to generated sample data
    if not data_file_path.exists():
        data_file_path = Path("data.xlsx")
        if not data_file_path.exists():
            print("在桌面和当前目录都找不到data.xlsx文件，创建示例数据...")
            create_sample_data(desktop_path)
            data_file_path = desktop_path / "data.xlsx"

    print(f"使用数据文件: {data_file_path}")

    # Steps 2-6: read the data
    try:
        # Passing a list of sheet names returns a dict of DataFrames, so the
        # workbook is opened once instead of once per sheet.
        sheets = pd.read_excel(
            data_file_path,
            sheet_name=['universities', 'research_areas', 'skills'],
        )
        universities_df = sheets['universities']
        research_areas_df = sheets['research_areas']
        skills_df = sheets['skills']

        # Built once for the whole run; blank cells are dropped so a letter
        # never advertises the skill "nan".
        skill_choices = skills_df['skills'].dropna().tolist()

        print("数据读取成功!")
        print(f"大学数量: {len(universities_df)}")
        print(f"研究领域数量: {len(research_areas_df)}")
        print(f"技能数量: {len(skills_df)}")
        print(f"预计生成: {len(universities_df) * len(research_areas_df)} 份申请信")

    except Exception as e:
        print(f"读取数据文件失败: {e}")
        return

    # Without any skill every single letter would fail; report it once instead.
    if not skill_choices:
        print("读取数据文件失败: skills 工作表中没有可用的技能")
        return

    # Make sure the template exists once, before the loop, rather than
    # re-checking the file system for every letter.
    template_path = "template.docx"
    if not Path(template_path).exists():
        print(f"错误: 模板文件 {template_path} 不存在!")
        try:
            # Create a simple template file
            create_template_file()
        except Exception as e:
            print(f"✗ 生成失败: {e}")
            return

    # Personal information
    personal_info = {
        'name': 'Fu',
        'program': 'Master of Finance',
    }

    # Step 7: generate the letters
    generated_count = 0

    print("开始生成申请信...")

    for _, university_row in universities_df.iterrows():
        for _, research_row in research_areas_df.iterrows():
            try:
                university_name = university_row['University Name']
                research_area = research_row['Research Area']

                # Template placeholders; the keys must match the column
                # names used in the workbook.
                context = {
                    'name': personal_info['name'],
                    'university': university_name,
                    'program': personal_info['program'],
                    'research_area': research_area,
                    'journal1': research_row['Journal 1'],
                    'journal2': research_row['Journal 2'],
                    'journal3': research_row['Journal 3'],
                    'career_goal': research_row['Career Goal'],
                    'skills': random.choice(skill_choices),
                }

                # Step 8: render the Word document.
                # A fresh DocxTemplate is built per letter so every render
                # starts from the untouched template file.
                # NOTE(review): whether one instance can be re-rendered safely
                # depends on the docxtpl version — confirm before hoisting.
                doc = DocxTemplate(template_path)
                doc.render(context)

                # Build the file name. A non-text university name (e.g. an
                # empty cell) raises here and the pair is skipped, as before.
                filename = (
                    f"SOP_{_safe_filename_part(university_name)}"
                    f"_{_safe_filename_part(str(research_area))}.docx"
                )
                filepath = output_dir / filename
                doc.save(filepath)

                generated_count += 1
                print(f"✓ 已生成: {filename}")

            except Exception as e:
                # Best effort: report the failed pair and move on to the next.
                print(f"✗ 生成失败: {e}")
                continue

    print(f"\n完成! 成功生成 {generated_count} 份申请信!")
    return output_dir

def create_sample_data(desktop_path):
    """Write a sample ``data.xlsx`` workbook into *desktop_path*.

    The workbook contains three sheets: ``universities`` (30 rows with a
    rank group and a name), ``research_areas`` (3 rows with three journals
    and a career goal each) and ``skills`` (11 rows).

    Args:
        desktop_path: Directory that receives ``data.xlsx``; a ``Path`` or
            anything ``Path()`` accepts. It is created when it does not
            exist yet, so the write does not fail on machines without a
            Desktop folder.
    """
    # English sample data
    universities_data = {
        'Rank Group': ['Top 30']*10 + ['31-60']*10 + ['61-90']*10,
        'University Name': [
            # Top 30
            'Massachusetts Institute of Technology (MIT)', 'University of Oxford', 
            'Stanford University', 'Peking University', 'University of Cambridge',
            'Tsinghua University', 'Harvard University', 'University of California, Berkeley',
            'University of Chicago', 'London School of Economics and Political Science (LSE)',
            # 31-60
            'Yale University', 'National University of Singapore (NUS)', 
            'University of Pennsylvania', 'Columbia University', 'Duke University',
            'Northwestern University', 'New York University (NYU)', 'Cornell University',
            'ETH Zurich', 'University of California, Los Angeles (UCLA)',
            # 61-90
            'University of Michigan', 'University College London (UCL)', 
            'Erasmus University Rotterdam', 'University of Toronto', 'Shanghai Jiao Tong University',
            'Carnegie Mellon University', 'Fudan University', 'University of Tokyo',
            'University of British Columbia', 'University of Warwick'
        ]
    }
    
    research_areas_data = {
        'Research Area': ['Economics', 'Finance', 'Management'],
        'Journal 1': ['American Economic Review (AER)', 'Journal of Finance (JF)', 'Academy of Management Journal (AMJ)'],
        'Journal 2': ['Journal of Political Economy (JPE)', 'Review of Financial Studies (RFS)', 'Strategic Management Journal (SMJ)'],
        'Journal 3': ['Econometrica', 'Journal of Financial Economics (JFE)', 'Organization Science'],
        'Career Goal': ['Economic Researcher', 'Quant Researcher', 'Management Consultant']
    }
    
    skills_data = {
        'skills': [
            'Python Programming',
            'SQL & Database Management', 
            'Data Analysis & Visualization',
            'Machine Learning',
            'Econometrics',
            'Statistical Modeling',
            'Stata / R',
            'Financial Modeling', 
            'Project Management',
            'Communication & Presentation Skills',
            'Critical Thinking & Problem Solving'
        ]
    }
    
    # Accept plain strings as well, and make sure the target directory
    # exists before the writer tries to create the file inside it.
    desktop_path = Path(desktop_path)
    desktop_path.mkdir(parents=True, exist_ok=True)

    # Write the Excel workbook into the target directory
    data_file_path = desktop_path / "data.xlsx"
    with pd.ExcelWriter(data_file_path, engine='openpyxl') as writer:
        pd.DataFrame(universities_data).to_excel(writer, sheet_name='universities', index=False)
        pd.DataFrame(research_areas_data).to_excel(writer, sheet_name='research_areas', index=False)
        pd.DataFrame(skills_data).to_excel(writer, sheet_name='skills', index=False)
    
    print(f"已创建示例数据文件: {data_file_path}")

def create_template_file():
    """Write a starter letter template to ``template.docx`` in the current directory.

    The template is a plain sequence of paragraphs separated by empty
    paragraphs. It uses the placeholders ``name``, ``program``,
    ``university``, ``research_area``, ``journal1``, ``journal2``,
    ``journal3``, ``career_goal`` and ``skills``.
    """
    from docx import Document

    spacer = ''
    # Paragraphs in document order; each entry becomes one paragraph.
    paragraphs = (
        'Dear Admission Committee,',
        spacer,
        'My name is {{ name }}, and I am pleased to apply for the {{ program }} program at {{ university }}.',
        spacer,
        'In my free time, I enjoy reading top-tier academic research to stay updated with the latest advancements in {{ research_area }}. I occasionally study articles from leading ABS 4+ rated journals such as {{ journal1 }}, {{ journal2 }}, and {{ journal3 }}, among others. This habit not only deepens my understanding of theoretical and empirical approaches in {{ research_area }} but also sharpens my ability to critically analyze complex economic phenomena.',
        spacer,
        'I want to be a {{ career_goal }}. To achieve my dream, I have practical skills such as {{ skills }}.',
        spacer,
        'I am particularly drawn to {{ university }} due to its strong academic environment and research-oriented approach.',
        spacer,
        'Thank you for considering my application. I am eager to contribute to and benefit from the rigorous academic culture at {{ university }}.',
        spacer,
        'Sincerely,',
        spacer,
        '{{ name }}',
    )

    template = Document()
    for text in paragraphs:
        template.add_paragraph(text)

    template.save('template.docx')
    print("已创建模板文件: template.docx")

# Script entry point: generate the letters and report where they were saved.
if __name__ == "__main__":
    print("开始生成大学申请信...")
    saved_to = create_application_letters()
    # Nothing is returned when generation was abandoned early, so only
    # announce a location when there is one.
    if saved_to:
        print(f"\n所有文件已保存到: {saved_to}")

开始生成大学申请信...
使用数据文件: C:\Users\FHH\Desktop\data.xlsx
数据读取成功!
大学数量: 30
研究领域数量: 3
技能数量: 11
预计生成: 90 份申请信
开始生成申请信...
✓ 已生成: SOP_Massachusetts_Institute_of_Technology_(MIT)_Economics.docx
✓ 已生成: SOP_Massachusetts_Institute_of_Technology_(MIT)_Finance.docx
✓ 已生成: SOP_Massachusetts_Institute_of_Technology_(MIT)_Management.docx
✓ 已生成: SOP_University_of_Oxford_Economics.docx
✓ 已生成: SOP_University_of_Oxford_Finance.docx
✓ 已生成: SOP_University_of_Oxford_Management.docx
✓ 已生成: SOP_Stanford_University_Economics.docx
✓ 已生成: SOP_Stanford_University_Finance.docx
✓ 已生成: SOP_Stanford_University_Management.docx
✓ 已生成: SOP_Peking_University_Economics.docx
✓ 已生成: SOP_Peking_University_Finance.docx
✓ 已生成: SOP_Peking_University_Management.docx
✓ 已生成: SOP_University_of_Cambridge_Economics.docx
✓ 已生成: SOP_University_of_Cambridge_Finance.docx
✓ 已生成: SOP_University_of_Cambridge_Management.docx
✓ 已生成: SOP_Tsinghua_University_Economics.docx
✓ 已生成: SOP_Tsinghua_University_Finance.docx
✓ 已生成: SOP_Tsinghua_University_