In [60]:
import os
import pandas as pd
from docxtpl import DocxTemplate
from docx2pdf import convert
from pathlib import Path


In [62]:
def read_excel_data(file_path, max_areas=3):
    """Load universities and research areas from a header-less Excel sheet.

    Layout read from the sheet (0-based column positions):
      * column 0 (A): university names, one per row; empty cells are dropped.
      * column 1 (B): research field.
      * columns 2-4 (C-E): three journals for that field.
      * column 5 (F): skills for that field.

    Only the first ``max_areas`` rows are scanned for research areas, and rows
    whose field cell is empty are skipped, so a sheet with fewer filled rows
    does not raise or yield NaN-valued entries.

    Args:
        file_path: Path of the Excel workbook passed to ``pd.read_excel``.
        max_areas: Number of leading rows inspected for research-area data.

    Returns:
        A tuple ``(universities, research_areas)`` where ``universities`` is a
        list of the non-empty values of column A and ``research_areas`` is a
        list of dicts with keys ``'field'``, ``'journals'`` (list of three
        values) and ``'skills'``.
    """
    # Read the sheet without treating the first row as a header.
    df = pd.read_excel(file_path, header=None)

    # University list with empty cells removed.
    universities = df.iloc[:, 0].dropna().tolist()

    research_areas = []

    # Never index past the last row of the sheet.
    for i in range(min(max_areas, len(df))):
        field = df.iloc[i, 1]  # column B: research field
        if pd.isna(field):
            # Row has a university but no research-area data; skip it.
            continue
        research_areas.append({
            'field': field,
            'journals': [
                df.iloc[i, 2],  # column C: journal 1
                df.iloc[i, 3],  # column D: journal 2
                df.iloc[i, 4],  # column E: journal 3
            ],
            'skills': df.iloc[i, 5],  # column F: skills
        })

    return universities, research_areas


In [64]:
def create_application_letter(template_data):
    """Build the plain-text application letter for one university/field pair.

    Args:
        template_data: Mapping with the keys
            ``'university'`` (name inserted into the letter),
            ``'field'`` (research field, also used to pick the career goal),
            ``'journals'`` (sequence whose first three items are named), and
            ``'skills'`` (text describing practical skills).

    Returns:
        The letter as a single string, beginning and ending with a newline.
    """
    # Career goal associated with each research field (keys are lower-case).
    career_goals = {
        'accounting': 'accountant',
        'finance': 'quant researcher',
        'marketing': 'marketing analyst'
    }

    # Normalise the field so 'Finance' or ' finance ' still match a key.
    field_key = str(template_data['field']).strip().lower()
    career_goal = career_goals.get(field_key, 'professional')
    # Pick the article from the goal itself: "an accountant", "a quant researcher".
    article = 'an' if career_goal[0] in 'aeiou' else 'a'

    letter_template = f"""
Dear Admission Committee,

My name is Haiyuan Zhang, and I am pleased to apply for the Master of {template_data['field']} program at {template_data['university']}.

In my free time, I enjoy reading top-tier academic research to stay updated with the latest advancements in {template_data['field']}. I occasionally study articles from leading ABS 4+ rated journals such as {template_data['journals'][0]}, {template_data['journals'][1]}, and {template_data['journals'][2]}, among others. This habit not only deepens my understanding of theoretical and empirical approaches in {template_data['field']} but also sharpens my ability to critically analyze complex economic phenomena.

I want to be {article} {career_goal}. To achieve my dream, I have practical skills such as {template_data['skills']}.

I am particularly drawn to {template_data['university']} due to its strong academic environment and research-oriented approach.

Thank you for considering my application. I am eager to contribute to and benefit from the rigorous academic culture at {template_data['university']}.

Sincerely,

Haiyuan Zhang
"""
    return letter_template


In [66]:
def generate_documents():
    """Generate a DOCX and a PDF application for every university/field pair.

    Reads universities and research areas from 'list.xlsx' via
    ``read_excel_data``, renders 'application_template.docx' once per
    combination, saves the result in ``~/HW_School_Application`` and converts
    it to PDF next to the DOCX. Progress and a final summary are printed.
    """
    # Career goal per research field; keys are lower-case so the lookup below
    # can be case-insensitive. Unknown fields fall back to 'professional'.
    career_goals = {
        'accounting': 'accountant',
        'finance': 'quant researcher',
        'marketing': 'marketing analyst',
    }

    # Load the input data.
    universities, research_areas = read_excel_data('list.xlsx')

    # Make sure the output directory exists.
    output_dir = Path.home() / "HW_School_Application"
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"找到 {len(universities)} 所大学和 {len(research_areas)} 个研究领域")
    print(f"将生成 {len(universities) * len(research_areas)} 份申请信")

    count = 0

    # Iterate over every university / research-area combination.
    for university in universities:
        # Cell values are not guaranteed to be strings; coerce before building
        # the file-name fragment (spaces -> underscores, commas removed).
        safe_university = str(university).replace(' ', '_').replace(',', '')

        for area in research_areas:
            field = area['field']
            journals = area['journals']
            field_key = str(field).strip().lower()

            # A fresh template object is loaded for each document to render.
            doc = DocxTemplate("application_template.docx")
            context = {
                'university': university,
                'field': field,
                'journal1': journals[0],
                'journal2': journals[1],
                'journal3': journals[2],
                'skills': area['skills'],
                'career_goal': career_goals.get(field_key, 'professional'),
            }

            doc.render(context)

            # Save the Word document.
            filename = f"application_{safe_university}_{field}.docx"
            filepath = output_dir / filename
            doc.save(filepath)

            # Convert to PDF: same location and stem, only the suffix changes.
            pdf_filepath = filepath.with_suffix('.pdf')
            convert(str(filepath), str(pdf_filepath))

            count += 1
            print(f"已生成: {filename}")

    print(f"\n完成！共生成 {count} 份申请文档")
    print(f"文件保存在: {output_dir}")


In [None]:
# Entry point: run the whole generation pipeline only when this code is
# executed as the main module, not when it is imported.
if __name__ == "__main__":
    generate_documents()