In [11]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5


In [6]:
!pip install pandas docxtpl docx2pdf



In [7]:
import pandas as pd
from docxtpl import DocxTemplate
import os
from datetime import datetime

In [13]:
# Characters that Windows rejects in file names; each one is replaced with "_".
_INVALID_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '<>:"/\\|?*'})


def _clean_filename_part(text):
    """Return ``text`` with characters that are invalid in Windows file names replaced by '_'."""
    return str(text).translate(_INVALID_FILENAME_CHARS)


def _group_journals_by_field(df_journals):
    """Map each value of the 'Field' column to the list of its 'Journal' values.

    A blank 'Field' cell means "same field as the row above". A field name that
    appears more than once keeps the journals already collected for it instead
    of starting over. Values are converted to ``str`` so that non-text cells
    cannot break the later ``', '.join`` / ``.lower()`` calls.

    Raises:
        KeyError: if the 'Field' or 'Journal' column is missing.
    """
    fields_journals = {}
    current_field = None
    for field, journal in zip(df_journals['Field'], df_journals['Journal']):
        if pd.notna(field):
            current_field = str(field)
            fields_journals.setdefault(current_field, [])
        if pd.notna(journal) and current_field:
            fields_journals[current_field].append(str(journal))
    return fields_journals


def _build_context(personal_info, school, field, journals, skill, current_date):
    """Build the dictionary of values the Word template is rendered with."""
    return {
        'name': personal_info['name'],
        'program': personal_info['program'],
        'university': school,
        'journals': ', '.join(journals[:3]),  # only the first three journals are used
        'skills': skill,
        'field': field.lower(),
        'current_date': current_date
    }


def _render_letter(template_path, context, filepath):
    """Render a fresh copy of the template with ``context`` and save it to ``filepath``."""
    # A new DocxTemplate per letter avoids relying on re-rendering an
    # already-rendered template object, which older docxtpl releases do not support.
    doc = DocxTemplate(template_path)
    doc.render(context)
    doc.save(filepath)


def generate_application_letters(
    excel_path=r"C:\Users\lenovo\Desktop\Python homework.xlsx",
    template_path=r"C:\Users\lenovo\Desktop\MS Word template(Admission Letter).docx",
    output_dir="HW_School_Application",
):
    """Generate one application letter per (school, field) pair from a Word template.

    Schools are read from the second column of the "target college" sheet and
    fields/journals from the "Field and Journal" sheet of ``excel_path``. For
    every school/field combination the template is rendered and saved as a
    .docx file in ``output_dir``. Afterwards an example letter
    ("Example_Application.docx") is written and, when docx2pdf is usable,
    converted to PDF.

    Args:
        excel_path: Workbook containing the two sheets described above.
        template_path: docxtpl Word template to render.
        output_dir: Directory for the generated files; created if needed.
            A relative path is resolved against the current working directory.

    Returns:
        None. Progress and problems are reported with ``print``.

    Raises:
        Errors raised while rendering or saving an individual letter are not
        caught and propagate to the caller.
    """
    # Read both sheets with a single pass over the workbook.
    try:
        sheets = pd.read_excel(
            excel_path, sheet_name=["target college", "Field and Journal"]
        )
        # Column B (index 1) holds the school names.
        schools = [str(s) for s in sheets["target college"].iloc[:, 1].dropna()]
        fields_journals = _group_journals_by_field(sheets["Field and Journal"])
    except Exception as e:
        print(f"读取Excel文件错误: {e}")
        return

    # Nothing to generate: report it instead of failing later on schools[0].
    if not schools or not fields_journals:
        print("未找到目标学校或领域数据，未生成任何申请信")
        return

    # Check the template before creating any output.
    if not os.path.isfile(template_path):
        print(f"加载Word模板错误: 模板文件不存在 {template_path}")
        return

    # Skill lists (taken from step 6); schools cycle through them in order.
    skills = [
        "Python, R, Stata, SQL, MATLAB, Econometrics, Statistical Analysis, Machine Learning",
        "Python, SQL, Tableau, Power BI, Financial Modeling, Risk Management, Data Visualization",
        "Stata, R, Python, Econometric Analysis, Time Series Analysis, Statistical Modeling, Data Mining",
        "MATLAB, Python, SQL, Quantitative Analysis, Mathematical Modeling, Optimization Techniques",
        "R, Python, SQL, Statistical Inference, Experimental Design, Causal Inference Methods"
    ]

    # Applicant details inserted into every letter.
    # NOTE(review): the trailing space in 'name' is kept from the original;
    # confirm the template expects it.
    personal_info = {
        'name': 'Zimin Gu ',
        'program': 'Doctor of Economics'
    }

    os.makedirs(output_dir, exist_ok=True)

    # The date is the same for every letter, so format it once.
    current_date = datetime.now().strftime("%B %d, %Y")

    generated_count = 0
    for i, school in enumerate(schools):
        skill = skills[i % len(skills)]
        school_part = _clean_filename_part(
            school.replace(' ', '_').replace('(', '').replace(')', '')
        )
        for field, journals in fields_journals.items():
            context = _build_context(
                personal_info, school, field, journals, skill, current_date
            )
            filename = f"{school_part}_{_clean_filename_part(field)}_application.docx"
            _render_letter(template_path, context, os.path.join(output_dir, filename))
            generated_count += 1

            print(f"生成: {filename}")

    print(f"\n成功生成 {generated_count} 封申请信!")

    # Write one example letter (first school, first field) for submission.
    try:
        first_field, first_journals = next(iter(fields_journals.items()))
        example_context = _build_context(
            personal_info, schools[0], first_field, first_journals, skills[0], current_date
        )
        example_path = os.path.join(output_dir, "Example_Application.docx")
        _render_letter(template_path, example_context, example_path)
    except Exception as e:
        print(f"生成示例文件时出错: {e}")
        return

    # PDF conversion is best-effort: docx2pdf may be missing or unusable on
    # this platform, so a failure is reported rather than raised.
    try:
        from docx2pdf import convert
        # Swap only the extension; str.replace would also rewrite ".docx"
        # occurring elsewhere in the path.
        pdf_path = os.path.splitext(example_path)[0] + '.pdf'
        convert(example_path, pdf_path)
        print(f"已生成示例PDF文件: {pdf_path}")
    except Exception as e:
        print(f"PDF转换不可用，仅生成Word文档 ({e})")


if __name__ == "__main__":
    generate_application_letters()

生成: Economics_Department,_London_School_of_Economics_LSE_ECON_application.docx
生成: Economics_Department,_London_School_of_Economics_LSE_FINANCE_application.docx
生成: Economics_Department,_London_School_of_Economics_LSE_MKT_application.docx
生成: Department_of_Economics,_University_College_London_UCL_ECON_application.docx
生成: Department_of_Economics,_University_College_London_UCL_FINANCE_application.docx
生成: Department_of_Economics,_University_College_London_UCL_MKT_application.docx
生成: Department_of_Economics,_University_of_California-Berkeley_ECON_application.docx
生成: Department_of_Economics,_University_of_California-Berkeley_FINANCE_application.docx
生成: Department_of_Economics,_University_of_California-Berkeley_MKT_application.docx
生成: Paris_School_of_Economics_ECON_application.docx
生成: Paris_School_of_Economics_FINANCE_application.docx
生成: Paris_School_of_Economics_MKT_application.docx
生成: Department_of_Economics,_Oxford_University_ECON_application.docx
生成: Department_of_Economics,_Oxf

  0%|          | 0/1 [00:00<?, ?it/s]

已生成示例PDF文件: HW_School_Application\Example_Application.pdf


In [14]:
import os

# Show the directory the kernel is running in; relative paths resolve against it.
working_dir = os.getcwd()
print("当前工作目录:", working_dir)

当前工作目录: C:\Users\lenovo
