In [9]:
# 导入需要的包

import pandas as pd
from docxtpl import DocxTemplate
from docx2pdf import convert
import os

In [None]:
# Input and output locations used by main():
#   EXCEL_PATH      - workbook read by load_excel_data()
#   TEMPLATE_PATH   - Word template rendered once per generated letter
#   OUTPUT_FOLDER   - folder that receives the generated .docx (and .pdf) files

EXCEL_PATH = r"F:\大学之道\人工智能与python程序设计\HW2_excel_list.xlsx"
TEMPLATE_PATH = r"F:\大学之道\人工智能与python程序设计\HW2_word_template.docx"
OUTPUT_FOLDER = r"F:\大学之道\人工智能与python程序设计\HW2_Application_Letter"

#定义所需的函数############################################

def create_output_folder(folder_path):
    """Make sure the folder for the application letters exists.

    Args:
        folder_path: Path of the folder to create. Missing parent folders
            are created as well.

    Prints a notice instead of creating anything when the path already
    exists.
    """
    if os.path.exists(folder_path):
        print('文件夹已存在')
        return
    os.makedirs(folder_path)

def load_excel_data(excel_path):
    """Read the workbook and pull out the distinct values of each column.

    Args:
        excel_path: Path of the Excel file. It must contain the columns
            ``university_name``, ``research_interest``, ``top_journals``
            and ``desired_skills``.

    Returns:
        A tuple ``(df, universities, research_interests, skills)`` where
        ``df`` is the full DataFrame and the other items are lists of the
        non-empty, de-duplicated values of the matching column. The journal
        list is only used for the summary that is printed here.
    """
    df = pd.read_excel(excel_path)

    def distinct_values(column):
        # Non-empty values of one column, duplicates removed, as a list.
        return df[column].dropna().unique().tolist()

    universities, research_interests, journals, skills = (
        distinct_values(column)
        for column in ('university_name', 'research_interest',
                       'top_journals', 'desired_skills')
    )

    summary_lines = (
        f"读取到 {len(universities)} 所大学",
        f"读取到 {len(research_interests)} 个研究领域: {research_interests}",
        f"读取到 {len(journals)} 个期刊",
        f"读取到 {len(skills)} 个技能",
    )
    for line in summary_lines:
        print(line)

    return df, universities, research_interests, skills

def map_journals_to_interests(df):
    """Build the research-interest -> journals lookup from the sheet.

    Args:
        df: DataFrame with ``research_interest`` and ``top_journals``
            columns.

    Returns:
        A dict whose keys are the distinct non-empty research interests and
        whose values are the non-empty journals found on the rows of that
        interest, joined with ``', '`` in row order.
    """
    interest_column = df['research_interest']
    journal_mapping = {
        topic: ', '.join(
            df.loc[interest_column == topic, 'top_journals'].dropna().tolist()
        )
        for topic in interest_column.dropna().unique()
    }

    print("\n期刊对应关系为:")
    for topic in journal_mapping:
        print(f"  {topic}: {journal_mapping[topic]}")

    return journal_mapping

def generate_application_letter(template_path, context, output_path):
    """Render one application letter and write it to disk.

    Args:
        template_path: Path of the Word template to load.
        context: Mapping of template placeholders to their values.
        output_path: Path of the .docx file to write.
    """
    letter = DocxTemplate(template_path)
    letter.render(context)
    letter.save(output_path)

def _safe_filename_part(text):
    """Replace characters that cannot appear in a file name with '_'."""
    # Path separators would make os.path.join point into a sub-folder that
    # does not exist; the other characters are rejected by Windows.
    forbidden = '<>:"/\\|?*'
    return ''.join(
        '_' if (ch in forbidden or ord(ch) < 32) else ch
        for ch in text
    )


def generate_all_letters(universities, research_interests, journal_mapping,
                         skills, template_path, output_folder):
    """Generate one letter for every (university, research interest) pair.

    Args:
        universities: Iterable of university names.
        research_interests: Iterable of research-interest names.
        journal_mapping: Dict mapping each research interest to the
            journal text inserted into the letter.
        skills: List of skill strings; joined with ``', '`` for the letter.
        template_path: Path of the Word template.
        output_folder: Existing folder that receives the .docx files.

    Returns:
        The number of letters generated.

    Raises:
        KeyError: If a research interest has no entry in
            ``journal_mapping``.
    """
    counter = 0
    # The skill text is identical for every letter, so build it once.
    skills_text = ', '.join(skills)

    for university in universities:
        for interest in research_interests:
            counter += 1

            context = {
                'university_name': university,
                'research_interest': interest,
                'top_journals': journal_mapping[interest],
                'desired_skills': skills_text
            }

            # Build a file name that is valid on disk: same replacements as
            # before, plus removal of characters a file name cannot contain.
            safe_university = _safe_filename_part(
                university.replace(' ', '_').replace('-', '_')
            )
            safe_interest = _safe_filename_part(
                interest.replace(' & ', '_').replace(' ', '_')
            )
            filename = f"Application_{counter:02d}_{safe_university}_{safe_interest}.docx"
            output_path = os.path.join(output_folder, filename)

            generate_application_letter(template_path, context, output_path)

            # Only echo the first three files as a sanity check of the output.
            if counter <= 3:
                print(f"第{counter}份: {filename}")

    return counter

def convert_to_pdf(output_folder):
    """Convert every .docx file in ``output_folder`` to a PDF beside it.

    Args:
        output_folder: Folder containing the generated Word documents.

    Progress is printed for the first three files and the last one. Any
    error (including a missing folder) stops the run and is reported on
    stdout instead of being raised.
    """
    try:
        print("\nword转PDF启动！")
        # Sort so the processing order (and the "first three" progress
        # lines) is deterministic; os.listdir gives no ordering guarantee.
        # Names starting with "~$" are skipped: Word creates such lock
        # files next to open documents and they are not real documents.
        docx_files = sorted(
            name for name in os.listdir(output_folder)
            if name.endswith('.docx') and not name.startswith('~$')
        )
        total = len(docx_files)

        for i, filename in enumerate(docx_files, 1):
            docx_path = os.path.join(output_folder, filename)
            # Swap only the extension; str.replace would also rewrite any
            # ".docx" that happens to appear elsewhere in the path.
            pdf_path = os.path.splitext(docx_path)[0] + '.pdf'
            convert(docx_path, pdf_path)

            if i <= 3 or i == total:
                print(f"转换第{i}份word: {filename} 为PDF")

        print(f"共转换了 {total} 份PDF文档")

    except Exception as e:
        # Deliberately broad: conversion is best-effort and should not
        # take down the rest of the script.
        print(f"\n转换失败: {e}")

#############################主程序##############################

def main():
    """Run the whole pipeline: read the data, write the letters, make PDFs."""
    print("生成申请信任务开始")

    # Step 1: make sure the destination folder is there.
    create_output_folder(OUTPUT_FOLDER)

    # Step 2: load the spreadsheet.
    print("\n读取excel数据中")
    sheet, university_list, interest_list, skill_list = load_excel_data(EXCEL_PATH)

    # Step 3: work out which journals belong to which research interest.
    journals_by_interest = map_journals_to_interests(sheet)

    # Step 4: write one Word letter per university / interest pair.
    print("\n开始生成申请信...")
    letters_written = generate_all_letters(
        universities=university_list,
        research_interests=interest_list,
        journal_mapping=journals_by_interest,
        skills=skill_list,
        template_path=TEMPLATE_PATH,
        output_folder=OUTPUT_FOLDER,
    )

    print(f"\n共生成 {letters_written} 份Word申请信")
    print(f"文件保存至 {OUTPUT_FOLDER}")

    # Step 5: convert the Word files to PDF.
    convert_to_pdf(OUTPUT_FOLDER)

    print("任务完成")

# Run the pipeline only when this file is executed directly (not on import).
if __name__ == "__main__":
    main()

生成申请信任务开始
文件夹已存在

读取excel数据中
读取到 30 所大学
读取到 3 个研究领域: ['Economics', 'Finance', 'Operational Research & Management Science']
读取到 9 个期刊
读取到 4 个技能

期刊对应关系为:
  Economics: Econometrica, Journal of Political Economy, American Economic Review
  Finance: Journal of Financial Economics, Journal of Corporate Finance, Journal of Financial and Quantitative Analysis
  Operational Research & Management Science: Management Science, European Journal of Operational Research, Annals of Operations Research

开始生成申请信...
第1份: Application_01_University_of_Chicago_Economics.docx
第2份: Application_02_University_of_Chicago_Finance.docx
第3份: Application_03_University_of_Chicago_Operational_Research_Management_Science.docx

共生成 90 份Word申请信
文件保存至 F:\大学之道\人工智能与python程序设计\HW2_Application_Letter

word转PDF启动！


  0%|          | 0/1 [00:00<?, ?it/s]

转换第1份word: Application_01_University_of_Chicago_Economics.docx 为PDF


  0%|          | 0/1 [00:00<?, ?it/s]

转换第2份word: Application_02_University_of_Chicago_Finance.docx 为PDF


  0%|          | 0/1 [00:00<?, ?it/s]

转换第3份word: Application_03_University_of_Chicago_Operational_Research_Management_Science.docx 为PDF


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]