In [33]:
import pandas as pd
import os
from docx import Document


def generate_application_letters(universities_file, fields_file, template_file, output_dir="application_letters"):
    """Generate one plain-text application letter per university.

    The Word template's paragraphs are joined with newlines and filled in with
    ``str.format``. The placeholders supplied are the personal-info keys
    defined below plus ``{university_name}``, ``{Field A}``..``{Field C}``,
    ``{Journal E1}``..``{Journal G3}`` and ``{skill 1}``..``{skill 3}``.

    Args:
        universities_file: Excel workbook, read without a header row, whose
            first column holds the university names.
        fields_file: Excel workbook, read without a header row, with the
            research field in column 0, a journal in column 2 and a skill in
            column 3. Rows sharing a field contribute several journals to it.
        template_file: Path to the .docx letter template.
        output_dir: Directory that receives the generated .txt letters; it is
            created if it does not exist.

    Raises:
        FileNotFoundError: If any of the three input files does not exist.
        ValueError: If the fields workbook provides fewer than three fields,
            or fewer than three journals for one of the first three fields.
    """
    # Fail fast, before creating any output, if an input file is missing.
    for input_path in (universities_file, fields_file, template_file):
        if not os.path.isfile(input_path):
            raise FileNotFoundError(f"Input file not found: {input_path}")

    os.makedirs(output_dir, exist_ok=True)

    # University names live in the first column. Blank cells are dropped and
    # surrounding whitespace is stripped so that neither the letter text nor
    # the file name carries stray spaces.
    universities_df = pd.read_excel(universities_file, header=None)
    universities = [
        str(name).strip()
        for name in universities_df[0].tolist()
        if not pd.isna(name) and str(name).strip()
    ]

    # Group journals by field (column 0 = field, 2 = journal, 3 = skill).
    fields_df = pd.read_excel(fields_file, header=None)
    fields_data = {}
    for _, row in fields_df.iterrows():
        field, journal, skill = row[0], row[2], row[3]
        if pd.isna(field) or pd.isna(journal):
            continue  # incomplete row: nothing usable to record
        # Only the first skill seen for a field is kept.
        entry = fields_data.setdefault(field, {'journals': [], 'skill': skill})
        entry['journals'].append(journal)

    # Use the first three fields in order of appearance.
    fields = list(fields_data)[:3]
    if len(fields) < 3:
        raise ValueError(
            f"Expected at least 3 research fields in {fields_file}, found {len(fields)}"
        )
    for field in fields:
        journal_count = len(fields_data[field]['journals'])
        if journal_count < 3:
            raise ValueError(
                f"Field {field!r} needs at least 3 journals, found {journal_count}"
            )

    # Flatten the template into one string (paragraph text only; formatting,
    # tables and headers of the .docx are not carried over).
    doc = Document(template_file)
    template = "\n".join(para.text for para in doc.paragraphs)

    # Personal details (edit to match the applicant).
    personal_info = {
        'name': 'Shuyu Yang',
        'program_name': "Master's Program in Quantitative Finance",
        'field_of_study': 'quant finance',
        'previous_university_name': 'Renmin University',
        'email_address': 'yangshuyu047972@qq.com',
        'phone_number': '+86 189 8128 6231',
    }

    # Everything except the university name is identical for every letter, so
    # build it once. Fields A/B/C pair with journal groups E/F/G and skills 1-3.
    shared_data = dict(personal_info)
    for field_letter, journal_letter, skill_no, field in zip("ABC", "EFG", (1, 2, 3), fields):
        shared_data[f'Field {field_letter}'] = field
        for position, journal in enumerate(fields_data[field]['journals'][:3], start=1):
            shared_data[f'Journal {journal_letter}{position}'] = journal
        shared_data[f'skill {skill_no}'] = fields_data[field]['skill']

    for university in universities:
        letter_content = template.format(**shared_data, university_name=university)

        # One file per university, named after it.
        filename = f"{university.replace(' ', '_')}_application_letter.txt"
        filepath = os.path.join(output_dir, filename)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(letter_content)

        print(f"已生成: {filepath}")


if __name__ == "__main__":
    generate_application_letters(
        universities_file="Universities.xlsx",  # Excel file containing the 30 universities
        fields_file="Interested_research_areas.xlsx",  # Excel file with fields, journals and skills
        template_file="Template.docx",  # application letter template
    )


已生成: application_letters\Harvard_University_application_letter.txt
已生成: application_letters\Massachusetts_Institute_of_Technology__application_letter.txt
已生成: application_letters\University_of_California-Berkeley_application_letter.txt
已生成: application_letters\University_of_Chicago_application_letter.txt
已生成: application_letters\Paris_School_of_Economics_application_letter.txt
已生成: application_letters\Princeton_University_application_letter.txt
已生成: application_letters\Stanford_University_application_letter.txt
已生成: application_letters\Yale_University_application_letter.txt
已生成: application_letters\Toulouse_School_of_Economics_application_letter.txt
已生成: application_letters\Oxford_University_application_letter.txt
已生成: application_letters\Columbia_University_application_letter.txt
已生成: application_letters\University_of_Cambridge_application_letter.txt
已生成: application_letters\Boston_College_application_letter.txt
已生成: application_letters\University_of_Toronto_application_letter.txt
已生成

In [34]:
import os
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

def txt_to_docx(txt_file_path, docx_file_path=None):
    """Convert a single plain-text file into a Word document.

    Every non-blank line of the text file becomes one left-aligned paragraph
    in 12 pt Times New Roman; blank lines are dropped.

    Args:
        txt_file_path: Path of the text file to read. It is decoded as UTF-8,
            falling back to GBK if UTF-8 decoding fails.
        docx_file_path: Path of the Word document to write. Defaults to the
            text file's path with its final extension replaced by ``.docx``.
    """
    if docx_file_path is None:
        # splitext swaps only the final extension. A plain str.replace would
        # also rewrite ".txt" inside directory names and, for a path without a
        # lowercase ".txt", would return the input path unchanged so that
        # saving overwrote the source file.
        docx_file_path = os.path.splitext(txt_file_path)[0] + '.docx'

    doc = Document()

    # Default font for the whole document.
    style = doc.styles['Normal']
    font = style.font
    font.name = 'Times New Roman'
    font.size = Pt(12)

    try:
        with open(txt_file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # GBK is a common encoding for Chinese text files on Windows.
        with open(txt_file_path, 'r', encoding='gbk') as f:
            content = f.read()

    # One paragraph per non-blank line.
    for para_text in content.split('\n'):
        if para_text.strip() == '':
            continue
        para = doc.add_paragraph(para_text)
        para.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

    doc.save(docx_file_path)
    print(f"已转换: {txt_file_path} -> {docx_file_path}")

# Convert every TXT letter found directly inside the letters folder.
folder_path = r"C:\Users\杨\application_letters"

# Pair each directory entry with its full path, then convert the regular
# files whose names end in ".txt"; everything else is skipped.
for file_name, file_path in (
    (entry, os.path.join(folder_path, entry)) for entry in os.listdir(folder_path)
):
    if not (os.path.isfile(file_path) and file_name.endswith('.txt')):
        continue
    txt_to_docx(file_path)

已转换: C:\Users\杨\application_letters\Australian_National_University_application_letter.txt -> C:\Users\杨\application_letters\Australian_National_University_application_letter.docx
已转换: C:\Users\杨\application_letters\Boston_College_application_letter.txt -> C:\Users\杨\application_letters\Boston_College_application_letter.docx
已转换: C:\Users\杨\application_letters\Columbia_University_application_letter.txt -> C:\Users\杨\application_letters\Columbia_University_application_letter.docx
已转换: C:\Users\杨\application_letters\Cornell_University_application_letter.txt -> C:\Users\杨\application_letters\Cornell_University_application_letter.docx
已转换: C:\Users\杨\application_letters\Georgetown_University_application_letter.txt -> C:\Users\杨\application_letters\Georgetown_University_application_letter.docx
已转换: C:\Users\杨\application_letters\Harvard_University_application_letter.txt -> C:\Users\杨\application_letters\Harvard_University_application_letter.docx
已转换: C:\Users\杨\application_letters\Johns_Hopk