In [1]:
import pandas as pd
from docxtpl import DocxTemplate
import os
from datetime import date
from docx import Document
from docx.shared import Pt, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
import subprocess
import tqdm

In [2]:
# Function to create the Word template
def create_word_template(template_path="application_template.docx"):
    """Build the application-letter Word template and save it to disk.

    The document uses one-inch margins on every section, a right-aligned
    date line, an address block, a salutation, several headed body sections
    and a signature block. The text contains the Jinja-style placeholders
    ``{{ date }}``, ``{{ department }}``, ``{{ university }}`` and
    ``{{ program }}`` which are meant to be filled in later by a template
    renderer.

    Args:
        template_path: Where the .docx template is written. Defaults to
            ``"application_template.docx"`` in the current directory.

    Returns:
        The path the template was saved to (``template_path``).
    """
    UNIVERSITY = "{{ university }}"
    PROGRAM = "{{ program }}"

    doc = Document()

    # Set one-inch margins on every section
    for section in doc.sections:
        section.top_margin = Inches(1)
        section.bottom_margin = Inches(1)
        section.left_margin = Inches(1)
        section.right_margin = Inches(1)

    # Date placeholder, right-aligned, followed by a spacer paragraph
    date_paragraph = doc.add_paragraph("{{ date }}")
    date_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    doc.add_paragraph()

    # Address block: one paragraph, three runs
    _add_spaced_paragraph(
        doc,
        "Graduate Admissions Committee\n",
        "Department of {{ department }}\n",
        "{{ university }}\n",
    )

    # Salutation
    doc.add_paragraph("Dear Admissions Committee,")
    doc.add_paragraph()

    # Letter body: (heading or None, [paragraphs]); each paragraph is a tuple
    # of run texts, and every paragraph is followed by a spacer paragraph.
    body = [
        (None, [
            (
                "I am writing to express my strong interest in the ",
                PROGRAM,
                " at ",
                UNIVERSITY,
                ". With my dual background in Economics and Computer Science from Renmin University of China, coupled with significant research and practical experience in quantitative methods, I am eager to contribute to your distinguished program.",
            ),
        ]),
        ("Academic Background and Research Experience", [
            (
                "As a top student in the Mingde Environmental Economics-Science Gaoli Dual Degree Program at Renmin University of China (GPA: 3.82/4.0 in Economics, 3.87/4.0 in Computer Science), I have developed a robust foundation in both economic theory and quantitative methodologies. My exchange experience at the University of California, Davis, where I achieved a 4.0 GPA in Economic & Finance Forecasting and Intermediate Micro Theory, further broadened my international perspective.",
            ),
            (
                "My research experience demonstrates my ability to apply theoretical knowledge to real-world problems. Working with Prof. Xu, I developed optimized CPPI strategies using VaR for risk control and EGARCH models, implementing rolling-window backtesting for major market indices. This project enhanced my understanding of mathematical modeling and financial econometrics, skills directly applicable to the ",
                PROGRAM,
                " at your institution.",
            ),
        ]),
        ("Publications and Collaborative Research", [
            (
                "My academic pursuits have already yielded tangible results with two publications on the concept of prosumers in the new economy. Working on the Mitacs Globalink Research Project with Prof. Ertz from Université du Québec, I designed a novel index to quantify prosumer roles, employing factor analysis and weighted scoring techniques. This collaborative international research experience prepared me for the rigorous academic environment of your program.",
            ),
        ]),
        ("Applied Experience and Technical Skills", [
            (
                "My internship at Trove Applied Quantitative Technology allowed me to develop practical expertise in quantitative finance, where I built a comprehensive long-short backtesting system and implemented risk control measures, achieving significant excess returns over the benchmark. Additionally, at the International Monetary Institute, I conducted an in-depth review of CBDC literature and developed a multi-dimensional index system, utilizing NLP techniques and Word2Vec for semi-supervised text learning.",
            ),
            (
                "My technical proficiency in STATA, Python, R, MATLAB, SQL, and various data visualization tools enables me to approach complex problems from multiple angles. These skills, combined with my strong mathematical background evidenced by awards in mathematical modeling competitions, position me to make meaningful contributions to research at ",
                UNIVERSITY,
                ".",
            ),
        ]),
        ("Program Fit and Future Goals", [
            (
                "I am particularly drawn to ",
                UNIVERSITY,
                "'s ",
                PROGRAM,
                " because of its excellent reputation in the field and distinguished faculty. The program's focus on quantitative methods and rigorous curriculum aligns perfectly with my research interests in economic modeling and data analysis.",
            ),
            (
                "After completing the ",
                PROGRAM,
                ", I aim to pursue a career that combines advanced quantitative analysis with economic insights, either in academia or in policy research. The rigorous curriculum, distinguished faculty, and research opportunities at ",
                UNIVERSITY,
                " will provide the ideal environment for me to develop the expertise necessary to achieve this goal.",
            ),
        ]),
        # Closing paragraphs (no heading)
        (None, [
            (
                "I am confident that my academic background, research experience, and technical skills make me a strong candidate for your program. I would be honored to join the ",
                UNIVERSITY,
                " community and contribute to its tradition of excellence.",
            ),
            (
                "Thank you for considering my application. I look forward to the opportunity to discuss how I can contribute to your program.",
            ),
        ]),
    ]
    for heading, paragraphs in body:
        if heading is not None:
            doc.add_heading(heading, level=2)
        for run_texts in paragraphs:
            _add_spaced_paragraph(doc, *run_texts)

    # Signature block: two spacer paragraphs leave room for a signature,
    # then name and contact lines with no spacing between them.
    doc.add_paragraph("Sincerely,")
    doc.add_paragraph()
    doc.add_paragraph()
    for line in (
        "Xinyuan Cao",
        "Tel: +86-187-1099-4837",
        "Email: caoxinyuan0703@ruc.edu.cn",
    ):
        doc.add_paragraph().add_run(line)

    # Save the template
    doc.save(template_path)
    print(f"Word template created: {template_path}")
    return template_path


def _add_spaced_paragraph(doc, *run_texts):
    """Append a paragraph with one run per text, then an empty spacer paragraph."""
    paragraph = doc.add_paragraph()
    for text in run_texts:
        paragraph.add_run(text)
    doc.add_paragraph()
    return paragraph

In [6]:
# Function to convert Word documents to PDF using LibreOffice
def convert_word_to_pdf(word_folder, pdf_folder, libreoffice_path):
    """Convert the Word documents in ``word_folder`` to PDF via LibreOffice.

    LibreOffice is invoked once per document in headless mode with
    ``--convert-to pdf``. A failing document is reported and skipped; it
    does not stop the batch.

    Args:
        word_folder: Directory scanned (non-recursively) for .doc/.docx files.
        pdf_folder: Directory passed to LibreOffice as ``--outdir``; created
            if it does not exist.
        libreoffice_path: Path to the LibreOffice executable.

    Returns:
        None. Progress and errors are printed.
    """
    os.makedirs(pdf_folder, exist_ok=True)

    print("\nConverting Word documents to PDF using LibreOffice...")

    # Keep only .doc/.docx files (case-insensitive). Names starting with "~$"
    # are the owner/lock files Word leaves next to an open document, not real
    # documents, so they are skipped. Sorting makes the order reproducible.
    word_files = sorted(
        name
        for name in os.listdir(word_folder)
        if name.lower().endswith((".doc", ".docx")) and not name.startswith("~$")
    )

    failed = []
    for file in tqdm.tqdm(word_files, desc="Converting to PDF"):
        word_path = os.path.join(word_folder, file)

        # Pass arguments as a list and skip the shell entirely: file names
        # containing quotes or shell metacharacters can then neither break
        # the command line nor be interpreted as shell syntax.
        cmd = [
            libreoffice_path,
            "--headless",
            "--convert-to", "pdf",
            "--outdir", pdf_folder,
            word_path,
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
        except Exception as e:
            # Best effort: report and continue with the remaining files
            # (e.g. the executable is missing or its output cannot be decoded).
            print(f"Error converting {file}: {str(e)}")
            failed.append(file)
            continue

        # Stay quiet on success; only report documents that failed.
        if result.returncode != 0:
            print(f"Failed to convert {file}. Error: {result.stderr}")
            failed.append(file)

    if failed:
        print(
            f"Conversion finished with errors: {len(failed)} of "
            f"{len(word_files)} document(s) could not be converted."
        )
    else:
        print("Word documents have been converted to PDF format!")

In [7]:
# Main function to generate letters
def generate_application_letters(libreoffice_path, excel_path="graduate program.xlsx", programs=None):
    """Generate one application letter per (university, program) pair.

    Creates the Word template, reads the university list from an Excel
    sheet, renders a .docx for every combination of university and program
    into ``output_word`` and finally converts that folder to PDF in
    ``output_pdf``.

    Args:
        libreoffice_path: Path to the LibreOffice executable, forwarded to
            the PDF conversion step.
        excel_path: Excel file with a ``University`` column. Defaults to
            ``"graduate program.xlsx"``.
        programs: Iterable of program names to apply for. Defaults to
            MA in Economics, MA in Statistics and PhD in Data Science.

    Returns:
        None. If the Excel file cannot be read or lacks the ``University``
        column, an error is printed and the function returns early without
        generating anything.
    """
    if programs is None:
        programs = ["MA in Economics", "MA in Statistics", "PhD in Data Science"]

    # Create directories if they don't exist
    word_folder = "output_word"
    pdf_folder = "output_pdf"
    os.makedirs(word_folder, exist_ok=True)
    os.makedirs(pdf_folder, exist_ok=True)

    # Create the Word template
    template_path = create_word_template()

    # Read university data from Excel
    try:
        df = pd.read_excel(excel_path)
        print(f"Successfully read {len(df)} universities from Excel file.")
    except Exception as e:
        print(f"Error reading Excel file: {e}")
        return

    if 'University' not in df.columns:
        print("Error reading Excel file: required column 'University' is missing.")
        return

    # Blank cells are read as NaN (a float), which has no string methods;
    # drop them and normalise the rest to stripped, non-empty strings.
    universities = [str(name).strip() for name in df['University'].dropna()]
    universities = [name for name in universities if name]

    # Get today's date
    today = date.today().strftime("%B %d, %Y")

    print("\nGenerating application letters...")

    # Prepare all tasks: every university paired with every program
    tasks = [(university, program) for university in universities for program in programs]

    successful_files = 0
    for university, program in tqdm.tqdm(tasks, desc="Generating documents"):
        # Create context for template
        context = {
            'date': today,
            'university': university,
            'program': program,
            'department': _department_for(program),
        }

        # Create a unique filename
        filename = f"{_filename_part(university)}_{_filename_part(program)}"

        # Generate Word document
        try:
            doc = DocxTemplate(template_path)
            # autoescape: values such as "Texas A&M University" contain
            # characters (&, <, >) that would otherwise yield invalid XML.
            doc.render(context, autoescape=True)
            doc.save(os.path.join(word_folder, f"{filename}.docx"))
            successful_files += 1
        except Exception as e:
            print(f"Error generating document for {university} - {program}: {e}")

    total_files = len(tasks)
    print(f"\nGeneration completed. Generated {successful_files} out of {total_files} files.")

    # Convert Word documents to PDF
    convert_word_to_pdf(word_folder, pdf_folder, libreoffice_path)


def _department_for(program):
    """Return the department named in ``program``, or "Graduate Studies" if none matches."""
    for department in ("Economics", "Statistics", "Data Science"):
        if department in program:
            return department
    return "Graduate Studies"


def _filename_part(text):
    """Turn ``text`` into a filename fragment: spaces become "_", and commas,
    periods and characters that are illegal in Windows file names are dropped."""
    table = str.maketrans({" ": "_", **dict.fromkeys(',.<>:"/\\|?*')})
    return text.translate(table)

In [8]:
if __name__ == "__main__":
    # LibreOffice executable. The default is a machine-specific install
    # location; set the LIBREOFFICE_PATH environment variable to override it
    # without editing the notebook.
    libreoffice_path = os.environ.get(
        "LIBREOFFICE_PATH",
        r"D:\Program Files\LibreOffice\program\soffice.exe",
    )

    print("Starting to generate application letters...")
    generate_application_letters(libreoffice_path)
    print("Process completed.")

Starting to generate application letters...
Word template created: application_template.docx
Successfully read 30 universities from Excel file.

Generating application letters...


Generating documents: 100%|████████████████████████████████████████████████████████████| 90/90 [00:04<00:00, 20.58it/s]



Generation completed. Generated 90 out of 90 files.

Converting Word documents to PDF using LibreOffice...


Converting to PDF: 100%|█████████████████████████████████████████████████████████████| 180/180 [04:18<00:00,  1.43s/it]

Word documents have been converted to PDF format!
Process completed.



