In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import random

# Scrape the RePEc economics-department ranking page and draw a random
# sample of universities from three consecutive rank tiers.
url = "https://ideas.repec.org/top/top.econdept.html"  # RePEc economics department ranking URL

# A timeout keeps a stalled connection from hanging the kernel indefinitely,
# and raise_for_status() fails loudly on an HTTP error instead of letting an
# error page be parsed into an empty ranking.
response = requests.get(url, timeout=30)
response.raise_for_status()
response.encoding = 'utf-8'
html = response.text
soup = BeautifulSoup(html, 'html.parser')

# Collected (rank, university) tuples, in the order the rows appear on the page.
ranked_universities = []

for row in soup.select("table tr"):
    cols = row.find_all('td')
    if len(cols) < 2:
        continue

    # First cell must be an integer rank; header/spacer rows are skipped.
    rank_text = cols[0].get_text(strip=True)
    try:
        rank = int(rank_text)
    except ValueError:
        continue

    # The institution name is taken from the first link in the second cell.
    a_tag = cols[1].find('a')
    if a_tag:
        text = a_tag.get_text(strip=True)
        # When the link text contains commas, keep only the last
        # comma-separated part (presumably "Department, University" --
        # verify against the live page).
        if ',' in text:
            uni_name = text.split(',')[-1].strip()
        else:
            uni_name = text.strip()
        ranked_universities.append((rank, uni_name))

top90_universities = ranked_universities[:90]

# Split the first 90 parsed entries into three tiers of 30 and make sure each
# tier can supply 10 picks. Without this check random.sample() raises an
# opaque "Sample larger than population" ValueError when parsing came up short.
tiers = [ranked_universities[start:start + 30] for start in (0, 30, 60)]
for tier_no, tier in enumerate(tiers, start=1):
    if len(tier) < 10:
        raise ValueError(
            f"Tier {tier_no} holds only {len(tier)} parsed entries "
            f"({len(ranked_universities)} parsed in total); at least 10 are "
            "needed per tier. The page layout may have changed."
        )

# Randomly pick 10 universities from each tier. The generator is not seeded
# here, so every run selects a different set.
top30, top60, top90 = (random.sample(tier, 10) for tier in tiers)

# Merge the three samples into the final list of university names.
selected_universities = [uni_name for rank, uni_name in top30 + top60 + top90]

In [2]:
import json

# Research areas; each selected university is paired with every one of them.
research_areas = ["Finance", "Marketing", "Social Science"]

# Top journals per area, taken from https://www.scmor.com/view/10554
journals_data = {
    "Finance": [
        "Journal of Finance",
        "Journal of Financial Economics",
        "Review of Financial Studies",
    ],
    "Marketing": [
        "Journal of Consumer Psychology",
        "Journal of Consumer Research",
        "Journal of Marketing",
    ],
    "Social Science": [
        "American Journal of Political Science",
        "American Journal of Sociology",
        "American Political Science Review",
    ],
}

# Skills per area, taken from https://www.glassdoor.com.hk/Job/index.htm
skills_data = {
    "Social Science": [
        "Research and analytical skills",
        "Communication and interpersonal skills",
        "Organizational and problem-solving skills",
    ],
    "Marketing": [
        "Market analysis skills",
        "Communication and interpersonal skills",
        "Office software and data handling skills",
    ],
    "Finance": [
        "Financial analysis and modeling skills",
        "Analytical and problem-solving skills",
        "Microsoft Office proficiency and business communication skills",
    ],
}

# The joined text depends only on the area, so compute it once per area
# rather than once per (university, area) pair.
journals_text = {area: "; ".join(journals_data[area]) for area in research_areas}
skills_text = {area: "; ".join(skills_data[area]) for area in research_areas}

# One record per (university, area) pair, universities outermost.
rows = [
    {
        "University": uni,
        "Research_Area": area,
        "Top_Journals": journals_text[area],
        "Skills": skills_text[area],
    }
    for uni in selected_universities
    for area in research_areas
]

# Persist the table; a later cell reads this CSV back to drive document generation.
df = pd.DataFrame(rows)
df.to_csv("data.csv", index=False, encoding="utf-8")

In [None]:
import os
import time
from docxtpl import DocxTemplate

# Render one Word document per row of data.csv from the sop_template.docx
# template and save each one under the Results folder.
name = "Mingyang Zhang"
date = time.strftime("%B %d, %Y")

# Load the table of (university, research area, journals, skills) rows.
df = pd.read_csv("data.csv", encoding="utf-8")

# Create the output folder if it does not exist yet.
output_folder = "Results"
os.makedirs(output_folder, exist_ok=True)

for _, row in df.iterrows():
    # Values substituted into the template for this row.
    context = {
        "name": name,
        "date": date,
        "selected_university": row["University"],
        "selected_area": row["Research_Area"],
        "selected_journals": row["Top_Journals"],
        "selected_skills": row["Skills"]
    }

    # A fresh template object is loaded for every document.
    doc = DocxTemplate("sop_template.docx")
    doc.render(context)

    # University names come from scraped text, so they may contain path
    # separators or characters that are illegal in file names on some
    # platforms. Replace those with "_" so the file always lands directly in
    # output_folder; names without such characters are unchanged.
    uni_part = re.sub(r'[<>:"/\\|?*]', '_', row['University'].replace(' ', '_'))
    area_part = re.sub(r'[<>:"/\\|?*]', '_', row['Research_Area'].replace(' ', '_'))

    # Save the rendered document as a new Word file.
    filename = os.path.join(
        output_folder,
        f"Application_{uni_part}_{area_part}.docx"
    )

    doc.save(filename)
    print(f"Saved: {filename}")

Saved: Results/Application_Michigan_State_University_Finance.docx
Saved: Results/Application_Michigan_State_University_Marketing.docx
Saved: Results/Application_Michigan_State_University_Social_Science.docx
Saved: Results/Application_University_of_California-San_Diego_(UCSD)_Finance.docx
Saved: Results/Application_University_of_California-San_Diego_(UCSD)_Marketing.docx
Saved: Results/Application_University_of_California-San_Diego_(UCSD)_Social_Science.docx
Saved: Results/Application_University_of_California-Los_Angeles_(UCLA)_Finance.docx
Saved: Results/Application_University_of_California-Los_Angeles_(UCLA)_Marketing.docx
Saved: Results/Application_University_of_California-Los_Angeles_(UCLA)_Social_Science.docx
Saved: Results/Application_Paris_School_of_Economics_Finance.docx
Saved: Results/Application_Paris_School_of_Economics_Marketing.docx
Saved: Results/Application_Paris_School_of_Economics_Social_Science.docx
Saved: Results/Application_Brown_University_Finance.docx
Saved: Result