In [1]:
import pandas as pd
from docxtpl import DocxTemplate
import os
from tqdm import tqdm  # 用于显示进度条

In [2]:
# Input files: the Word template plus the two Excel workbooks that drive it.
template_path = r"D:\pythonai\template.docx"
universities_path = r"D:\pythonai\universities.xlsx"
areas_path = r"D:\pythonai\area_journals_skills.xlsx"

# Create the output directory. exist_ok=True keeps the cell safe to re-run
# and avoids the check-then-create race of a separate os.path.exists() test.
output_dir = r"D:\pythonai\generated_sops1"
os.makedirs(output_dir, exist_ok=True)

In [4]:
# Load the list of university names from the first column of the workbook.
try:
    universities_df = pd.read_excel(universities_path)
    # read_excel already consumes the first sheet row as the header, so
    # column 0 contains data rows only; no extra slicing is needed.
    # Blank cells are dropped and values are coerced to stripped strings so
    # the per-character filename sanitising downstream never sees NaN/numbers.
    university_column = universities_df.iloc[:, 0].dropna().astype(str).str.strip()
    universities = [name for name in university_column if name]
    print(f"成功读取 {len(universities)} 所大学")
except Exception as e:
    # Report the failure in the notebook output, then re-raise so that a
    # Run All stops here instead of continuing with missing data.
    print(f"读取大学列表出错: {e}")
    raise

成功读取 30 所大学


In [5]:
# Load the research-area table: one row per area with its journals and skills.
try:
    areas_raw = pd.read_excel(areas_path)
    area_columns = ['research_area', 'top_journals', 'skills']
    # Fail with an explicit message instead of pandas' length-mismatch error
    # when the sheet does not provide the three expected columns.
    if areas_raw.shape[1] < len(area_columns):
        raise ValueError(
            f"expected at least {len(area_columns)} columns, "
            f"found {areas_raw.shape[1]}"
        )
    # Columns are identified by position; any extra trailing columns (e.g.
    # stray empty ones) are ignored rather than breaking the rename.
    areas_df = areas_raw.iloc[:, :len(area_columns)].copy()
    areas_df.columns = area_columns
    # Rows without a research area cannot produce a document or a filename.
    areas_df = areas_df.dropna(subset=['research_area'])
    # One dict per row, keyed by the column names above.
    research_areas = areas_df.to_dict('records')
    print(f"成功读取 {len(research_areas)} 个研究领域")
except Exception as e:
    # Report the failure in the notebook output, then re-raise so that a
    # Run All stops here instead of continuing with missing data.
    print(f"读取研究领域数据出错: {e}")
    raise

成功读取 3 个研究领域


In [6]:
# Open the Word template; surface any failure and stop the notebook run.
try:
    doc = DocxTemplate(template_path)
except Exception as load_error:
    print(f"加载模板文件出错: {load_error}")
    raise
else:
    print("成功加载模板文件")

成功加载模板文件


In [7]:
def _safe_filename_part(text):
    """Return `text` reduced to characters that are safe in a file name.

    Keeps alphanumerics, spaces, hyphens and underscores, then turns spaces
    into underscores. `text` is passed through str() first so non-string
    cell values (numbers, NaN) do not raise while iterating characters.
    """
    kept = "".join(ch for ch in str(text) if ch.isalnum() or ch in " -_")
    return kept.replace(" ", "_")


# Generate one document for every (university, research area) pair.
total_documents = len(universities) * len(research_areas)
print(f"开始生成 {total_documents} 份文档...")

generated_count = 0
for i, university in enumerate(tqdm(universities, desc="处理大学列表")):
    for j, area in enumerate(research_areas):
        # Values substituted into the template placeholders.
        context = {
            'university': university,
            'research_area': area['research_area'],
            'top_journals': area['top_journals'],
            'skills': area['skills']
        }

        # Load a fresh template for every document. Re-rendering one shared
        # DocxTemplate instance is not reliable across docxtpl versions
        # (NOTE(review): older releases render in place, so later saves can
        # repeat the first document — confirm against the installed version).
        document = DocxTemplate(template_path)
        document.render(context)

        # Build a file name from the 1-based indices plus sanitised names.
        safe_university = _safe_filename_part(university)
        safe_area = _safe_filename_part(area['research_area'])
        output_filename = f"{i+1}_{safe_university}_{j+1}_{safe_area}.docx"
        output_path = os.path.join(output_dir, output_filename)

        # Write the rendered document to the output directory.
        document.save(output_path)
        generated_count += 1

print(f"所有文档已生成并保存到: {output_dir}")
# Report the number of files actually saved rather than the planned total.
print(f"共生成 {generated_count} 份文档")

开始生成 90 份文档...


处理大学列表: 100%|██████████| 30/30 [00:03<00:00,  9.01it/s]

所有文档已生成并保存到: D:\pythonai\generated_sops1
共生成 90 份文档



