In [7]:
import pandas as pd
import re
from docx import Document
import os

# 读取Excel文件
file_path = '1.xlsx'
df = pd.read_excel(file_path)

# 定义一个函数来只保留中文字符
def keep_chinese(text):
    return ''.join(re.findall(r'[\u4e00-\u9fff]', str(text)))

# 清理第一列的特殊字符，只保留中文
df.iloc[:, 0] = df.iloc[:, 0].apply(keep_chinese)

# 按第一列的名字进行分组
grouped = df.groupby(df.columns[0])

# 创建输出文件夹
output_folder = 'output_docs'
os.makedirs(output_folder, exist_ok=True)

# 遍历每个分组并保存为单独的DOCX文件
for name, group in grouped:
    doc = Document()
    for index, row in group.iterrows():
        second_col = str(row[1])
        third_col = str(row[2])
        doc.add_paragraph(second_col + " " + third_col)
    
    output_file_path = os.path.join(output_folder, f"{name}.docx")
    doc.save(output_file_path)

print("转换完成！所有DOCX文件已保存到:", output_folder)


转换完成！所有DOCX文件已保存到: output_docs


In [9]:
import os
from docx import Document
from docx.oxml.ns import qn
from docx.shared import Pt

# 指定DOCX文件所在的文件夹
folder_path = 'output_docs'

# 定义一个函数，将DOCX文件的字体转换为宋体
def convert_font_to_songti(doc):
    for paragraph in doc.paragraphs:
        for run in paragraph.runs:
            run.font.name = '宋体'
            run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    for run in paragraph.runs:
                        run.font.name = '宋体'
                        run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

# 遍历文件夹中的所有DOCX文件并进行转换
for filename in os.listdir(folder_path):
    if filename.endswith('.docx'):
        file_path = os.path.join(folder_path, filename)
        doc = Document(file_path)
        convert_font_to_songti(doc)
        doc.save(file_path)

print("转换完成！所有DOCX文件的字体已转换为宋体。")


转换完成！所有DOCX文件的字体已转换为宋体。


In [14]:
import os
from docx import Document

def extract_lines(doc):
    lines = []
    for para in doc.paragraphs:
        lines.extend(para.text.split('\n'))
    return lines

def rename_docx_files(folder_path):
    for filename in os.listdir(folder_path):
        if filename.endswith(".docx"):
            file_path = os.path.join(folder_path, filename)
            try:
                doc = Document(file_path)
                lines = extract_lines(doc)
                if len(lines) >= 2:
                    classname = lines[0].strip()
                    teachername = lines[1].strip()
                    new_filename = f"{teachername} {classname} 选修课评价.docx"
                    new_file_path = os.path.join(folder_path, new_filename)

                    os.rename(file_path, new_file_path)
                    print(f"Renamed {filename} to {new_filename}")
                else:
                    print(f"File {filename} does not have enough lines.")
            except Exception as e:
                print(f"Failed to process {filename}: {e}")

folder_path = "doc"  # 替换为你的文件夹路径
rename_docx_files(folder_path)


Failed to process image_0.docx: xmlns:ns0: 'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"' is not a valid URI, line 2, column 2804 (<string>, line 2)
Failed to process image_104.docx: [WinError 3] 系统找不到指定的路径。: 'doc\\image_104.docx' -> 'doc\\周可/孙珉 戏剧文化与剧场实验 选修课评价.docx'
Failed to process image_121.docx: xmlns:ns0: 'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"' is not a valid URI, line 2, column 2832 (<string>, line 2)
Failed to process image_184.docx: xmlns:ns0: 'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"' is not a valid URI, line 2, column 2832 (<string>, line 2)
Renamed 不详 工程伦理学 选修课评价.docx to 不详 工程伦理学 选修课评价.docx
Renamed 不详 电子商务概论 选修课评价.docx to 不详 电子商务概论 选修课评价.docx
Renamed 不详 走进微电子 选修课评价.docx to 不详 走进微电子 选修课评价.docx
Renamed 东哥 岭南文化 选修课评价.docx to 东哥 岭南文化 选修课评价.docx
Renamed 刘博 生涯规划与求职技巧 选修课评价.docx to 刘博 生涯规划与求职技巧 选修课评价.docx
Renamed 刘喜琴 英语文学阅读与鉴赏 选修课评价.docx to 刘喜琴 英语文学阅读与鉴赏 选修课评价.docx
Renamed 刘妍君 生涯规划与求职技巧 选修课评价.

In [10]:
import os
from docx import Document

def check_docx_files(directory):
    for filename in os.listdir(directory):
        if filename.endswith('.docx'):
            file_path = os.path.join(directory, filename)
            try:
                doc = Document(file_path)
                # If no exception is raised, the file is considered valid
                print(f'Successfully opened: {file_path}')
            except Exception as e:
                # Print the error message for files that raise an exception
                print(f'Error reading {file_path}: {e}')

# Specify the directory to check
directory = 'doc'

# Check all .docx files in the specified directory
check_docx_files(directory)


Successfully opened: doc\东哥_岭南文化_选修课评价.docx
Successfully opened: doc\交通.docx
Successfully opened: doc\代理咨询.docx
Successfully opened: doc\刘博_生涯规划与求职技巧_选修课评价.docx
Successfully opened: doc\刘喜琴_英语文学阅读与鉴赏_选修课评价.docx
Successfully opened: doc\刘妍君_生涯规划与求职技巧_选修课评价.docx
Successfully opened: doc\刘畅_影视音乐赏析_选修课评价.docx
Successfully opened: doc\刘银娣_新媒体文化_选修课评价.docx
Successfully opened: doc\医院.docx
Successfully opened: doc\叶代勇_智慧生活智慧化工_选修课评价.docx
Successfully opened: doc\叶君，熊健_食品营养与安全_选修课评价.docx
Successfully opened: doc\叶圣涛_易学史_选修课评价.docx
Successfully opened: doc\吴耀华_生涯规划与求职技巧_选修课评价.docx
Successfully opened: doc\周可_城市文化与美学_选修课评价.docx
Successfully opened: doc\周建新_英语诗歌选读_选修课评价.docx
Successfully opened: doc\周皓_法语_选修课评价.docx
Successfully opened: doc\周育红_创业精神与创业故事会_选修课评价.docx
Successfully opened: doc\夏正林_逻辑与思维_选修课评价.docx
Successfully opened: doc\夏玲玲_生涯规划与求职技巧_选修课评价.docx
Successfully opened: doc\学院介绍.docx
Successfully opened: doc\屈薇_英语传媒与文化_选修课评价.docx
Successfully opened: doc\工程伦理学_选修课评价.docx
Successfully o

In [12]:
import os
import json
import shutil

def check_and_move_empty_knowledges(directory, target_directory):
    # Create the target directory if it doesn't exist
    if not os.path.exists(target_directory):
        os.makedirs(target_directory)
    
    empty_knowledges_files = []
    
    for filename in os.listdir(directory):
        if filename.endswith('.json'):
            file_path = os.path.join(directory, filename)
            try:
                with open(file_path, 'r', encoding='utf-8') as file:
                    data = json.load(file)
                    if 'knowledges' in data and not data['knowledges']:
                        empty_knowledges_files.append(filename)
                        # Copy the file to the target directory
                        shutil.copy(file_path, os.path.join(target_directory, filename))
            except json.JSONDecodeError as e:
                print(f'Error reading {file_path}: {e}')
    
    return empty_knowledges_files

# Specify the directory containing the JSON files
source_directory = 'json'
# Specify the target directory to copy files with empty "knowledges"
target_directory = 'json1'

# Check and move the files with empty "knowledges"
empty_files = check_and_move_empty_knowledges(source_directory, target_directory)

# Print the list of files
print('Files with empty "knowledges" have been copied to the target directory:')
for filename in empty_files:
    print(filename)


Files with empty "knowledges" have been copied to the target directory:
代理咨询.json
医院.json
叶圣涛_易学史_选修课评价.json
学院介绍.json
方绘画_二十世纪美学视野中的西_选修课评价.json
李牧南_产品创新设计理论与实践_选修课评价.json
李龙一_创新管理_选修课评价.json
沈云芳_粤剧艺术赏析_选修课评价.json
维修.json
账号密码.json
