In [1]:
import pandas as pd
import json
import re

# Read a CSV file and generate a JSONL file
def excel_to_jsonl(input_csv, output_jsonl):
    """Convert a question/answer CSV file into a JSONL file.

    Every CSV row becomes one JSON object on its own line with the fields
    ``id``, ``question``, ``answer``, ``key``, ``type``, ``date``, ``time``,
    ``table``, ``row``, ``remarks`` and ``author``. All fields are stripped
    strings except ``key``, which is a list of keywords.

    Args:
        input_csv: Path of the CSV file to read (despite the function name,
            the file is parsed with ``pandas.read_csv``).
        output_jsonl: Path of the JSONL file to write (UTF-8, overwritten).
    """
    def cell_text(row, column):
        # A missing column (None) or an empty cell (NaN) both become "" so
        # the literal text "nan"/"None" never leaks into the output.
        value = row.get(column)
        return "" if pd.isna(value) else str(value).strip()

    # Keywords are separated by whitespace, ASCII commas or full-width commas.
    key_separator = re.compile(r'[\s,，]+')

    df = pd.read_csv(input_csv)

    with open(output_jsonl, 'w', encoding='utf-8') as f:
        for idx, row in df.iterrows():
            raw_key = row.get("key")
            if isinstance(raw_key, str):
                key_clean = [k.strip() for k in key_separator.split(raw_key) if k.strip()]
            else:
                # Empty (NaN) or non-text key cells yield no keywords.
                key_clean = []

            json_obj = {
                # Fall back to the row index when question_id is absent or empty.
                "id": cell_text(row, "question_id") or str(idx),
                "question": cell_text(row, "question"),
                "answer": cell_text(row, "answer"),
                "key": key_clean,
                "type": cell_text(row, "type"),
                "date": cell_text(row, "date"),
                "time": cell_text(row, "time"),
                "table": cell_text(row, "table"),
                "row": cell_text(row, "row"),
                "remarks": cell_text(row, "备注"),
                "author": cell_text(row, "出题人"),
            }
            # One JSON document per line; keep non-ASCII text readable.
            f.write(json.dumps(json_obj, ensure_ascii=False) + "\n")



In [2]:
# Read a CSV file and generate a JSONL file that keeps only id and question (answer is left empty)
def excel_to_jsonl_simple(input_excel, output_jsonl):
    """Write a question-only JSONL file from a CSV file.

    Each CSV row becomes one JSON object with ``id`` (the ``question_id``
    cell, or the row index when that cell is absent or empty), ``question``
    and an empty ``answer`` placeholder.

    Args:
        input_excel: Path of the CSV file to read (the name is historical;
            the file is parsed with ``pandas.read_csv``).
        output_jsonl: Path of the JSONL file to write (UTF-8, overwritten).
    """
    def plain(value, fallback):
        # Empty cells (NaN/None) would be serialized as the non-standard JSON
        # token NaN/null, so replace them with the fallback.
        if pd.isna(value):
            return fallback
        # numpy scalars are not JSON serializable; unwrap to Python builtins.
        return value.item() if hasattr(value, "item") else value

    # Read from the path that was passed in, not from a notebook global.
    df = pd.read_csv(input_excel)

    with open(output_jsonl, 'w', encoding='utf-8') as f:
        for idx, row in df.iterrows():
            json_obj = {
                "id": plain(row.get("question_id", idx), idx),
                "question": plain(row.get("question", ""), ""),
                "answer": ""
            }
            f.write(json.dumps(json_obj, ensure_ascii=False) + "\n")
 


In [4]:

# Source CSV holding the questions, and the two JSONL files derived from it.
input_csv = "C:\\Users\\Administrator\\Desktop\\SY730~732日志内容20241202-题目.csv"  # path of the input CSV file
output_jsonl1 = "answer.jsonl"  # full records, answers included
output_jsonl2 = "question.jsonl"  # id + question only

# Run both conversions first, then report each file that was written.
excel_to_jsonl(input_csv, output_jsonl1)
excel_to_jsonl_simple(input_csv, output_jsonl2)
for written_path in (output_jsonl1, output_jsonl2):
    print(f"转换完成，输出文件：{written_path}")


转换完成，输出文件：answer.jsonl
转换完成，输出文件：question.jsonl
