In [50]:
import os
import json
from copy import deepcopy
import numpy as np
from tqdm import tqdm
import jsonlines

# zylshs = np.loadtxt('/HL_user01/yc_ruxianwaike_test/2024_5_31出院小结完整演示_wjc版/流水号/ruxianwaike_新增源文件流水号0-100.csv', delimiter=',',dtype=str)
# zylshs = list(zylshs)
# Template describing the layout of one formatted record (presumably a
# discharge summary -- confirm with the data owners).
# Every leaf value is a lookup path into a source record: path segments are
# separated by '---' and are followed one dict key at a time by get_jsonlines.
# The nested "患者基本信息" group is serialized to a JSON string by
# get_jsonlines before it is written out; the other entries are emitted as-is.
cyxj_format = {
    "患者基本信息": {
        "住院号": "基本信息---住院号",
        "床号": "基本信息---床号",
        "入院时间": "基本信息---入院时间",
        "出院时间": "基本信息---出院时间",
        "科别": "基本信息---科别",
        "科室": "病人信息---科室",
        "姓名": "基本信息---姓名",
        "年龄": "基本信息---年龄",
        "性别": "基本信息---性别",
        "脉搏(P)": "生命体征---P",
        "呼吸(R)": "生命体征---R",
        "体温(T)": "生命体征---T",
        "高压(BP高)": "生命体征---BP高",
        "低压(BP低)": "生命体征---BP低",
        "入院诊断": "基本信息---入院诊断",
        "入院时简要病史": "入院时简要病史",
        "体检摘要": "体检摘要"
    },
    "出院诊断": "基本信息---出院诊断",
    "住院期间医疗情况": "住院期间医疗情况",
    "出院时情况": "出院时情况",
    "病程与治疗情况": "病程与治疗情况",
    "出院后用药建议": "出院后用药建议",
}

def read_json(path):
    """Load and return the JSON document stored at ``path``.

    The file is decoded as UTF-8 explicitly, matching how ``save_json`` in this
    notebook writes files, so non-ASCII (e.g. Chinese) content is read
    correctly regardless of the platform's default locale encoding.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (dict, list, str, number, bool or None).

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(path, 'r', encoding='utf8') as f:
        content = json.load(f)
    return content

def create_dirs(dir_path):
    """Create ``dir_path`` (including missing parents) if it does not exist.

    Uses ``exist_ok=True`` instead of a separate existence check so the call is
    safe even if another process creates the directory at the same time.

    Args:
        dir_path: Directory path to create.

    Raises:
        FileExistsError: If ``dir_path`` exists but is not a directory.
    """
    os.makedirs(dir_path, exist_ok=True)

def _lookup_path(record, path):
    """Resolve a '---'-separated key path inside a nested record.

    After every step a non-empty list is replaced by its first element.
    Returns "" when a key is missing, or when the value reached before the
    path is exhausted is not a dict (a missing parent key or an empty list),
    instead of failing with AttributeError.
    """
    data = record
    for part in path.split('---'):
        if not isinstance(data, dict):
            return ""
        data = data.get(part, "")
        if isinstance(data, list) and len(data) >= 1:
            data = data[0]
    return data


def _fill_format(record):
    """Return a copy of ``cyxj_format`` with every path replaced by its value in ``record``."""
    filled = {}
    for key, value in cyxj_format.items():
        if isinstance(value, dict):
            filled[key] = {
                sub_key: _lookup_path(record, path)
                for sub_key, path in value.items()
            }
        elif isinstance(value, str):
            filled[key] = _lookup_path(record, value)
        else:
            # Template entries that are neither a group nor a path are kept unchanged.
            filled[key] = deepcopy(value)
    return filled


def get_jsonlines(zylsh_ziduans,keshi,pred_dir,gold_dir,out_dir,out_name,truth_path=None):
    """Write one JSON Lines record per (zylsh, field) pairing prediction and truth.

    For every id in ``zylsh_ziduans`` the model output, its source text and the
    doctor-written record are loaded, reshaped to the ``cyxj_format`` layout and
    combined with the stored truth values. Ids whose input files are missing
    are printed and skipped.

    Args:
        zylsh_ziduans: Mapping of record id (zylsh) to the collection of
            ``cyxj_format`` top-level keys that should be exported for that id.
        keshi: Department name; used as a sub-directory of ``pred_dir`` and
            ``gold_dir`` and in the default truth file name.
        pred_dir: Root directory containing
            ``<keshi>/<zylsh>/<zylsh>_postprocessed.json`` and
            ``<keshi>/<zylsh>/<zylsh>_findsource.json``.
        gold_dir: Root directory containing ``<keshi>/<zylsh>/<zylsh>.json``.
        out_dir: Output directory; created if missing.
        out_name: Name of the JSON Lines file written inside ``out_dir``.
        truth_path: Optional path of the truth JSON file. Defaults to the
            previously hard-coded ``.../wjc_zylshs/<keshi>_truth.json``.

    Raises:
        KeyError: If an id is missing from the truth file or from one of the
            loaded documents.
    """
    if truth_path is None:
        truth_path = f'/HL_user01/yc_ruxianwaike_test/2024_5_31出院小结完整演示_wjc版/评估/wjc_zylshs/{keshi}_truth.json'
    pred_datas = []
    zylshs = zylsh_ziduans.keys()
    truth = read_json(truth_path)
    for zylsh in tqdm(zylshs):
        model_path = os.path.join(pred_dir,keshi,zylsh,f"{zylsh}_postprocessed.json")
        source_path = os.path.join(pred_dir,keshi,zylsh,f"{zylsh}_findsource.json")
        doctor_path = os.path.join(gold_dir,keshi,zylsh,f"{zylsh}.json")
        try:
            model_content = read_json(model_path)
            source_content = read_json(source_path)
            doctor_content = read_json(doctor_path)
        except FileNotFoundError:
            # Best effort: report the id whose files are missing and move on.
            print(zylsh)
            continue

        # Reshape the model output and the doctor record to the template layout.
        model_formats = _fill_format(model_content[zylsh])
        doctor_formats = _fill_format(doctor_content[zylsh])

        # Use the stored truth; identifying fields are taken from the doctor record.
        scored_keys = zylsh_ziduans[zylsh]
        truth_formats = truth[zylsh]
        if '患者基本信息' in scored_keys:
            # The truth file stores this group as a JSON string.
            truth_formats['患者基本信息'] = json.loads(truth_formats['患者基本信息'])
            for field in ('住院号', '床号', '姓名'):
                truth_formats['患者基本信息'][field] = doctor_formats['患者基本信息'][field]

        for key in cyxj_format.keys():
            # Only export the fields selected for this id.
            if key not in scored_keys:
                continue
            if key == '患者基本信息':
                # The nested group is emitted as a JSON string on both sides.
                model_formats[key] = json.dumps(model_formats[key],ensure_ascii = False)
                truth_formats[key] = json.dumps(truth_formats[key],ensure_ascii = False)
            pred_datas.append({
                'pred': model_formats[key],
                'output': truth_formats[key],
                'instruction': source_content[zylsh][key],
                'key': key,
                'zylsh': zylsh,
            })

    create_dirs(out_dir)
    with jsonlines.open(os.path.join(out_dir,out_name),'w') as f:
        for pred_data in pred_datas:
            f.write(pred_data)

In [51]:
# Driver cell: export prediction/truth pairs for one department.
# keshi is used by get_jsonlines as a sub-directory name and in the truth file name.
keshi = 'ruxianwaike'
# Root of the model outputs (<keshi>/<zylsh>/<zylsh>_postprocessed.json and _findsource.json).
pred_dir = '/HL_user01/yc_ruxianwaike_test/2024_5_31出院小结完整演示_wjc版/model_generated_test'
# Root of the doctor-written records (<keshi>/<zylsh>/<zylsh>.json).
gold_dir = '/HL_user01/yc_ruxianwaike_test/2024_5_31出院小结完整演示_wjc版/doctor_generated'
# zylshs = os.listdir(os.path.join(pred_dir,keshi))
# zylshs = np.loadtxt('/HL_user01/yc_ruxianwaike_test/2024_5_31出院小结完整演示_wjc版/流水号/ruxianwaike_新增源文件流水号_747.csv', delimiter=',',dtype=str)
# zylshs = list(zylshs)
# Mapping of record id -> fields to export; its keys drive the loop in get_jsonlines.
zylsh_ziduans = read_json('/HL_user01/yc_ruxianwaike_test/2024_5_31出院小结完整演示_wjc版/评估/wjc_zylshs/ruxianwaike.json')
# Output location is relative to the notebook's working directory.
out_dir = os.path.join('2_emr_6/',keshi,'出院小结及子字段_test')
# NOTE: despite the .json extension, the file is written in JSON Lines format.
out_name = 'preds.json'
get_jsonlines(zylsh_ziduans,keshi,pred_dir,gold_dir,out_dir,out_name)

100%|██████████| 191/191 [00:00<00:00, 585.15it/s]


In [37]:
def save_json(save_path,content):
    """Serialize ``content`` as pretty-printed JSON into ``save_path``.

    The file is written as UTF-8 with a two-space indent, and non-ASCII
    characters are stored verbatim rather than as ``\\uXXXX`` escapes.

    Args:
        save_path: Destination file path; an existing file is overwritten.
        content: JSON-serializable object to store.
    """
    with open(save_path, mode='w', encoding='utf8') as out_file:
        json.dump(content, out_file, indent=2, ensure_ascii=False)

def get_wjc_zylsh(keshi,out_dir,out_name):
    """Index a JSON Lines prediction file by record id and dump the result.

    Reads ``out_dir/out_name`` and, for each record, collects the record's
    ``key`` and its ``output`` value under the record's ``zylsh``. Three files
    are then written into ``./wjc_zylshs`` (relative to the working directory):

    * ``<keshi>.json``       -- id -> list of keys, in file order
    * ``<keshi>_truth.json`` -- id -> {key: output}
    * ``<keshi>.csv``        -- one id per line

    Args:
        keshi: Department name used as the stem of the output files.
        out_dir: Directory holding the JSON Lines file to read.
        out_name: File name of the JSON Lines file to read.
    """
    fields_by_zylsh = {}
    truth_by_zylsh = {}
    with jsonlines.open(os.path.join(out_dir, out_name), 'r') as reader:
        for record in reader:
            # Reading stops at the first record without an 'output' entry.
            if 'output' not in record.keys():
                break
            zylsh = record['zylsh']
            field = record['key']
            fields_by_zylsh.setdefault(zylsh, []).append(field)
            truth_by_zylsh.setdefault(zylsh, {})[field] = record['output']
    zylsh_array = np.array(list(fields_by_zylsh.keys()))
    create_dirs('wjc_zylshs')
    save_json(f'./wjc_zylshs/{keshi}.json', fields_by_zylsh)
    save_json(f'./wjc_zylshs/{keshi}_truth.json', truth_by_zylsh)
    np.savetxt(f'./wjc_zylshs/{keshi}.csv', zylsh_array, fmt="%s", delimiter=',')


# Driver cell: build the id/field index and truth files for one department
# from an existing JSON Lines prediction file (outputs go to ./wjc_zylshs).
keshi = 'ruxianwaike'
out_dir = os.path.join('/HL_user01/0726_wjc_upload/每个科室单独的指标/2_emr_6',keshi,'出院小结及子字段_test')
# NOTE: the file is read with jsonlines, i.e. one JSON object per line.
out_name = 'preds.json'
get_wjc_zylsh(keshi,out_dir,out_name)

In [34]:
def get_our_zylsh(keshi,out_dir,out_name):
    """Index a JSON Lines prediction file by record id and dump the result.

    Reads ``out_dir/out_name`` and groups each record's ``key`` under the
    record's ``zylsh``. Two files are then written into ``./our_zylshs``
    (relative to the working directory):

    * ``<keshi>.json`` -- id -> list of keys, in file order
    * ``<keshi>.csv``  -- one id per line

    Relies on ``save_json`` and ``create_dirs`` being defined by other cells
    of this notebook before it is called.

    Args:
        keshi: Department name used as the stem of the output files.
        out_dir: Directory holding the JSON Lines file to read.
        out_name: File name of the JSON Lines file to read.
    """
    fields_by_zylsh = {}
    with jsonlines.open(os.path.join(out_dir, out_name), 'r') as reader:
        for record in reader:
            # Reading stops at the first record without an 'output' entry.
            if 'output' not in record.keys():
                break
            fields_by_zylsh.setdefault(record['zylsh'], []).append(record['key'])
    zylsh_array = np.array(list(fields_by_zylsh.keys()))
    create_dirs('our_zylshs')
    save_json(f'./our_zylshs/{keshi}.json', fields_by_zylsh)
    np.savetxt(f'./our_zylshs/{keshi}.csv', zylsh_array, fmt="%s", delimiter=',')


# Driver cell: build the id/field index for one department from the locally
# generated JSON Lines prediction file (outputs go to ./our_zylshs).
keshi = 'ruxianwaike'
# Path is relative to the notebook's working directory.
out_dir = os.path.join('./2_emr_6',keshi,'出院小结及子字段_test')
# NOTE: the file is read with jsonlines, i.e. one JSON object per line.
out_name = 'preds.json'
get_our_zylsh(keshi,out_dir,out_name)