In [1]:
import csv

# Locations of the code -> name lookup tables, relative to this notebook.
condition_mapping_file = "../../resources/CCSCM.csv"
procedure_mapping_file = "../../resources/CCSPROC.csv"
drug_file = "../../resources/ATC.csv"

# Condition code -> lower-cased condition name.
with open(condition_mapping_file, newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    condition_dict = {entry['code']: entry['name'].lower() for entry in reader}

# Procedure code -> lower-cased procedure name.
with open(procedure_mapping_file, newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    procedure_dict = {entry['code']: entry['name'].lower() for entry in reader}

# Drug code -> lower-cased drug name; only rows whose 'level' column is the
# literal string '3.0' are kept.
with open(drug_file, newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    drug_dict = {
        entry['code']: entry['name'].lower()
        for entry in reader
        if entry['level'] == '3.0'
    }

In [2]:
import re 
from ChatGPT import ChatECNU
from ChatGPT import ChatECNU
import json

def extract_data_in_brackets(input_string):
    """Return the text found between each ``[`` ... ``]`` pair in ``input_string``.

    Matching is non-greedy and proceeds left to right, so with nested brackets
    the first capture keeps the inner opening bracket
    (``"[[a], [b]]"`` yields ``["[a", "b"]``).  ``.`` does not match a newline,
    so a pair that spans several lines is not captured.

    Args:
        input_string: text to scan.

    Returns:
        list[str]: captured substrings in order of appearance (possibly empty).
    """
    bracketed = re.compile(r"\[(.*?)\]")
    return [hit.group(1) for hit in bracketed.finditer(input_string)]

def divide_text(long_text, max_len=800):
    """Split ``long_text`` into consecutive pieces of at most ``max_len`` items.

    Args:
        long_text: any sliceable sequence with a length (typically a string).
        max_len: maximum size of each piece; must be positive.

    Returns:
        list: the pieces in order; empty when ``long_text`` is empty.  Only the
        last piece may be shorter than ``max_len``.

    Raises:
        ValueError: if ``max_len`` is zero or negative.  Without this check the
            start index would never advance and the loop would never finish.
    """
    if max_len <= 0:
        raise ValueError(f"max_len must be a positive integer, got {max_len!r}")

    return [
        long_text[start:start + max_len]
        for start in range(0, len(long_text), max_len)
    ]

def filter_triples(triples):
    """Ask ChatECNU to reduce ``triples`` to the ones it considers most useful.

    The triples are interpolated into a fixed prompt and sent through a fresh
    ``ChatECNU`` client.

    Args:
        triples: the triples to filter; formatted into the prompt as-is.

    Returns:
        The bracketed substrings extracted from ``response.content``.  If the
        client returns ``None`` a warning is printed and the original
        ``triples`` object is returned unchanged.
    """
    client = ChatECNU()
    prompt = f"""
            I have a list of triples. I want to select 50 most important triples from the list.
            The importance of a triple is based on how you think it will help imrpove healthcare prediction tasks (e.g., drug recommendation, mortality prediction, readmission prediction …).
            If you think a triple is important, please keep it. Otherwise, please remove it.
            You can also add triples from your background knowledge.
            The total size of the updated list should be below 50.

            triples: {triples}
            updates:
        """
    response = client.chat(prompt)

    # No reply from the service: fall back to the unfiltered input.
    if response is None:
        print("警告: ChatECNU返回None，返回原始triples")
        return triples

    # The reply text is read directly from the response's ``content`` attribute.
    return extract_data_in_brackets(response.content)


In [3]:
from ChatGPT import ChatECNU
import json

def graph_gen(term: str, mode: str):
    """Ask ChatECNU for relationship triples about ``term``.

    A mode-specific worked example is inserted into a fixed prompt, the prompt
    is sent through a fresh ``ChatECNU`` client, and the bracketed items in the
    reply are converted to tab-separated lines.

    Args:
        term: the condition / procedure / drug name placed in the prompt.
        mode: one of ``"condition"``, ``"procedure"`` or ``"drug"``; selects
            the worked example shown to the model.

    Returns:
        str: one line per extracted item, with ``[`` and ``]`` removed and
        every ``", "`` replaced by a tab, each line ending in a newline.
        An empty string when the client returns ``None`` (a warning is
        printed) or when nothing bracketed is found in the reply.

    Raises:
        ValueError: if ``mode`` is not one of the supported values.  Without
            this check ``example`` would be unbound and the prompt
            construction would fail with an ``UnboundLocalError``.
    """
    if mode == "condition":
        example = \
        """
        Example:
        prompt: systemic lupus erythematosus
        updates: [[systemic lupus erythematosus, is an, autoimmune condition], [systemic lupus erythematosus, may cause, nephritis], [anti-nuclear antigen, is a test for, systemic lupus erythematosus], [systemic lupus erythematosus, is treated with, steroids], [methylprednisolone, is a, steroid]]
        """
    elif mode == "procedure":
        example = \
        """
        Example:
        prompt: endoscopy
        updates: [[endoscopy, is a, medical procedure], [endoscopy, used for, diagnosis], [endoscopic biopsy, is a type of, endoscopy], [endoscopic biopsy, can detect, ulcers]]
        """
    elif mode == "drug":
        example = \
        """
        Example:
        prompt: iobenzamic acid
        updates: [[iobenzamic acid, is a, drug], [iobenzamic acid, may have, side effects], [side effects, can include, nausea], [iobenzamic acid, used as, X-ray contrast agent], [iobenzamic acid, formula, C16H13I3N2O3]]
        """
    else:
        # Fail before creating a client or sending any request.
        raise ValueError(
            f"unsupported mode {mode!r}; expected 'condition', 'procedure' or 'drug'"
        )

    chatgpt = ChatECNU()
    response = chatgpt.chat(
        f"""
            Given a prompt (a medical condition/procedure/drug), extrapolate as many relationships as possible of it and provide a list of updates.
            The relationships should be helpful for healthcare prediction (e.g., drug recommendation, mortality prediction, readmission prediction …)
            Each update should be exactly in format of [ENTITY 1, RELATIONSHIP, ENTITY 2]. The relationship is directed, so the order matters.
            Both ENTITY 1 and ENTITY 2 should be noun.
            Any element in [ENTITY 1, RELATIONSHIP, ENTITY 2] should be conclusive, make it as short as possible.
            Do this in both breadth and depth. Expand [ENTITY 1, RELATIONSHIP, ENTITY 2] until the size reaches 100.

            {example}

            prompt: {term}
            updates:
        """
        )

    # No reply from the service: report it and hand back an empty result.
    if response is None:
        print(f"警告: ChatECNU返回None，当前term: {term}")
        return ""

    # The reply text is read directly from the response's ``content`` attribute.
    triples = extract_data_in_brackets(response.content)

    # The non-greedy bracket match can leave a stray '[' on an item when the
    # reply nests brackets, so both bracket characters are stripped here.
    return "".join(
        triple.replace('[', '').replace(']', '').replace(', ', '\t') + '\n'
        for triple in triples
    )

In [4]:
## Future work - Including Clinical Notes
# import json

# with open('../../clinical_notes/subject_text_dict.json', 'r') as f:
#     subject_text_dict = json.load(f)

In [5]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
import os


from ChatGPT import ChatECNU

def test_chatecnu():
    """Smoke-test the ChatECNU client and print progress along the way.

    Three checks are run in order: a plain chat message, listing the
    available models, and chatting after setting a system message.

    Returns:
        bool: ``True`` only if every check succeeded.  A missing reply to the
        first message aborts immediately with ``False``; a failure in one of
        the later checks is reported, the remaining checks still run, and the
        function then returns ``False`` (previously it returned ``True`` in
        that case, so the caller announced success after a printed failure).
        Any exception is caught, reported, and results in ``False``.
    """
    print("开始测试ChatECNU...")
    
    try:
        # Create the client.
        chat = ChatECNU(model="ecnu-max")
        print("✓ ChatECNU客户端初始化成功")

        # Becomes False as soon as any non-fatal check fails.
        all_passed = True
        
        # Check 1: plain conversation (fatal when there is no reply).
        print("\n测试1: 简单对话")
        test_message = "你好，请简单介绍一下你自己。"
        print(f"发送消息: {test_message}")
        
        response = chat.chat(test_message)
        if response:
            print(f"✓ 收到回复: {response.content}")
        else:
            print("✗ 未收到回复")
            return False
            
        # Check 2: list the available models.
        print("\n测试2: 获取可用模型")
        models = chat.get_available_models()
        if models:
            print(f"✓ 可用模型: {models}")
        else:
            print("✗ 无法获取模型列表")
            all_passed = False
            
        # Check 3: chat again after clearing history and setting a system message.
        print("\n测试3: 设置系统消息")
        chat.clear_messages()
        chat.set_system_message("你是一个友好的AI助手，请用简洁的方式回答问题。")
        
        response2 = chat.chat("请用一句话介绍Python编程语言。")
        if response2:
            print(f"✓ 系统消息设置成功，回复: {response2.content}")
        else:
            print("✗ 系统消息设置失败")
            all_passed = False
            
        print("\n🎉 ChatECNU测试完成！")
        return all_passed
        
    except FileNotFoundError as e:
        print(f"✗ 文件未找到错误: {e}")
        print("请确保 resources/ecnu.key 文件存在")
        return False
        
    except ImportError as e:
        print(f"✗ 导入错误: {e}")
        print("请确保已安装openai库: pip install openai")
        return False
        
    except Exception as e:
        print(f"✗ 测试过程中出现错误: {e}")
        return False

def check_prerequisites(key_file="../../resources/ecnu.key"):
    """Check that the API key file exists and that ``openai`` can be imported.

    Progress and failures are printed as the checks run.

    Args:
        key_file: path of the API key file to look for.  Defaults to the
            location this notebook has always used.

    Returns:
        bool: ``True`` when the key file exists and ``openai`` imports
        successfully; ``False`` as soon as either check fails.
    """
    print("检查运行前提条件...")

    # The key file must be present.
    if not os.path.exists(key_file):
        print(f"✗ 未找到API密钥文件: {key_file}")
        return False
    print(f"✓ 找到API密钥文件: {key_file}")

    # The openai package must be importable; it is imported here only to
    # confirm that and to report its version.
    try:
        import openai
    except ImportError:
        print("✗ 未安装openai库，请运行: pip install openai")
        return False
    print(f"✓ openai库已安装，版本: {openai.__version__}")

    return True

if __name__ == "__main__":
    # Banner.
    print("=" * 50, "ChatECNU 功能测试脚本", "=" * 50, sep="\n")

    # Stop before running anything if the prerequisites are not met.
    if not check_prerequisites():
        print("\n前提条件检查失败，请解决上述问题后重试。")
        sys.exit(1)

    # Run the client checks.
    print("\n" + "=" * 30)
    success = test_chatecnu()

    # Exit with a non-zero status on failure; otherwise announce success.
    if not success:
        print("\n❌ 测试失败，请检查配置和网络连接。")
        sys.exit(1)

    print("\n✅ 所有测试通过！ChatECNU工作正常。")

ChatECNU 功能测试脚本
检查运行前提条件...
✓ 找到API密钥文件: ../../resources/ecnu.key
✓ openai库已安装，版本: 1.97.1

开始测试ChatECNU...
✓ ChatECNU客户端初始化成功

测试1: 简单对话
发送消息: 你好，请简单介绍一下你自己。
✓ 收到回复: 你好！我是ChatECNU，由华东师范大学开发的智能助手。很高兴为你服务！我可以帮助你解答问题、提供信息、辅助学习等。作为华东师范大学开发的AI，我特别关注教育领域的需求。请问今天有什么可以帮你的吗？

测试2: 获取可用模型
✓ 可用模型: ['ChatECNU', 'ecnu-embedding-small', 'ecnu-max', 'ecnu-plus', 'ecnu-image', 'DALL-E-3', 'ecnu-vl', 'ecnu-rerank', 'ecnu-reasoner', 'gpt-4', 'ecnu-reasoner-lite', 'educhat-psychology', 'educhat-general', 'ecnu-turbo', 'InnoSpark', 'InnoSpark-R', 'educhat-r1', 'ChatECNU-app', 'educhat-r1-app', 'deepseekv3-app', 'image-app', 'deepseek-chat-app', 'Qwen3-32B-app', 'Qwen2-VL-app']

测试3: 设置系统消息
✓ 系统消息设置成功，回复: Python是一种简洁易读的高级编程语言，适合快速开发和跨平台应用。

🎉 ChatECNU测试完成！

✅ 所有测试通过！ChatECNU工作正常。


In [6]:
import time
import random

def write_failure_log(term:str,mode:str,error:str="ChatECNU returned None",
                      log_file:str="../../logs/failed_requests.json"):
    """Append one failed request to a JSON log file and print a confirmation.

    The log file holds a JSON list of ``{"term", "mode", "error"}`` objects.
    It is read, extended with the new record, and rewritten in full.

    Args:
        term: the term whose generation failed.
        mode: the generation mode used for ``term``.
        error: description of the failure.
        log_file: path of the JSON log.  Defaults to the location this
            notebook has always used; missing parent directories are created.

    Notes:
        A missing or unparsable log file is treated as an empty log.  If the
        file parses but is not a JSON list, its content is kept as the first
        element of a new list instead of failing on ``append``.
    """
    failure_log = {
        "term": term,
        "mode": mode,
        "error": error
    }

    # Create the log directory when the path has one; os.makedirs("") would
    # raise for a bare file name.
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # Load the existing records, starting fresh if there are none to read.
    try:
        with open(log_file, 'r', encoding='utf-8') as f:
            failures = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        failures = []

    if not isinstance(failures, list):
        failures = [failures]

    failures.append(failure_log)

    with open(log_file, 'w', encoding='utf-8') as f:
        json.dump(failures, f, ensure_ascii=False, indent=2)

    print(f"❌ 失败记录已保存: {term} ({mode})")

def graph_gen_with_retry(term: str, mode: str, max_retries=3, delay_range=(1, 5)):
    """Call ``graph_gen`` up to ``max_retries`` times until it returns text.

    An attempt fails when ``graph_gen`` returns a falsy value or raises.  After
    a failed attempt that is not the last one, the function sleeps for a
    random duration drawn uniformly from ``delay_range`` (seconds) and tries
    again.  When the last attempt fails, the failure is recorded through
    ``write_failure_log``.

    Args:
        term: term passed to ``graph_gen``.
        mode: mode passed to ``graph_gen``.
        max_retries: maximum number of attempts.
        delay_range: ``(low, high)`` bounds for the random pause.

    Returns:
        The first truthy result of ``graph_gen``, or ``""`` if every attempt
        failed (or no attempt was made).
    """
    last_attempt = max_retries - 1

    for attempt in range(max_retries):
        can_retry = attempt < last_attempt
        try:
            generated = graph_gen(term, mode)

            # Success: mention it only when an earlier attempt had failed.
            if generated:
                if attempt > 0:
                    print(f"✅ 重试成功 (第{attempt + 1}次尝试): {term}")
                return generated

            # Empty result: pause and retry, or give up and log.
            if can_retry:
                pause = random.uniform(*delay_range)
                print(f"⏳ 第{attempt + 1}次尝试失败，{pause:.1f}秒后重试: {term}")
                time.sleep(pause)
            else:
                print(f"❌ 所有重试都失败了: {term}")
                write_failure_log(term,mode)

        except Exception as err:
            # Exception during the attempt: same policy, with the error text logged.
            if can_retry:
                pause = random.uniform(*delay_range)
                print(f"⚠️ 第{attempt + 1}次尝试出错，{pause:.1f}秒后重试: {term} - {str(err)}")
                time.sleep(pause)
            else:
                print(f"❌ 所有重试都出错了: {term} - {str(err)}")
                write_failure_log(term,mode,str(err))

    return ""

In [10]:
from tqdm import tqdm
import os

# Generate one triple file per condition code that does not have a file yet.
# Make sure the output directory exists so a first run does not fail on open().
os.makedirs('../../graphs/condition/CCSCM', exist_ok=True)

for key in tqdm(condition_dict.keys()):
    file = f'../../graphs/condition/CCSCM/{key}.txt'

    # Existing files are left untouched, so an interrupted run can be resumed.
    # (The former "top up short files" code sat after this `continue` and
    # could never execute; it has been removed.)
    if os.path.exists(file):
        continue

    outstr = graph_gen_with_retry(term=condition_dict[key], mode="condition")

    # NOTE(review): when generation fails `outstr` is "" and an empty file is
    # still written, which later runs will then skip — confirm this is wanted.
    # The context manager closes the file so the text is flushed to disk;
    # the handle was previously left open.
    with open(file=file, mode='w', encoding='utf-8') as outfile:
        outfile.write(outstr)

100%|██████████| 285/285 [12:32<00:00,  2.64s/it]


In [7]:
from tqdm import tqdm
import os

# Generate one triple file per procedure code that does not have a file yet.
# Make sure the output directory exists so a first run does not fail on open().
os.makedirs('../../graphs/procedure/CCSPROC', exist_ok=True)

for key in tqdm(procedure_dict.keys()):
    file = f'../../graphs/procedure/CCSPROC/{key}.txt'

    # Existing files are left untouched, so an interrupted run can be resumed.
    # (The former "top up short files" code sat after this `continue` and
    # could never execute; it has been removed.)
    if os.path.exists(file):
        continue

    outstr = graph_gen_with_retry(term=procedure_dict[key], mode="procedure")

    # NOTE(review): when generation fails `outstr` is "" and an empty file is
    # still written, which later runs will then skip — confirm this is wanted.
    # The context manager closes the file so the text is flushed to disk;
    # the handle was previously left open.
    with open(file=file, mode='w', encoding='utf-8') as outfile:
        outfile.write(outstr)

100%|██████████| 231/231 [45:40<00:00, 11.86s/it]


In [None]:
from tqdm import tqdm
import os

# Generate (or top up) one triple file per drug code under graphs/drug/ATC5.
# NOTE(review): `drug_dict` is built only from rows whose level is '3.0', yet
# this cell writes to an ATC5 directory — confirm this is intended.
# Make sure the output directory exists so a first run does not fail on open().
os.makedirs('../../graphs/drug/ATC5', exist_ok=True)

for key in tqdm(drug_dict.keys()):
    file = f'../../graphs/drug/ATC5/{key}.txt'

    if os.path.exists(file):
        with open(file=file, mode="r", encoding='utf-8') as f:
            prev_triples = f.read()
        # Files that already split into 150 or more lines are left alone.
        if len(prev_triples.split('\n')) >= 150:
            continue
    else:
        prev_triples = ""

    outstr = graph_gen_with_retry(term=drug_dict[key], mode="drug")

    # Rewrite the file as previous content followed by the new triples.  The
    # context manager closes the file so the text is flushed to disk; the
    # handle was previously left open.
    with open(file=file, mode='w', encoding='utf-8') as outfile:
        outfile.write(prev_triples + outstr)

In [8]:
from tqdm import tqdm
import os

# Generate (or top up) one triple file per drug code under graphs/drug/ATC3.
# Make sure the output directory exists so a first run does not fail on open().
os.makedirs('../../graphs/drug/ATC3', exist_ok=True)

for key in tqdm(drug_dict.keys()):
    file = f'../../graphs/drug/ATC3/{key}.txt'

    if os.path.exists(file):
        with open(file=file, mode="r", encoding='utf-8') as f:
            prev_triples = f.read()
        # Files that already split into 150 or more lines are left alone.
        if len(prev_triples.split('\n')) >= 150:
            continue
    else:
        prev_triples = ""

    outstr = graph_gen_with_retry(term=drug_dict[key], mode="drug")

    # Rewrite the file as previous content followed by the new triples.  The
    # context manager closes the file so the text is flushed to disk; the
    # handle was previously left open.
    with open(file=file, mode='w', encoding='utf-8') as outfile:
        outfile.write(prev_triples + outstr)

100%|██████████| 269/269 [1:44:38<00:00, 23.34s/it]
