In [1]:
import csv

condition_mapping_file = "../../resources/CCSCM.csv"
procedure_mapping_file = "../../resources/CCSPROC.csv"
drug_file = "../../resources/ATC.csv"

# Each mapping below is keyed by the CSV's 'code' column and stores the
# lower-cased 'name' column of the same row.

# Condition codes -> names.
condition_dict = {}
with open(condition_mapping_file, newline='') as condition_csv:
    condition_dict.update(
        (record['code'], record['name'].lower())
        for record in csv.DictReader(condition_csv)
    )

# Procedure codes -> names.
procedure_dict = {}
with open(procedure_mapping_file, newline='') as procedure_csv:
    procedure_dict.update(
        (record['code'], record['name'].lower())
        for record in csv.DictReader(procedure_csv)
    )

# Drug codes -> names; only rows whose 'level' column is the string '3.0' are kept.
drug_dict = {}
with open(drug_file, newline='') as drug_csv:
    drug_dict.update(
        (record['code'], record['name'].lower())
        for record in csv.DictReader(drug_csv)
        if record['level'] == '3.0'
    )

In [2]:
import re 
from ChatGPT import ChatECNU
from ChatGPT import ChatECNU
import json

def extract_data_in_brackets(input_string):
    """Return the text found between each '[' and the nearest following ']'.

    The match is non-greedy and does not cross newlines, so for nested input
    such as "[[a, b], [c, d]]" the first item keeps its extra leading '['.

    Args:
        input_string: Text to scan.

    Returns:
        A list of the captured substrings, in order of appearance (empty if
        nothing matches).
    """
    bracket_re = re.compile(r"\[(.*?)\]")
    return [hit.group(1) for hit in bracket_re.finditer(input_string)]

def divide_text(long_text, max_len=800):
    """Split ``long_text`` into consecutive chunks of at most ``max_len`` items.

    Args:
        long_text: The sequence (normally a string) to split.
        max_len: Maximum length of each chunk; must be a positive integer.

    Returns:
        A list of slices covering ``long_text`` from start to end. Every chunk
        has length ``max_len`` except possibly the last. Empty input yields [].

    Raises:
        ValueError: If ``max_len`` is not positive. A non-positive value can
            never advance through the text, so the previous loop-based
            implementation would spin forever on it.
    """
    if max_len <= 0:
        raise ValueError(f"max_len must be a positive integer, got {max_len!r}")
    return [
        long_text[start_idx:start_idx + max_len]
        for start_idx in range(0, len(long_text), max_len)
    ]

def filter_triples(triples):
    """Ask ChatECNU to prune ``triples`` and return the bracketed items of its reply.

    Args:
        triples: The triples to filter; interpolated into the prompt as-is.

    Returns:
        The substrings that ``extract_data_in_brackets`` finds in the reply's
        ``content``. If the chat call returns None, a warning is printed and
        the input ``triples`` object is returned unchanged.
    """
    client = ChatECNU()
    prompt = f"""
            I have a list of triples. I want to select 50 most important triples from the list.
            The importance of a triple is based on how you think it will help imrpove healthcare prediction tasks (e.g., drug recommendation, mortality prediction, readmission prediction ‚Ä¶).
            If you think a triple is important, please keep it. Otherwise, please remove it.
            You can also add triples from your background knowledge.
            The total size of the updated list should be below 50.

            triples: {triples}
            updates:
        """
    reply = client.chat(prompt)

    # No reply at all: fall back to the caller's own triples.
    if reply is None:
        print("Ë≠¶Âëä: ChatECNUËøîÂõûNoneÔºåËøîÂõûÂéüÂßãtriples")
        return triples

    # The reply text is read directly from the `content` attribute.
    return extract_data_in_brackets(reply.content)


In [3]:
import time
import random

def write_failure_log(term:str,mode:str,error:str="ChatECNU returned None",
                      log_file:str="../../logs/failed_requests.json"):
    """Append one failure record to a JSON log file and print a confirmation.

    The log file holds a JSON list of ``{"term", "mode", "error"}`` objects.
    It is read, extended with the new record and rewritten in full.

    Args:
        term: The term whose request failed.
        mode: The mode that was passed along with the term.
        error: Description of the failure.
        log_file: Path of the JSON log. Defaults to the location the notebook
            has always used; missing parent directories are created.

    Returns:
        None.
    """
    # Imported locally so the function does not depend on which notebook
    # cells happened to run (and import these modules) before it is called.
    import json
    import os

    failure_log = {
        "term": term,
        "mode": mode,
        "error": error
    }

    # os.makedirs("") raises, so only create a directory when the path has one.
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # A missing or unparsable log simply starts a new list.
    try:
        with open(log_file, 'r', encoding='utf-8') as f:
            failures = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        failures = []

    # Valid JSON that is not a list cannot be appended to; keep the old
    # content as the first entry instead of crashing or discarding it.
    if not isinstance(failures, list):
        failures = [failures]

    failures.append(failure_log)

    with open(log_file, 'w', encoding='utf-8') as f:
        json.dump(failures, f, ensure_ascii=False, indent=2)

    print(f"‚ùå Â§±Ë¥•ËÆ∞ÂΩïÂ∑≤‰øùÂ≠ò: {term} ({mode})")

def graph_gen_with_retry(term: str, mode: str, max_retries=3, delay_range=(1, 5)):
    """Call ``graph_gen`` with retries and a random pause between attempts.

    Args:
        term: Term forwarded to ``graph_gen``.
        mode: Mode forwarded to ``graph_gen``.
        max_retries: Total number of attempts to make.
        delay_range: ``(low, high)`` bounds, in seconds, passed to
            ``random.uniform`` to pick the pause before the next attempt.

    Returns:
        The first truthy result of ``graph_gen``. If every attempt yields a
        falsy result or raises, the failure is recorded with
        ``write_failure_log`` and an empty string is returned.
    """
    for attempt_no in range(1, max_retries + 1):
        retries_remain = attempt_no < max_retries
        try:
            generated = graph_gen(term, mode)

            # Success: report it only when it took more than one attempt.
            if generated:
                if attempt_no > 1:
                    print(f"‚úÖ ÈáçËØïÊàêÂäü (Á¨¨{attempt_no}Ê¨°Â∞ùËØï): {term}")
                return generated

            # Empty result: wait and try again, or give up and log.
            if retries_remain:
                pause = random.uniform(*delay_range)
                print(f"‚è≥ Á¨¨{attempt_no}Ê¨°Â∞ùËØïÂ§±Ë¥•Ôºå{pause:.1f}ÁßíÂêéÈáçËØï: {term}")
                time.sleep(pause)
            else:
                print(f"‚ùå ÊâÄÊúâÈáçËØïÈÉΩÂ§±Ë¥•‰∫Ü: {term}")
                write_failure_log(term, mode)

        except Exception as exc:
            # Same policy for raised errors, with the message included.
            if retries_remain:
                pause = random.uniform(*delay_range)
                print(f"‚ö†Ô∏è Á¨¨{attempt_no}Ê¨°Â∞ùËØïÂá∫ÈîôÔºå{pause:.1f}ÁßíÂêéÈáçËØï: {term} - {str(exc)}")
                time.sleep(pause)
            else:
                print(f"‚ùå ÊâÄÊúâÈáçËØïÈÉΩÂá∫Èîô‰∫Ü: {term} - {str(exc)}")
                write_failure_log(term, mode, str(exc))

    return ""

In [None]:
from ChatGPT import ChatECNU
import json

def graph_gen(term: str, mode: str):
    """Ask ChatECNU for relationship triples about ``term`` and return them as TSV text.

    A mode-specific worked example is embedded in the prompt. The reply's
    ``content`` is scanned with ``extract_data_in_brackets``; each extracted
    item has its square brackets removed and every ", " replaced by a tab.

    Args:
        term: The medical condition, procedure or drug to expand.
        mode: One of "condition", "procedure" or "drug"; selects the example.

    Returns:
        One line per extracted item, each terminated by a newline. Returns ""
        when the chat call returns None (a warning is printed) or when nothing
        is extracted.

    Raises:
        ValueError: If ``mode`` is not one of the supported values. Previously
            this left ``example`` unbound and failed with an opaque
            UnboundLocalError only after a client had already been created.
    """
    if mode == "condition":
        example = \
        """
        Example:
        prompt: systemic lupus erythematosus
        updates: [[systemic lupus erythematosus, is an, autoimmune condition], [systemic lupus erythematosus, may cause, nephritis], [anti-nuclear antigen, is a test for, systemic lupus erythematosus], [systemic lupus erythematosus, is treated with, steroids], [methylprednisolone, is a, steroid]]
        """
    elif mode == "procedure":
        example = \
        """
        Example:
        prompt: endoscopy
        updates: [[endoscopy, is a, medical procedure], [endoscopy, used for, diagnosis], [endoscopic biopsy, is a type of, endoscopy], [endoscopic biopsy, can detect, ulcers]]
        """
    elif mode == "drug":
        example = \
        """
        Example:
        prompt: iobenzamic acid
        updates: [[iobenzamic acid, is a, drug], [iobenzamic acid, may have, side effects], [side effects, can include, nausea], [iobenzamic acid, used as, X-ray contrast agent], [iobenzamic acid, formula, C16H13I3N2O3]]
        """
    else:
        # Fail fast, before any client is built or request is sent.
        raise ValueError(
            f"unsupported mode {mode!r}; expected 'condition', 'procedure' or 'drug'"
        )
    chatgpt = ChatECNU()
    response = chatgpt.chat(
        f"""
            Given a prompt (a medical condition/procedure/drug), extrapolate as many relationships as possible of it and provide a list of updates.
            The relationships should be helpful for healthcare prediction (e.g., drug recommendation, mortality prediction, readmission prediction ‚Ä¶)
            Each update should be exactly in format of [ENTITY 1, RELATIONSHIP, ENTITY 2]. The relationship is directed, so the order matters.
            Both ENTITY 1 and ENTITY 2 should be noun.
            Any element in [ENTITY 1, RELATIONSHIP, ENTITY 2] should be conclusive, make it as short as possible.
            Do this in both breadth and depth. Expand [ENTITY 1, RELATIONSHIP, ENTITY 2] until the size reaches 100.

            {example}

            prompt: {term}
            updates:
        """
        )
    # The reply text is read directly from the `content` attribute.
    if response is None:
        print(f"Ë≠¶Âëä: ChatECNUËøîÂõûNoneÔºåÂΩìÂâçterm: {term}")
        return ""

    triples = extract_data_in_brackets(response.content)
    # Strip leftover brackets (the outer list's '[' survives extraction) and
    # turn the ", " separators into tabs; one triple per output line.
    return "".join(
        triple.replace('[', '').replace(']', '').replace(', ', '\t') + '\n'
        for triple in triples
    )

In [5]:
## Future work - Including Clinical Notes
# import json

# with open('../../clinical_notes/subject_text_dict.json', 'r') as f:
#     subject_text_dict = json.load(f)

In [6]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
import os


from ChatGPT import ChatECNU

def test_chatecnu():
    """Smoke-test the ChatECNU client and print the outcome of each step.

    Steps: build a client, send a simple message, list available models,
    then clear the messages, set a system message and send a second message.

    Returns:
        True when the run reaches the end. False when the first message gets
        no reply or when any exception is raised along the way.
    """
    print("ÂºÄÂßãÊµãËØïChatECNU...")

    try:
        # Build the ChatECNU client.
        client = ChatECNU(model="ecnu-max")
        print("‚úì ChatECNUÂÆ¢Êà∑Á´ØÂàùÂßãÂåñÊàêÂäü")

        # Test 1: simple conversation. This is the only step whose failure
        # aborts the run.
        print("\nÊµãËØï1: ÁÆÄÂçïÂØπËØù")
        greeting = "‰Ω†Â•ΩÔºåËØ∑ÁÆÄÂçï‰ªãÁªç‰∏Ä‰∏ã‰Ω†Ëá™Â∑±„ÄÇ"
        print(f"ÂèëÈÄÅÊ∂àÊÅØ: {greeting}")

        first_reply = client.chat(greeting)
        if not first_reply:
            print("‚úó Êú™Êî∂Âà∞ÂõûÂ§ç")
            return False
        print(f"‚úì Êî∂Âà∞ÂõûÂ§ç: {first_reply.content}")

        # Test 2: list the available models.
        print("\nÊµãËØï2: Ëé∑ÂèñÂèØÁî®Ê®°Âûã")
        available = client.get_available_models()
        if not available:
            print("‚úó Êó†Ê≥ïËé∑ÂèñÊ®°ÂûãÂàóË°®")
        else:
            print(f"‚úì ÂèØÁî®Ê®°Âûã: {available}")

        # Test 3: set a system message, then chat again.
        print("\nÊµãËØï3: ËÆæÁΩÆÁ≥ªÁªüÊ∂àÊÅØ")
        client.clear_messages()
        client.set_system_message("‰Ω†ÊòØ‰∏Ä‰∏™ÂèãÂ•ΩÁöÑAIÂä©ÊâãÔºåËØ∑Áî®ÁÆÄÊ¥ÅÁöÑÊñπÂºèÂõûÁ≠îÈóÆÈ¢ò„ÄÇ")

        second_reply = client.chat("ËØ∑Áî®‰∏ÄÂè•ËØù‰ªãÁªçPythonÁºñÁ®ãËØ≠Ë®Ä„ÄÇ")
        if not second_reply:
            print("‚úó Á≥ªÁªüÊ∂àÊÅØËÆæÁΩÆÂ§±Ë¥•")
        else:
            print(f"‚úì Á≥ªÁªüÊ∂àÊÅØËÆæÁΩÆÊàêÂäüÔºåÂõûÂ§ç: {second_reply.content}")

        print("\nüéâ ChatECNUÊµãËØïÂÆåÊàêÔºÅ")
        return True

    except FileNotFoundError as missing:
        print(f"‚úó Êñá‰ª∂Êú™ÊâæÂà∞ÈîôËØØ: {missing}")
        print("ËØ∑Á°Æ‰øù resources/ecnu.key Êñá‰ª∂Â≠òÂú®")
        return False

    except ImportError as import_err:
        print(f"‚úó ÂØºÂÖ•ÈîôËØØ: {import_err}")
        print("ËØ∑Á°Æ‰øùÂ∑≤ÂÆâË£ÖopenaiÂ∫ì: pip install openai")
        return False

    except Exception as other:
        print(f"‚úó ÊµãËØïËøáÁ®ã‰∏≠Âá∫Áé∞ÈîôËØØ: {other}")
        return False

def check_prerequisites():
    """Check that the API key file exists and that ``openai`` can be imported.

    Prints one status line per check.

    Returns:
        False as soon as a check fails (missing key file or ImportError),
        True when both pass.
    """
    print("Ê£ÄÊü•ËøêË°åÂâçÊèêÊù°‰ª∂...")

    # Check 1: the ecnu.key file, at a path relative to the working directory.
    key_file = "../../resources/ecnu.key"
    key_present = os.path.exists(key_file)
    if key_present:
        print(f"‚úì ÊâæÂà∞APIÂØÜÈí•Êñá‰ª∂: {key_file}")
    else:
        print(f"‚úó Êú™ÊâæÂà∞APIÂØÜÈí•Êñá‰ª∂: {key_file}")
        return False

    # Check 2: the openai library.
    try:
        import openai
        print(f"‚úì openaiÂ∫ìÂ∑≤ÂÆâË£ÖÔºåÁâàÊú¨: {openai.__version__}")
    except ImportError:
        print("‚úó Êú™ÂÆâË£ÖopenaiÂ∫ìÔºåËØ∑ËøêË°å: pip install openai")
        return False

    return True

if __name__ == "__main__":
    # Banner.
    banner_rule = "=" * 50
    print(banner_rule)
    print("ChatECNU ÂäüËÉΩÊµãËØïËÑöÊú¨")
    print(banner_rule)

    # Stop early when the prerequisites are not met.
    prerequisites_ok = check_prerequisites()
    if not prerequisites_ok:
        print("\nÂâçÊèêÊù°‰ª∂Ê£ÄÊü•Â§±Ë¥•ÔºåËØ∑Ëß£ÂÜ≥‰∏äËø∞ÈóÆÈ¢òÂêéÈáçËØï„ÄÇ")
        sys.exit(1)

    # Run the smoke test; a failure exits with status 1.
    print("\n" + "=" * 30)
    success = test_chatecnu()

    if not success:
        print("\n‚ùå ÊµãËØïÂ§±Ë¥•ÔºåËØ∑Ê£ÄÊü•ÈÖçÁΩÆÂíåÁΩëÁªúËøûÊé•„ÄÇ")
        sys.exit(1)
    print("\n‚úÖ ÊâÄÊúâÊµãËØïÈÄöËøáÔºÅChatECNUÂ∑•‰ΩúÊ≠£Â∏∏„ÄÇ")

ChatECNU ÂäüËÉΩÊµãËØïËÑöÊú¨
Ê£ÄÊü•ËøêË°åÂâçÊèêÊù°‰ª∂...
‚úì ÊâæÂà∞APIÂØÜÈí•Êñá‰ª∂: ../../resources/ecnu.key
‚úì openaiÂ∫ìÂ∑≤ÂÆâË£ÖÔºåÁâàÊú¨: 1.97.1

ÂºÄÂßãÊµãËØïChatECNU...
‚úì ChatECNUÂÆ¢Êà∑Á´ØÂàùÂßãÂåñÊàêÂäü

ÊµãËØï1: ÁÆÄÂçïÂØπËØù
ÂèëÈÄÅÊ∂àÊÅØ: ‰Ω†Â•ΩÔºåËØ∑ÁÆÄÂçï‰ªãÁªç‰∏Ä‰∏ã‰Ω†Ëá™Â∑±„ÄÇ
‚úì Êî∂Âà∞ÂõûÂ§ç: ÊÇ®Â•ΩÔºÅÊàëÊòØChatECNUÔºåÁî±Âçé‰∏úÂ∏àËåÉÂ§ßÂ≠¶ÂºÄÂèëÁöÑ‰∏ÄÊ¨æÊô∫ËÉΩÂØπËØùÊúçÂä°„ÄÇÂæàÈ´òÂÖ¥‰∏∫ÊÇ®Êèê‰æõÂ∏ÆÂä©„ÄÇ‰Ωú‰∏∫‰∏ÄÊ¨æÊ†°Âõ≠Êô∫ËÉΩÂä©ÊâãÔºåÊàëÂèØ‰ª•‰∏∫ÊÇ®Ëß£Á≠îÂêÑÁßçÈóÆÈ¢òÔºåÂåÖÊã¨Â≠¶‰π†ÁßëÁ†î„ÄÅÊ†°Âõ≠ÁîüÊ¥ª„ÄÅÊïôËÇ≤Âí®ËØ¢Á≠âÊñπÈù¢„ÄÇËØ∑ÈóÆÊúâ‰ªÄ‰πàÊàëÂèØ‰ª•Â∏ÆÊÇ®ÁöÑÂêóÔºü

ÊµãËØï2: Ëé∑ÂèñÂèØÁî®Ê®°Âûã
‚úì ÂèØÁî®Ê®°Âûã: ['ChatECNU', 'ecnu-embedding-small', 'ecnu-max', 'ecnu-plus', 'ecnu-image', 'DALL-E-3', 'ecnu-vl', 'ecnu-rerank', 'ecnu-reasoner', 'gpt-4', 'ecnu-reasoner-lite', 'educhat-psychology', 'educhat-general', 'ecnu-turbo', 'InnoSpark', 'InnoSpark-R', 'educhat-r1', 'ChatECNU-app', 'educhat-r1-app', 'deepseekv3-app', 'image-app', 'deepseek-chat-app', 'Qwen3-32B-app', '

In [7]:
import time
import random

def write_failure_log(term:str,mode:str,error:str="ChatECNU returned None",
                      log_file:str="../../logs/failed_requests.json"):
    """Append one failure record to a JSON log file and print a confirmation.

    The log file holds a JSON list of ``{"term", "mode", "error"}`` objects.
    It is read, extended with the new record and rewritten in full.

    Args:
        term: The term whose request failed.
        mode: The mode that was passed along with the term.
        error: Description of the failure.
        log_file: Path of the JSON log. Defaults to the location the notebook
            has always used; missing parent directories are created.

    Returns:
        None.
    """
    # Imported locally so the function does not depend on which notebook
    # cells happened to run (and import these modules) before it is called.
    import json
    import os

    failure_log = {
        "term": term,
        "mode": mode,
        "error": error
    }

    # os.makedirs("") raises, so only create a directory when the path has one.
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # A missing or unparsable log simply starts a new list.
    try:
        with open(log_file, 'r', encoding='utf-8') as f:
            failures = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        failures = []

    # Valid JSON that is not a list cannot be appended to; keep the old
    # content as the first entry instead of crashing or discarding it.
    if not isinstance(failures, list):
        failures = [failures]

    failures.append(failure_log)

    with open(log_file, 'w', encoding='utf-8') as f:
        json.dump(failures, f, ensure_ascii=False, indent=2)

    print(f"‚ùå Â§±Ë¥•ËÆ∞ÂΩïÂ∑≤‰øùÂ≠ò: {term} ({mode})")

def graph_gen_with_retry(term: str, mode: str, max_retries=3, delay_range=(1, 5)):
    """Call ``graph_gen`` with retries and a random pause between attempts.

    Args:
        term: Term forwarded to ``graph_gen``.
        mode: Mode forwarded to ``graph_gen``.
        max_retries: Total number of attempts to make.
        delay_range: ``(low, high)`` bounds, in seconds, passed to
            ``random.uniform`` to pick the pause before the next attempt.

    Returns:
        The first truthy result of ``graph_gen``. If every attempt yields a
        falsy result or raises, the failure is recorded with
        ``write_failure_log`` and an empty string is returned.
    """
    for attempt_no in range(1, max_retries + 1):
        retries_remain = attempt_no < max_retries
        try:
            generated = graph_gen(term, mode)

            # Success: report it only when it took more than one attempt.
            if generated:
                if attempt_no > 1:
                    print(f"‚úÖ ÈáçËØïÊàêÂäü (Á¨¨{attempt_no}Ê¨°Â∞ùËØï): {term}")
                return generated

            # Empty result: wait and try again, or give up and log.
            if retries_remain:
                pause = random.uniform(*delay_range)
                print(f"‚è≥ Á¨¨{attempt_no}Ê¨°Â∞ùËØïÂ§±Ë¥•Ôºå{pause:.1f}ÁßíÂêéÈáçËØï: {term}")
                time.sleep(pause)
            else:
                print(f"‚ùå ÊâÄÊúâÈáçËØïÈÉΩÂ§±Ë¥•‰∫Ü: {term}")
                write_failure_log(term, mode)

        except Exception as exc:
            # Same policy for raised errors, with the message included.
            if retries_remain:
                pause = random.uniform(*delay_range)
                print(f"‚ö†Ô∏è Á¨¨{attempt_no}Ê¨°Â∞ùËØïÂá∫ÈîôÔºå{pause:.1f}ÁßíÂêéÈáçËØï: {term} - {str(exc)}")
                time.sleep(pause)
            else:
                print(f"‚ùå ÊâÄÊúâÈáçËØïÈÉΩÂá∫Èîô‰∫Ü: {term} - {str(exc)}")
                write_failure_log(term, mode, str(exc))

    return ""

In [8]:
from tqdm import tqdm
import os

# Generate (or top up) one triple file per condition code.
# A file that already has at least 100 newline-separated entries is left alone;
# otherwise freshly generated triples are appended to whatever was there.
for key in tqdm(condition_dict.keys()):
    file = f'../../graphs/condition/CCSCM/{key}.txt'

    prev_triples = ""
    if os.path.exists(file):
        with open(file=file, mode="r", encoding='utf-8') as f:
            prev_triples = f.read()
        if len(prev_triples.split('\n')) >= 100:
            continue

    outstr = prev_triples + graph_gen_with_retry(term=condition_dict[key], mode="condition")

    # `with` guarantees the handle is flushed and closed on every iteration;
    # the bare open() used before left each file open.
    with open(file=file, mode='w', encoding='utf-8') as outfile:
        outfile.write(outstr)

  1%|          | 2/285 [01:11<2:44:54, 34.96s/it]

Ë∞ÉÁî®ECNU APIÊó∂Âá∫Èîô: 'NoneType' object is not subscriptable
Ë≠¶Âëä: ChatECNUËøîÂõûNoneÔºåË∑≥Ëøáterm: acute myocardial infarction
‚è≥ Á¨¨1Ê¨°Â∞ùËØïÂ§±Ë¥•Ôºå2.8ÁßíÂêéÈáçËØï: acute myocardial infarction


  1%|          | 3/285 [03:03<5:29:43, 70.15s/it]

‚úÖ ÈáçËØïÊàêÂäü (Á¨¨2Ê¨°Â∞ùËØï): acute myocardial infarction


  1%|‚ñè         | 4/285 [04:02<4:44:01, 60.65s/it]


KeyboardInterrupt: 

In [None]:
from tqdm import tqdm
import os

# Generate (or top up) one triple file per procedure code.
# A file that already has at least 150 newline-separated entries is left alone;
# otherwise freshly generated triples are appended to whatever was there.
for key in tqdm(procedure_dict.keys()):
    file = f'../../graphs/procedure/CCSPROC/{key}.txt'

    prev_triples = ""
    if os.path.exists(file):
        with open(file=file, mode="r", encoding='utf-8') as f:
            prev_triples = f.read()
        if len(prev_triples.split('\n')) >= 150:
            continue

    outstr = prev_triples + graph_gen_with_retry(term=procedure_dict[key], mode="procedure")

    # `with` guarantees the handle is flushed and closed on every iteration;
    # the bare open() used before left each file open.
    with open(file=file, mode='w', encoding='utf-8') as outfile:
        outfile.write(outstr)

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 231/231 [47:01<00:00, 12.21s/it] 


In [None]:
from tqdm import tqdm
import os

# Generate (or top up) one triple file per drug code under graphs/drug/ATC5.
# A file that already has at least 150 newline-separated entries is left alone;
# otherwise freshly generated triples are appended to whatever was there.
# NOTE(review): drug_dict is built in the first cell from rows whose 'level'
# is '3.0', yet this loop writes into an ATC5 directory -- confirm that this
# is intended and not a leftover from a level-5 run.
for key in tqdm(drug_dict.keys()):
    file = f'../../graphs/drug/ATC5/{key}.txt'

    prev_triples = ""
    if os.path.exists(file):
        with open(file=file, mode="r", encoding='utf-8') as f:
            prev_triples = f.read()
        if len(prev_triples.split('\n')) >= 150:
            continue

    outstr = prev_triples + graph_gen_with_retry(term=drug_dict[key], mode="drug")

    # `with` guarantees the handle is flushed and closed on every iteration;
    # the bare open() used before left each file open.
    with open(file=file, mode='w', encoding='utf-8') as outfile:
        outfile.write(outstr)

In [None]:
from tqdm import tqdm
import os

# Generate (or top up) one triple file per drug code under graphs/drug/ATC3.
# A file that already has at least 150 newline-separated entries is left alone;
# otherwise freshly generated triples are appended to whatever was there.
for key in tqdm(drug_dict.keys()):
    file = f'../../graphs/drug/ATC3/{key}.txt'

    prev_triples = ""
    if os.path.exists(file):
        with open(file=file, mode="r", encoding='utf-8') as f:
            prev_triples = f.read()
        if len(prev_triples.split('\n')) >= 150:
            continue

    outstr = prev_triples + graph_gen_with_retry(term=drug_dict[key], mode="drug")

    # `with` guarantees the handle is flushed and closed on every iteration;
    # the bare open() used before left each file open.
    with open(file=file, mode='w', encoding='utf-8') as outfile:
        outfile.write(outstr)

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 269/269 [44:31<00:00,  9.93s/it] 
