In [13]:
import dspy
import json
from typing import List, Dict, Optional

# DSPy signature describing the graph-extraction task.
# The system prompt is implicitly handled by the DSPy Signature's docstring, so the
# docstring text below is part of the program's behavior, not just documentation.
# The two class attributes define the desired input and output fields for the model.
class Graph(dspy.Signature):
    """
    You are a network graph maker who extracts terms and their relations from a given context.
    Extract the ontology of terms mentioned in the context. These terms should represent the key concepts.
    Terms may include objects, entities, locations, organizations, persons, conditions, acronyms, documents, services, or concepts.
    Terms should be as atomistic as possible.
    Think about how these terms can have one-on-one relations with other terms mentioned in the same sentence or paragraph.
    Find the relation between each such related pair of terms.

    Format your output as a list of JSON objects. Each element of the list contains a pair of terms
    and the relation between them.
    """

    # Input field: the text chunk handed to the model (passed as `context=` by callers).
    context = dspy.InputField(
        desc="The context chunk from which to extract the graph."
    )
    # Output field: the model's answer. construct_graph_from_text parses it with
    # json.loads and requires the keys "node_1", "node_2" and "edge" on every element,
    # so the example in `desc` must stay in sync with that validation.
    graph_json = dspy.OutputField(
        desc="A list of json objects, like the following: "
             '[{"node_1": "A concept", "node_2": "A related concept", "edge": "relationship between the two concepts"}]'
    )

def _strip_code_fence(raw: str) -> str:
    """Return ``raw`` without a surrounding Markdown code fence, if it has one.

    Chat models frequently wrap JSON answers in a fence such as ```json ... ```,
    which ``json.loads`` rejects. Text without a leading fence is only stripped
    of surrounding whitespace.
    """
    cleaned = raw.strip()
    if not cleaned.startswith("```"):
        return cleaned

    # Drop the opening fence line; it may carry a language tag (e.g. ```json).
    _, newline, remainder = cleaned.partition("\n")
    if not newline:
        # Single-line answer: only the three backticks themselves can be removed.
        remainder = cleaned[3:]

    remainder = remainder.rstrip()
    if remainder.endswith("```"):
        remainder = remainder[:-3]
    return remainder.strip()


def construct_graph_from_text(
    text: str, llm: dspy.LM, skip_invalid: bool = False
) -> Optional[List[Dict[str, str]]]:
    """
    Constructs a network graph in JSON format from text using an LLM with DSPy.

    The model is run with a ChainOfThought module over the ``Graph`` signature.
    Its ``graph_json`` answer is stripped of an optional Markdown code fence,
    parsed as JSON, and validated: the result must be a list whose elements are
    dicts containing the keys "node_1", "node_2" and "edge".

    Args:
        text (str): The input text to process.
        llm (dspy.LM): An initialized DSPy language model. It is installed as the
            global DSPy default via ``dspy.settings.configure``.
        skip_invalid (bool): When False (default), a single malformed element
            rejects the whole graph. When True, malformed elements are reported
            and dropped, and the remaining valid edges are returned.

    Returns:
        Optional[List[Dict[str, str]]]: A list of dictionaries representing the graph,
            or None if the output cannot be parsed, is not a list, contains a
            malformed element (with ``skip_invalid=False``), or contains no valid
            element at all (with ``skip_invalid=True``).

    Note:
        Errors raised by the model call itself (e.g. the server being unreachable)
        are not caught here and propagate to the caller.
    """
    # Configure DSPy to use the provided language model.
    dspy.settings.configure(lm=llm)

    # Define the generator module with a ChainOfThought pattern and run it.
    generator = dspy.ChainOfThought(Graph)
    result = generator(context=text)

    raw_output = result.graph_json
    if isinstance(raw_output, str):
        raw_output = _strip_code_fence(raw_output)

    try:
        # The output from the model is a string that needs to be parsed into JSON.
        # A non-string value (e.g. None) raises TypeError and is reported the same way.
        graph_data = json.loads(raw_output)
    except (json.JSONDecodeError, TypeError) as e:
        print(f"Error decoding or validating JSON: {e}")
        return None

    if not isinstance(graph_data, list):
        print("Validation Error: Output is not a list.")
        return None

    required_keys = ("node_1", "node_2", "edge")
    valid_edges = []
    for item in graph_data:
        if isinstance(item, dict) and all(key in item for key in required_keys):
            valid_edges.append(item)
            continue

        print(f"Validation Error: Item '{item}' is missing required keys.")
        if not skip_invalid:
            return None

    # With skip_invalid=True every element may have been dropped; report that as a
    # failure rather than as an empty graph. A genuinely empty list is returned as-is.
    if graph_data and not valid_edges:
        return None
    return valid_edges


In [18]:

# Language model handle used by the extraction cells; stays None when setup fails.
ollama_lm = None
try:
    # Open question left by the author: try a Mistral model here instead?
    ollama_lm = dspy.LM(
        'ollama_chat/deepseek-r1:32b',
        api_base='http://localhost:11434',
        api_key='',
    )
except Exception as init_error:
    print(f"Failed to initialize Ollama. Please ensure Ollama is running and accessible.")
    print(f"Error: {init_error}")

In [19]:
import os

from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine
from mimic.orm_create.mimiciv_v3_orm import PreprocessedRevisedNote, Base

# Connection string for the PostgreSQL database holding the notes.
# Set the MIMIC_DB_URI environment variable to supply real credentials without
# editing (or committing) them in the notebook; the literal below is only the
# previous local-development default, kept so existing setups continue to work.
DB_URI = os.environ.get(
    "MIMIC_DB_URI",
    "postgresql://postgres:password@localhost:5432/mimicIV_v3",
)
engine = create_engine(DB_URI)

# Create any tables declared on Base that do not exist yet.
Base.metadata.create_all(engine)

Session = sessionmaker(bind=engine)
# The session is left open on purpose: the loaded note objects stay attached to it.
session = Session()

# Loads every PreprocessedRevisedNote row into memory as a list.
discharge_notes = session.query(PreprocessedRevisedNote).all()

In [20]:
# Spot-check: display the text of the first note in discharge_notes.
discharge_notes[0].text

' \n                        Unit    \n \n Admission Date                 Discharge Date    \n \n Date Birth                sex    F \n \n Service MEDICINE \n \n Allergies \n Known Allergies Adverse Drug reaction \n \n attending \n \n Chief Complaint \n worsen ABD distension pain \n \n history Present Illness \n hcv cirrhosis c b ascite hiv ART h o IVDU COPD \n bioplar ptsd present OSH ED worsen abd \n distension past week  \n Pt report self discontinue lasix spirnolactone week \n ago feel like \n want chemical \n follow na restrict diet past week note \n have worsen abd distension discomfort \n deny edema SOB orthopnea deny f c n v d c \n dysuria food poisoning week ago eat stale \n cake n v 20 min food ingestion resolve \n day deny recent illness sick contact note \n notice gum bleed brush tooth \n recent week deny easy bruising melena BRBPR \n hemetesis hemoptysis hematuria  \n abd pain go OSH ED transfer \n care ED report pt brief period \n confusion recall ultrasound bloodwork \n o

In [21]:
import json
from tqdm import tqdm
import os

# One JSON file per note is written here. Existing files are skipped, so an
# interrupted run can simply be restarted and will resume where it stopped.
KG_OUTPUT_DIR = "revised_preprocessed_kgs"
os.makedirs(KG_OUTPUT_DIR, exist_ok=True)

failed = 0
for note in tqdm(discharge_notes):
    output_path = os.path.join(KG_OUTPUT_DIR, f"{note.row_id}.json")
    if os.path.exists(output_path):
        continue

    # Exceptions raised by the model call are deliberately not caught: if the
    # model server is unreachable the run stops instead of marking every
    # remaining note as failed.
    json_graph_output = construct_graph_from_text(note.text, ollama_lm)

    # A falsy result (None, or an empty list) counts as a failure and leaves no
    # file behind, so the note is retried on the next run.
    if json_graph_output:
        # Write to a temporary file and rename it into place. A crash in the
        # middle of a write then never leaves a truncated file that the
        # existence check above would treat as finished.
        temp_path = f"{output_path}.tmp"
        with open(temp_path, "w", encoding="utf-8") as f:
            json.dump(json_graph_output, f, indent=4)
        os.replace(temp_path, output_path)
    else:
        failed += 1
        print(f"Could not generate or validate the graph from the provided text. Overall failed: {failed}")
        

  0%|          | 56/331793 [15:04<2491:49:23, 27.04s/it]

Validation Error: Item '{'node_2': 'HTN', 'edge': 'related to'}' is missing required keys.
Could not generate or validate the graph from the provided text. Overall failed: 1


  0%|          | 114/331793 [1:12:05<5498:06:17, 59.68s/it]

Validation Error: Item '{'node_1': 'Blood culture', 'edge': 'test performed'}' is missing required keys.
Could not generate or validate the graph from the provided text. Overall failed: 2


  0%|          | 148/331793 [1:44:55<3918:22:24, 42.53s/it]


RuntimeError: Both structured output format and JSON mode failed. Please choose a model that supports `response_format` argument. Original error: litellm.APIConnectionError: Ollama_chatException - [WinError 10061] Es konnte keine Verbindung hergestellt werden, da der Zielcomputer die Verbindung verweigerte