In [1]:
import json
import pandas as pd
from srsly import read_jsonl,read_json,write_jsonl,write_json
from os.path import join, exists
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the prompt definition from ../prompts (path is relative to the notebook's working directory).
prompt_dict = read_json(join("..",'prompts','prompt_rachel_v4.json'))

In [3]:
def build_prompt_from_dict(prompt_dict):
    """
    Render a prompt dictionary as a labelled, human-readable prompt string.

    Only the static parts of the prompt are rendered; no examples and no
    input text are added. Sections whose value is missing or empty are
    skipped entirely.

    Args:
        prompt_dict (dict): Prompt components. Recognised keys are
            'system_prompt', 'query', 'instructions' (an iterable of
            instruction strings) and 'format_instruction'.

    Returns:
        str: The labelled sections concatenated in the order system prompt,
        query, instructions, format instruction. Empty string when no
        section has content.
    """
    sections = []

    system_text = prompt_dict.get('system_prompt', '')
    if system_text:
        sections.append(f"System Prompt: {system_text}\n\n")

    query_text = prompt_dict.get('query', '')
    if query_text:
        sections.append(f"Query: {query_text}\n\n")

    bullet_items = prompt_dict.get('instructions', [])
    if bullet_items:
        # One "- " bullet per instruction, followed by a blank separator line.
        bullets = "".join(f"- {item}\n" for item in bullet_items)
        sections.append("Instructions:\n" + bullets + "\n")

    format_text = prompt_dict.get('format_instruction', '')
    if format_text:
        sections.append(f"Format Instruction: {format_text}\n\n")

    return "".join(sections)


In [4]:
# Preview the labelled prompt (no examples, no input text) to check the loaded prompt file.
print(build_prompt_from_dict(prompt_dict))

System Prompt: You are a helpful and accurate information extraction assistant.

Query: From the following medical text extract the following details: 1. identify whether any diagnosis from our list of Rheumatic Diseases is confirmed for the patient in the text 2. extract the specific words that indicate the diagnosis.

Instructions:
- The list of Rheumatic Diseases that you need to identify diagnoses for includes: 'Rheumatoid arthritis'
- Focus on identifying diagnoses that are confirmed by the physician.
- - Consider a diagnosis as set, even when the physician states it is a working diagnosis.
- - Do not consider the diagnosis as confirmed when the physician describes it as probable or likely.
- - Do not consider the diagnosis as confirmed when the physician describes it a differential diagnosis (dd).
- - Do not consider the diagnosis as confirmed when the physician describes it in conjunction with another overlapping diagnosis or when it is stated as a mixed clinical picture.
- - A 

In [5]:
def build_llm_prompt(sample_prompt: dict, examples: list = None, input: str = None) -> str:
    """
    Build the prompt text sent to the LLM from an already-loaded prompt dictionary.

    The prompt is assembled in this order: system prompt, query, one "- "
    bullet per instruction, an optional examples section, the format
    instruction and, optionally, the medical text to analyse.

    Args:
        sample_prompt: Prompt components. Must contain the keys
            'system_prompt', 'query', 'instructions' (an iterable of strings)
            and 'format_instruction'.
        examples: Optional iterable of example strings. When None or empty
            the examples section (including its header) is omitted. A single
            string is treated as one example rather than being iterated
            character by character.
        input: Optional medical text. When given it is appended after the
            format instruction under a "Medical text:" label. Pass it by
            keyword (``input=...``); the second positional slot is `examples`.

    Returns:
        The formatted prompt string.

    Raises:
        KeyError: If `sample_prompt` is missing one of the required keys.
    """
    if isinstance(examples, str):
        # A bare string would otherwise be looped over one character at a time.
        examples = [examples]
    elif examples is not None:
        # Materialise so emptiness can be checked for any iterable.
        examples = list(examples)

    parts = [
        sample_prompt['system_prompt'] + "\n\n",
        sample_prompt['query'] + "\n\n",
    ]
    parts.extend("- " + instruction + "\n\n" for instruction in sample_prompt['instructions'])

    if examples:
        # NOTE(review): "rhuematic" is misspelled; the text is kept unchanged so
        # prompts stay identical to earlier runs. Confirm before correcting it.
        parts.append("Here are some examples for text that do or do not contain explicit rhuematic disease diagnosis:\n")
        parts.extend(example + "\n" for example in examples)

    parts.append("\n" + sample_prompt['format_instruction'] + "\n\n")

    if input is not None:
        parts.append("\nMedical text: " + input + "\n")

    return "".join(parts)

In [7]:
from ollama import chat

In [8]:
# Model tag passed as `model=` to ollama's generate() further down.
model = "deepseek-r1:32b"

In [9]:
import re
import json
def extract_results(llm_output, think_token=r'</think>'):
    """
    Split an LLM response into reasoning and answer and parse the answer's JSON.

    Everything before the first occurrence of `think_token` is treated as the
    model's reasoning; everything after it is the answer. When the token does
    not occur, the whole response is treated as the answer and the reasoning
    is empty.

    The answer is scanned for the first '{' from which a complete JSON object
    can be decoded. Nested objects and string values containing braces are
    handled, and surrounding text (such as a Markdown code fence) is ignored.
    Parsing is best effort: if no object can be decoded a message is printed
    and only the 'output' and 'think' keys are returned.

    Args:
        llm_output (str): Raw text returned by the model.
        think_token (str): Literal marker (not a regular expression) that
            ends the reasoning section.

    Returns:
        dict: The keys of the decoded JSON object (if any), plus
        'output' (the stripped answer text) and 'think' (the stripped
        reasoning text). These two keys overwrite same-named JSON keys.
    """
    reasoning, separator, answer = llm_output.partition(think_token)
    if not separator:
        # No think token present: the whole response is the answer.
        reasoning, answer = "", llm_output
    output = answer.strip()
    think = reasoning.strip()

    data_dictionary = {}
    decoder = json.JSONDecoder()
    first_error = None
    first_brace = output.find("{")
    start = first_brace
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete JSON value,
            # so trailing text after the object is ignored.
            data_dictionary, _ = decoder.raw_decode(output[start:])
            break
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e
            start = output.find("{", start + 1)
    else:
        # Reached only when no candidate '{' produced a valid object.
        if first_brace == -1:
            print("No JSON object found at the end of the string.")
        else:
            print(f"Error decoding JSON: {first_error}")
            print("Could not parse the following JSON string:", output[first_brace:])

    data_dictionary['output'] = output
    data_dictionary['think'] = think
    return data_dictionary

In [10]:
# Example medical text (appears to be a tokenised Dutch clinical note) used to try out the prompt end to end.
sample = "1 . oligo - artritis van beide polsen bij bekende seropositieve ra , dd activiteit ra , inflammatoire ( secundaire ) artrose "

In [11]:
# Build the prompt without examples; the text goes in by keyword because the second positional parameter is `examples`.
prompt = build_llm_prompt(prompt_dict,input = sample)

In [13]:
# Show the exact text that will be sent to the model.
print(prompt)

You are a helpful and accurate information extraction assistant.

From the following medical text extract the following details: 1. identify whether any diagnosis from our list of Rheumatic Diseases is confirmed for the patient in the text 2. extract the specific words that indicate the diagnosis.

- The list of Rheumatic Diseases that you need to identify diagnoses for includes: 'Rheumatoid arthritis'

- Focus on identifying diagnoses that are confirmed by the physician.

- - Consider a diagnosis as set, even when the physician states it is a working diagnosis.

- - Do not consider the diagnosis as confirmed when the physician describes it as probable or likely.

- - Do not consider the diagnosis as confirmed when the physician describes it a differential diagnosis (dd).

- - Do not consider the diagnosis as confirmed when the physician describes it in conjunction with another overlapping diagnosis or when it is stated as a mixed clinical picture.

- - A previous or historic rheumatic

In [14]:
from ollama import generate

In [None]:
# Pass the text by keyword: the second positional parameter of build_llm_prompt
# is `examples`, so a positional `sample` would be treated as a list of examples
# (one per character) and no "Medical text:" section would be added.
prompt = build_llm_prompt(prompt_dict, input=sample)


In [None]:
# Send the assembled prompt (instructions plus medical text) to the local model.
sample_response = generate(
    model=model,
    prompt=prompt,
)

In [None]:
# Split the raw response text into reasoning and answer and parse the JSON verdict; shown as the cell output.
extract_results(sample_response.response)

{'diagnosis': 'none',
 'is_diagnosis_given': 0,
 'confidence_level': 3,
 'output': 'The text mentions \'seropositive RA\' but in a context that suggests it\'s part of a differential diagnosis. Additionally, it references secondary inflammatory osteoarthritis, which complicates the confirmation of Rheumatoid Arthritis.\n\n```json\n{\n    "diagnosis": "none",\n    "is_diagnosis_given": 0,\n    "confidence_level": 3\n}\n```',
 'think': "<think>\nOkay, I need to help extract information from a medical text. The task is to check if 'Rheumatoid arthritis' is confirmed as a diagnosis. Let me go through the steps carefully.\n\nFirst, I read the instructions again. I have to look for any mention of Rheumatic Diseases, specifically checking if 'Rheumatoid arthritis' is confirmed. The confirmation can be even if it's a working diagnosis but not if it's probable, likely, differential, or part of another condition. Also, previous diagnoses count unless rejected.\n\nLooking at the provided text, I s