In [2]:

import os
import pandas as pd
import nltk
from bert_score import score
from pprint import pprint
from botocore.exceptions import ClientError
import openai
from difflib import SequenceMatcher
import re
import mammoth
import openai
from dotenv import load_dotenv



#nltk.download('punkt')

In [1]:
# Run the supporting notebooks in this kernel so that whatever they define is
# available below: the evaluation-methods notebook and the visualisation notebook.
%run "evaluation_methods.ipynb"
%run "visualisation.ipynb"

API parameters for OpenAI GPT models

In [3]:
# Pull the settings stored in the local .env file into the process environment.
load_dotenv()

# The API key comes from the environment; os.getenv yields None when the variable is absent.
openai.api_key = os.getenv('OPENAI_API_KEY')
# NOTE(review): the organisation id is hard-coded; consider moving it into the .env file too.
openai.organization = 'org-WD3x2XVqrr4UO8u1zjuYSyXZ'

##### Parameters that require setting
- Choose medication out of vancomycin, monofer, amikacin or phenytoin 
- Ensure `goldstandard` is set to `False` unless the medication being input is Voriconazole or Aminophyline
- Set whether evaluation should occur; if not, only the improved versions will be generated, with no metrics

In [None]:

# Base folder used to build the paths of the medication documents and generated files.
# NOTE(review): absolute, machine-specific path - adjust it for your environment.
root_file = 'C:\\Users\\dedbl\\Documents\\NHSPolicy\\'

# Medication to process.
# NEW MEDS: 'vancomycin', 'monofer', 'amikacin', 'phenytoin' (also: 'meropenem')
# OLD MEDS: 'Voriconazole', 'Aminophyline', 'Tigecycline', 'Rifampicin'
drug = 'amiodarone'

# Does a manually improved version exist? This is used in the development stage.
goldstandard = False

# Whether evaluation should occur.
evaluate = True

# Whether an overview summary of the text will be taken before hardcoded refinement.
summarise = True

#### Convert and reformat .docx to .md format so headings and sections can be extracted
- removes unwanted graphics and links
- changes ___ to ###

In [158]:
# Convert the source .docx of the selected drug into markdown text.
with open(root_file+"medications\\"+drug+".docx", "rb") as docx_file:
    result = mammoth.convert_to_markdown(docx_file)

# Load the formatting instructions that are prepended to the converted document.
with open('new_version_prompt_markdown_formatting.txt', "r") as prompt_formatting_file:
    prompt_formatting_instructions = prompt_formatting_file.read()

full_prompt_formatting = prompt_formatting_instructions + result.value

# Ask the model to reformat the converted markdown; the reply text is the new document.
response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[{"role": "user", "content": full_prompt_formatting}],
    max_tokens=2500,
)
gpt_output_formatting = response["choices"][0]["message"]["content"]

markdown_file_name = drug + "_gpt_formatted.md"
# create_df_medicine reads root_file + drug + '_gpt_formatted.md', so the file is
# written there; writing to the working directory only worked when the notebook
# happened to be run from root_file.
markdown_file_path = root_file + markdown_file_name
with open(markdown_file_path, "w") as output_markdown_file:
    output_markdown_file.write(gpt_output_formatting)

In [5]:
def extract_headings_and_sections(markdown_text, column):
    """Split markdown text into its headings and the text found under each one.

    A heading is a line that starts (after optional whitespace) with two or
    three '#' characters. The section of a heading is everything between the
    end of that heading line and the start of the next heading (or the end of
    the text), with surrounding whitespace stripped. Text before the first
    heading is ignored.

    Args:
        markdown_text: The markdown document as a single string.
        column: Currently unused. The content column of the returned frame is
            always named 'column', which is the name
            whole_examples_general_instruction reads.

    Returns:
        A tuple ``(headings_and_sections, df)``:
        - ``headings_and_sections``: dict mapping heading text to section text.
          If a heading text repeats, the last occurrence wins.
        - ``df``: DataFrame with columns 'headings' and 'column', one row per
          heading occurrence in document order. It is empty (but has both
          columns) when the text contains no headings.
    """
    heading_pattern = re.compile(r'^\s*(#{2,3})\s*(.*)', re.MULTILINE)
    # Collect every heading once so each section can end where the next heading starts,
    # instead of re-searching the remaining text for every heading.
    matches = list(heading_pattern.finditer(markdown_text))

    headings_and_sections = {}
    headings = []
    sections = []
    for position, match in enumerate(matches):
        heading_text = match.group(2).strip()

        if position + 1 < len(matches):
            # The section ends when the next heading begins.
            end_of_section = matches[position + 1].start()
        else:
            # Last heading: the section runs to the end of the text.
            end_of_section = len(markdown_text)

        section_content = markdown_text[match.end():end_of_section].strip()
        headings_and_sections[heading_text] = section_content
        headings.append(heading_text)
        sections.append(section_content)

    # Built once, outside the loop, so it also exists when no heading was found.
    df = pd.DataFrame({'headings': headings, 'column': sections})
    return headings_and_sections, df

The *create_df_medicine* method converts the .md format into a dataframe, by extracting all the headings and their corresponding contents

In [6]:
def create_df_medicine(drug = drug, goldstandard_file = None):
    """Load the formatted markdown of a drug and return its sections as a DataFrame.

    Reads ``root_file + drug + '_gpt_formatted.md'``, splits it with
    extract_headings_and_sections and prints the headings that were found.
    When the module-level ``goldstandard`` flag equals True, the CSV at
    ``root_file + goldstandard_file`` is read and inner-merged on 'headings',
    with overlapping column names suffixed 'original' / 'goldstandard'.

    Args:
        drug: Name of the medication; defaults to the value of the global
            ``drug`` at the time this function was defined.
        goldstandard_file: CSV file name relative to ``root_file`` (something
            like 'gs_V.csv'); only read when ``goldstandard`` is True.

    Returns:
        The sections DataFrame, merged with the gold standard when enabled.
    """
    markdown_path = root_file + drug + '_gpt_formatted.md'
    with open(markdown_path, 'r') as handle:
        markdown_content = handle.read()

    _, df = extract_headings_and_sections(markdown_content, 'original')
    print(df['headings'])

    if goldstandard == True:
        goldstandard_df = pd.read_csv(root_file + goldstandard_file)
        print(goldstandard_df['headings'])
        # Keep only the headings present in both frames.
        df = df.merge(
            goldstandard_df,
            on='headings',
            how='inner',
            suffixes=('original', 'goldstandard'),
        )
        print(df['headings'])
    return df


##### Generate the policy taking the prompt as input

In [24]:
def generate_policy_chat(prompt):
    """Send ``prompt`` as a single user message and return the text of the first reply.

    Args:
        prompt: Full prompt text to send.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    request = {
        # A fixed model snapshot is used to prevent updates changing how the prompts operate.
        "model": "gpt-4-0314",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 2500,
        # Zero, so that the model is as deterministic as possible.
        "temperature": 0,
    }
    completion = openai.ChatCompletion.create(**request)
    return completion["choices"][0]["message"]["content"]

##### Generate a version of the policy that has improved markdown

In [23]:
def generate_markdown(prompt):
    """Send ``prompt`` as a single user message and return the text of the first reply.

    Same request as generate_policy_chat except that the temperature is 1.

    Args:
        prompt: Full prompt text to send.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    request = {
        # A fixed model snapshot is used to prevent updates changing how the prompts operate.
        "model": "gpt-4-0314",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 2500,
        # NOTE(review): the previous comment here claimed the temperature was zero,
        # but the value is 1 - confirm which one is intended.
        "temperature": 1,
    }
    completion = openai.ChatCompletion.create(**request)
    return completion["choices"][0]["message"]["content"]

##### Main method to run policy generation

In [26]:
def whole_examples_general_instruction(df, markdown_prompt = 'convert to markdown', evaluate = True, summarise = True):
    """Refine every non-empty section of ``df`` with GPT and optionally evaluate it.

    For each row whose 'column' text is longer than one character the function:
    1. optionally summarises the text with the overview prompt (``summarise``),
    2. refines the (summarised or original) text with the refinement prompt,
    3. asks for a markdown version of the refined text,
    4. optionally calls ``evaluation`` and stores its result as one row.
    Progress and intermediate texts are printed.

    Args:
        df: DataFrame with 'headings' and 'column' columns.
        markdown_prompt: NOTE(review): currently ignored - the value is
            overwritten with the contents of 'prompts//markdown_comments.txt'.
            Kept for interface compatibility; confirm which source is intended.
        evaluate: When equal to True, evaluate each section and collect a row.
        summarise: When truthy, summarise the text before refinement.

    Returns:
        DataFrame with one row per evaluated section and the fixed result
        columns; empty (columns only) when nothing was evaluated.

    Notes:
        ``evaluation`` is not defined in this block; it is presumably provided
        by the evaluation-methods notebook run earlier - confirm.
    """
    result_columns = ['curr_policy','plain_gpt_policy', 'markdown_gpt_policy', 'BERT_plain', 'BERT_markdown', 'BERT_gs', 'ori_fk','plain_gpt_fk','markdown_gpt_fk','gs_fk','plain_missing_entity', 'plain_extra_entity','markdown_missing_entity', 'markdown_extra_entity']

    with open('prompts//overview.txt', 'r') as file:
        overview_prompt = file.read()
    with open('prompts//experimental_comments.txt', 'r') as file:
        refine_prompt = file.read()
    with open('prompts//markdown_comments.txt', 'r') as file:
        # Replaces whatever the caller passed as markdown_prompt (see docstring).
        markdown_prompt = file.read()
    with open('prompts//volume_suggestions.txt', 'r') as file:
        # Only needed by the volume-suggestion step, which is currently disabled.
        volume_prompt = file.read()

    # Result rows are collected here and turned into a frame once at the end.
    # Concatenating a Series onto a DataFrame along the rows does not append one
    # row: the Series is stacked as extra rows under a new column.
    evaluation_rows = []

    for ind in df.index:
        original_policy = df['column'][ind]
        if len(original_policy) <= 1:
            # Skip empty (or single-character) sections.
            continue

        heading = df['headings'][ind]
        print("SECTION")
        print(heading)

        print('ORIGINAL')
        print(original_policy)
        if summarise:
            print("Summarising text before refinement")
            # Two-step refinement: overview summary first, then the refinement prompt.
            overall_prompt = overview_prompt + original_policy
            overall_text = generate_policy_chat(overall_prompt)
            controlled_prompt = refine_prompt + overall_text
            language_text = generate_policy_chat(controlled_prompt)
        else:
            overall_prompt = refine_prompt + original_policy
            language_text = generate_policy_chat(overall_prompt)

        print("GPT_PLAIN ", language_text)
        gold_standard = 'Does not exist'
        print('MARKDOWN')
        markdown_text = generate_markdown(markdown_prompt + language_text)
        print(markdown_text)

        if evaluate == True:
            print("Evaluation")
            row = evaluation(original_policy, language_text, markdown_text, gold_standard)
            print("results ", row)
            # Label the values with the result columns, as before.
            evaluation_rows.append(pd.Series(row, index=result_columns))
        else:
            print("No evaluation occured")

    return pd.DataFrame(evaluation_rows, columns=result_columns)
   


### Rather than running through a whole medicine guideline, below are some example versions that can be tested from a variety of different monographs

In [30]:
import pandas as pd


# Sample text: administration instructions mentioning 'Unimycin' (loading and maintenance dose).
uni = '''
CAUTION: Unimycin may be administered as a loading dose (see 'other comments' section below) followed by a smaller maintenance dose. Double check the correct dose has been prescribed.

Loading dose by IV injection / short IV infusion: Dilute and give slowly over at least 20 minutes, using an infusion pump, at a rate not exceeding 25mg per minute.(1,5) If acute adverse effects occur, slow the rate or stop the infusion for 5-10 minutes.(1b) Can be given undiluted via a central venous access device.(10)(11)
Maintenance dose by continuous IV infusion: Dilute and administer using an infusion pump. The initial maintenance dose should not exceed 500-700micrograms/kg/hour (300micrograms/kg/hour in older patients).(5)

Adjust the rate and duration of the maintenance infusion according to plasma-unimycine level and individual patient requirements.



'''



# Sample text: numbered notes about intravenous amiodarone.
am = '''
1. This product contains benzyl alcohol, which has been associated with a fatal toxic syndrome in preterm neonates. It should not be used in neonates unless there is no alternative.

2. This product contains polysorbate 80 (Tween 80) as an exipient which may cause anaphylaxis.

3. In children the hypotension that occurs with intravenous amiodarone is related to the rate of administration and probably also the solvents (polysorbate 80 and benzyl alcohol) which causes histamine release, rather than the drug itself.

4. Contains 18.7mg iodine per 50mg ampoule. Pre-filled syringe contains 112mg of iodine per 300mg.

5. It is recommended amiodarone infusion is given through a non-DEHP (a component of PVC) -containing administration set as amiodarone may cause DEHP to leach out. However, the clinical significance of this is uncertain. Leaching of DEHP increases at lower flow rates, which is seen in paediatrics. Some references recommend avoiding continuous infusions in patients under 3 years old.

6. Store at room temperature (maximum. 25oC) in the outer carton to protect from light. Do not refrigerate or freeze.

7. Light protection is not necessary during administration, but avoid exposure to direct sunlight.
'''

# Sample text: meropenem reconstitution and dilution instructions (markdown with escaped punctuation).
# A raw string is used because the text contains backslash-escaped '.', '(' and ')',
# which are not valid escape sequences in a normal string literal; the value is unchanged.
mer = r'''
Note: Some brands of meropenem are difficult to reconstitute\. After adding the diluent shake the vial until the solution is clear\. If this takes more than a minute, shake the vial in the palm of the hand for another minute, then allow to stand for 1 minute\. Repeat until all the powder has dissolved\.  

IV injection:  
Reconstitute vial with water for injections to give a concentration of 50mg in 1mL, by adding:

- 10mL to a 500mg vial
- 20mL to a 1g vial\.  

Short IV infusion:

1. Reconstitute vial with sodium chloride 0\.9%, glucose 5% or water for injections, adding\.
      - 10mL to a 500mg vial
      - 20mL to a 1g vial\.
2. Dilute further with sodium chloride 0\.9% or glucose 5%, to a final concentration of between 1mg to 20mg in 1mL\. Suggested dilutions:
      - 500mg in 50mL
      - 1g in 100mL
      - 2g in 100mL
3. If adding a 1g or 2g dose to a 100mL bag, first remove a volume of fluid equal to the volume of the reconstituted dose from the bag and discard\.

Extended or continuous infusion:

1. Reconstitute each 1g vial with 20mL sodium chloride 0\.9% or water for injections\.
2. Dilute further with sodium chloride 0\.9% to one of the following **suggested critical care standard concentrations** suitable for administration via a central or peripheral access device:\(10\)
      - 1g in 100mL \(10mg in 1mL\)
      - 2g in 100mL \(20mg in 1mL\)
3. If adding a 1g or 2g dose to a 100mL bag, first remove a volume of fluid equal to the volume of the reconstituted dose from the bag and discard\.

In fluid restriction
Suggested minimum dilution: 1g in 10mL\.

'''


# Sample text: voriconazole reconstitution and dilution instructions.
vor = '''
Displacement value for voriconazole powder: 200mg displaces 1mL.(1) Taking into account the displacement value reconstitute with 19mL of water for injections or sodium chloride 0.9% to obtain a 10mg in 1mL solution.

Gently swirl to ensure the powder has completely dissolved and no particulates are visible. DO NOT SHAKE. Discard if vacuum does not pull the diluent into the vial.(1) Requires further dilution before administration.(1). Dilute the 10mg in 1mL reconstituted solution with sodium chloride 0.9% or glucose 5% to give a final concentration of 0.5mg (500micrograms) to 5mg in 1mL.(1)
'''
# Sample text: a wordy esomeprazole compatibility section.
stu = '''Administering via IV: Do not administer this medication intravenously through a line concurrently employed for an infusion comprising a medicinal additive without initially discontinuing the ongoing infusion. Rinse the line before and after the injection.(1)
IV Drip: British manufacturers have indicated against co-infusing esomeprazole with any other medication.(1) Nonetheless, there exists restricted data endorsing Y-site harmonization with certain medications at specific concentrations (reach out to the pharmacy for additional details).
• ceftaroline fosamil, ceftolozane-tazobactam, cisatracurium
• meropenem-vaborbactam, metoprolol
• Plasma-Lyte 148 (pH 7.4)
• tedizolid.(4)

In the United Kingdom, the sole approved diluent for esomeprazole infusion is 0.9% sodium chloride. Nevertheless, 5% glucose and Hartmann's solution also demonstrate compatibility (seek pharmacy input for more information as the stability duration of prepared infusions varies).(4)

'''


# NOTE(review): appears to be the same text as `stu`, without the trailing blank lines - confirm whether both are needed.
verb = '''Administering via IV: Do not administer this medication intravenously through a line concurrently employed for an infusion comprising a medicinal additive without initially discontinuing the ongoing infusion. Rinse the line before and after the injection.(1)
IV Drip: British manufacturers have indicated against co-infusing esomeprazole with any other medication.(1) Nonetheless, there exists restricted data endorsing Y-site harmonization with certain medications at specific concentrations (reach out to the pharmacy for additional details).
• ceftaroline fosamil, ceftolozane-tazobactam, cisatracurium
• meropenem-vaborbactam, metoprolol
• Plasma-Lyte 148 (pH 7.4)
• tedizolid.(4)

In the United Kingdom, the sole approved diluent for esomeprazole infusion is 0.9% sodium chloride. Nevertheless, 5% glucose and Hartmann's solution also demonstrate compatibility (seek pharmacy input for more information as the stability duration of prepared infusions varies).(4)'''

# Single-row frame with the 'headings' / 'column' layout that
# whole_examples_general_instruction reads; swap `mer` for another sample text to test it.
data = {'headings': ['sample'],
        'column': [mer]}

example_df = pd.DataFrame(data)

Unnamed: 0,headings,column
0,uni,\nNote: Some brands of meropenem are difficult...
1,stupid,Administering via IV: Do not administer this m...


### MAIN CODE:
This will format the original document, improve the text and save the result as a csv file

In [31]:
# Build the section frame for the configured drug from its formatted markdown file.
drug_df = create_df_medicine(drug)

# Refine the hand-picked example frame without running the evaluation step.
# NOTE(review): whole_examples_general_instruction overwrites markdown_prompt with the
# contents of prompts//markdown_comments.txt, so the string passed here is not used.
example_results_general = whole_examples_general_instruction(
    df=example_df,
    markdown_prompt='Convert the text to markdown, avoid excessive use of BOLD, do not add extra text',
    evaluate=False,
    summarise=True,
)

# Save results
example_results_general.to_csv('results.csv', index=False)

SECTION
uni
ORIGINAL

Note: Some brands of meropenem are difficult to reconstitute\. After adding the diluent shake the vial until the solution is clear\. If this takes more than a minute, shake the vial in the palm of the hand for another minute, then allow to stand for 1 minute\. Repeat until all the powder has dissolved\.  

IV injection:  
Reconstitute vial with water for injections to give a concentration of 50mg in 1mL, by adding:

- 10mL to a 500mg vial
- 20mL to a 1g vial\.  

Short IV infusion:

1. Reconstitute vial with sodium chloride 0\.9%, glucose 5% or water for injections, adding\.
      - 10mL to a 500mg vial
      - 20mL to a 1g vial\.
2. Dilute further with sodium chloride 0\.9% or glucose 5%, to a final concentration of between 1mg to 20mg in 1mL\. Suggested dilutions:
      - 500mg in 50mL
      - 1g in 100mL
      - 2g in 100mL
3. If adding a 1g or 2g dose to a 100mL bag, first remove a volume of fluid equal to the volume of the reconstituted dose from the bag and 