In [89]:


##### RESOURCES #####
# REST API description: https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference
# Quotas and request limits: https://learn.microsoft.com/en-us/azure/cognitive-services/openai/quotas-limits
# pricing: https://azure.microsoft.com/de-de/pricing/details/cognitive-services/openai-service/
# pricing calculator: https://azure.microsoft.com/de-de/pricing/calculator

import os
import requests
import json
import pandas as pd
from readability import Readability
import textstat
import nltk
from bert_score import score
from IPython.display import display, HTML
import webbrowser
import markdown
import logging
from pprint import pprint
import boto3
from botocore.exceptions import ClientError
import openai
from difflib import SequenceMatcher
import spacy
import re
import mammoth
from nltk.stem import PorterStemmer
from dotenv import load_dotenv
import os





#nltk.download('punkt')

API configuration for AWS Comprehend and OpenAI GPT-4

In [90]:
# AWS Comprehend setup: one client for general-purpose text analysis and one
# for Comprehend Medical, both in the eu-west-2 region.
# `endpoint_url` is kept for reference only; it is not passed to either client.
endpoint_url = 'https://comprehend.eu-west-2.amazonaws.com'
s3_client = boto3.client(service_name='comprehend', region_name='eu-west-2')
medical_client = boto3.client('comprehendmedical', region_name='eu-west-2')

In [91]:
# OpenAI credentials: the API key is read from the OPENAI_API_KEY environment
# variable (call load_dotenv() first if the key lives in a local .env file).
# NOTE(review): the organisation id is hard-coded; consider moving it to the
# environment alongside the key.
openai.organization = 'org-WD3x2XVqrr4UO8u1zjuYSyXZ'
openai.api_key = os.getenv('OPENAI_API_KEY')

### Evaluation Methodology
Methods for entity checking, key-phrase checking, and similarity metrics.

In [92]:
def find_missing(string_list, target_string):
    """Return the items of ``string_list`` that are not contained in ``target_string``.

    Containment is tested with the ``in`` operator, so ``target_string`` may be
    a string (substring test) or any other container (membership test).
    The order of ``string_list`` is preserved and duplicates are kept.
    """
    return [candidate for candidate in string_list if candidate not in target_string]

#### Entities

In [93]:

def remove_stems(text):
    """Return ``text`` with every token replaced by its Porter stem.

    The text is tokenised with ``nltk.word_tokenize``, each token is passed
    through ``PorterStemmer.stem``, and the stemmed tokens are joined back
    together with single spaces.
    """
    porter = PorterStemmer()
    tokens = nltk.word_tokenize(text)
    return " ".join(porter.stem(token) for token in tokens)

In [94]:
def remove_non_alphanumeric(text):
    """Strip every character that is not an ASCII letter, digit, whitespace, '.' or '%'.

    Runs of disallowed characters are deleted outright (not replaced by a
    space), so neighbouring words may be joined together.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9\s.%]+')
    return disallowed.sub('', text)

In [95]:
# Uses Amazon Comprehend to list the general entities found in a text, so that
# callers can check they are retained in the improved version.
def standard_entity_check(text):
    """Detect general (non-medical) entities in ``text`` with Amazon Comprehend.

    Calls ``s3_client.detect_entities`` with ``LanguageCode='en'`` and prints
    the input text and the raw entity list as a side effect.

    Returns:
        A pair ``(entity_text, entities)``: the lower-cased ``"Text"`` of every
        detected entity, and the unmodified ``'Entities'`` list from the response.
    """
    print("Checking this text for entities: ", text)
    print("Detecting  general entities.")
    detected = s3_client.detect_entities(Text=text, LanguageCode='en')['Entities']
    print(detected)
    # Lower-case so later comparisons are case-insensitive.
    lowered = [item["Text"].lower() for item in detected]
    return lowered, detected

In [96]:
# Medical Named Entity and Relationship Extraction (NERe).
# Uses Amazon Comprehend Medical to list the medical entities found in a text,
# so that callers can check they are retained in the improved version.
def medical_entity_check(text):
    """Detect medical entities in ``text`` with Amazon Comprehend Medical.

    Calls ``medical_client.detect_entities_v2`` and prints the input text and
    the type and text of every detected entity as a side effect.

    Returns:
        A pair ``(entity_text, entities)``: the lower-cased ``"Text"`` of every
        detected entity, and the unmodified ``'Entities'`` list from the response.
    """
    print("Checking this text for entities: ", text)
    print("Detecting medical entities.")
    found = medical_client.detect_entities_v2(Text=text)['Entities']
    names = []
    for item in found:
        print(f'Type: {item["Type"]}, Text: {item["Text"]}')
        # Lower-case so later comparisons are case-insensitive.
        names.append(item["Text"].lower())
    return names, found

In [97]:
def extact_all_entities(text):
    """Return the general and medical entity texts found in ``text``.

    Runs ``standard_entity_check`` followed by ``medical_entity_check``, prints
    each list, and returns the general entity texts followed by the medical
    ones. Only the text lists are kept; the raw entity records are discarded.
    """
    general_names, _ = standard_entity_check(text)
    print("Standard entities ", general_names)
    medical_names, _ = medical_entity_check(text)
    print("Medical entities ", medical_names)
    return general_names + medical_names


In [98]:
def reformat(entity_list):
    """Normalise every entity string for comparison.

    Each entry is stripped of punctuation with ``remove_non_alphanumeric``,
    lower-cased, and stemmed with ``remove_stems``, in that order.
    Returns a new list; the input is not modified.
    """
    return [
        remove_stems(str.lower(remove_non_alphanumeric(entry)))
        for entry in entity_list
    ]



In [99]:
def find_strings_not_in_text(main_text, string_list):
    """Return the entries of ``string_list`` that do not occur in ``main_text``.

    Occurrence is tested with the ``in`` operator (a substring test when
    ``main_text`` is a string). Input order is preserved and duplicates are kept.
    """
    return [needle for needle in string_list if needle not in main_text]

In [100]:
def evaluate_entities(original, new):
    """Compare the entities of an original text with those of a rewritten version.

    Entities are extracted from both texts with ``extact_all_entities`` and
    normalised with ``reformat``. Both texts are normalised the same way
    (lower-cased, punctuation stripped, stemmed) so that entities can be
    looked up in them by substring.

    Args:
        original: the source text.
        new: the rewritten text to check against ``original``.

    Returns:
        A tuple ``(all_ori_entities, all_new_entities, missing, extra)`` where
        ``missing`` holds the original entities that do not appear in the
        normalised new text, and ``extra`` holds the new entities that do not
        appear in the normalised original text.
    """
    all_ori_entities = extact_all_entities(original)
    all_new_entities = extact_all_entities(new)

    # Process entities to remove stems, capitalisation and grammatical symbols.
    all_ori_entities = reformat(all_ori_entities)
    all_new_entities = reformat(all_new_entities)

    # Apply the same normalisation to the full texts so substring checks line up.
    original_text = remove_stems(remove_non_alphanumeric(original.lower()))
    new_text = remove_stems(remove_non_alphanumeric(new.lower()))

    print("OLD", all_ori_entities)
    print("NEW", all_new_entities)

    # Missing: entities of the ORIGINAL that the new text no longer contains.
    # (The previous code looked up the new entities in the new text, which is
    # almost always an empty result.)
    missing = find_strings_not_in_text(new_text, all_ori_entities)
    # Extra: entities of the NEW text that the original text did not contain.
    extra = find_strings_not_in_text(original_text, all_new_entities)
    return all_ori_entities, all_new_entities, missing, extra

#### Relations

In [101]:
# NOTE(review): the original author flagged that max_tokens may need increasing;
# it is now a keyword argument so callers can tune it without editing this cell.
def generate_relations(section, entities, model="text-davinci-003", max_tokens=3000):
    """Ask an OpenAI completion model to list the relations between entities.

    Args:
        section: the text the entities were extracted from.
        entities: the entities as a single string (``evaluation`` passes them
            joined with ';').
        model: completion model name; defaults to the previously hard-coded one.
        max_tokens: completion token limit; defaults to the previously
            hard-coded value.

    Returns:
        The text of the first completion choice. It is also printed.
    """
    print("GENERATING")
    # Label and separate the two inputs. Previously they were concatenated
    # directly, so the end of the text ran into the first entity.
    prompt = (
        'Given the following text, and entities. Generate the relations of these '
        'entities and provide this as a list. '
        '\n\nText: ' + section +
        '\n\nEntities: ' + entities
    )
    gpt_policy = openai.Completion.create(model=model, prompt=prompt, max_tokens=max_tokens)
    text = gpt_policy['choices'][0]['text']
    print(text)
    return text

#### Key phrases

In [102]:
def extract_key_phrases(text):
    """Return the key phrases Amazon Comprehend detects in ``text``.

    Calls ``s3_client.detect_key_phrases`` with ``LanguageCode='en'`` and
    returns the ``'Text'`` of every entry in the response's ``'KeyPhrases'``.
    """
    detected = s3_client.detect_key_phrases(Text=text, LanguageCode='en')
    phrases = []
    for item in detected['KeyPhrases']:
        phrases.append(item['Text'])
    return phrases

In [103]:
def evaluate_keyphrases(original, new):
    """Compare the key phrases of an original text with those of a rewritten one.

    Args:
        original: the source text.
        new: the rewritten text to check against ``original``.

    Returns:
        A tuple ``(all_ori_kp, all_new_kp, missing, extra)`` where ``missing``
        holds the original key phrases absent from the new key-phrase list and
        ``extra`` holds the new key phrases absent from the original list.
        Both comparisons are exact list-membership tests.
    """
    all_ori_kp = extract_key_phrases(original)
    all_new_kp = extract_key_phrases(new)
    missing = find_missing(all_ori_kp, all_new_kp)
    # Previously this iterated over the characters of the raw `new` string,
    # which never produced a meaningful list of extra key phrases.
    extra = find_missing(all_new_kp, all_ori_kp)
    return all_ori_kp, all_new_kp, missing, extra

#### Similarity

In [104]:
# Captures element/character-level similarity only, not semantic similarity.
def similarity(entity1, entity2):
    """Return the ``difflib.SequenceMatcher`` ratio between two entities or key phrases.

    The result is a float in [0, 1]; 1.0 means the sequences are identical.
    """
    matcher = SequenceMatcher(isjunk=None, a=entity1, b=entity2)
    return matcher.ratio()

In [105]:
def evaluate_BERT(original, new):
    """Return the BERTScore F1 between ``original`` and ``new``, rounded to 2 decimals.

    Each text is passed to ``bert_score.score`` as a single-item list with
    ``lang="en"``. Only the F1 value is used; it is printed and returned.
    """
    _, _, F1 = score([original], [new], lang="en", verbose=False)
    f1 = round(F1.item(), 2)
    # The old print had a misplaced parenthesis: it rounded to an integer and
    # printed a stray "2" as a separate argument.
    print("BERT F1 is ", f1)
    return f1


### Readability

In [106]:
def evaluate_flesch_kincaid(text):
    """Return the Flesch-Kincaid grade of ``text`` (via ``textstat``), rounded to 2 decimals."""
    return round(textstat.flesch_kincaid_grade(text), 2)


Using GPT to detect extra (hallucinated) information added to the rewritten text

# Example pair for exercising the hallucination check: `hallucination` is the
# original text with one extra instruction prepended.
original = 'ECG, blood pressure and respiratory rate continuously during administration. Reduce rate of administration if bradycardia or hypotension occurs. Cardiac resuscitation equipment should be available. Monitor injection site during and for 72 hours following administration. Therapeutic drug level monitoring is required.'
hallucination = '**Do not infuse with any other medicines.** ' + original
gpt_markdown = hallucination
goldstandard = 'None'

In [139]:
def hallucination_check(original, new):
    """Ask GPT-4 whether ``new`` contains content that is absent from ``original``.

    Sends a single user message to ``openai.ChatCompletion`` using the pinned
    ``gpt-4-0314`` model (fixed so that model updates do not change how the
    prompt behaves) with ``temperature=0`` to keep the output as deterministic
    as possible.

    Returns:
        The content of the first choice's message, as returned by the model.
    """
    # The continuation lines of this literal keep their original indentation,
    # because that whitespace is part of the prompt sent to the model.
    instructions = '''You are a quality control expert checking whether extra meaning
        has been added to an improved version of text generated using GPT. You check by comparing
        the original and new text. Output 'yes' or 'no'.
        If 'yes', then write 'extra content has been added' followed with the text which is additional content in quotes.
        The original text: '''
    prompt = instructions + original + ' The new text: ' + new
    completion = openai.ChatCompletion.create(
        model="gpt-4-0314",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=2500,
        temperature=0,
    )
    return completion["choices"][0]["message"]["content"]

In [140]:
#output = hallucination_check(original,hallucination)
#print(output)

Yes, extra content has been added: "Do not infuse with any other medicines."


### Main evaluation method

In [116]:
#hallucination_eval

['Suitable diluents: sodium chloride 0.9% or glucose 5%.(1). Amikacin solution may darken to a pale yellow colour when diluted; this does not affect potency.(4) ',
 'Suitable diluents: sodium chloride 0.9 % or glucose 5 %. Amikacin solution may darken to a pale yellow colour when diluted due to natural chemical reactions; however, rigorous testing ensures that these reactions do not compromise its potency.',
 'Suitable diluents: sodium chloride 0.9 % or glucose 5 %. Amikacin solution may darken to a pale yellow colour when diluted due to natural chemical reactions; however, rigorous testing ensures that these reactions do not compromise its potency.',
 0.95,
 0.95,
 0.79,
 6.4,
 10.3,
 10.3,
 -3.5,
 [],
 [],
 [],
 []]

In [109]:

# Evaluation metrics include BERTScore for semantic similarity and the
# Flesch-Kincaid grade for readability.
def _placeholder_if_empty(text):
    """Return 'Empty' when ``text`` is None or has length zero, else ``text`` unchanged."""
    if text is None or len(text) < 1:
        return 'Empty'
    return text


def evaluation(original, gpt_plain, gpt_markdown,gold_standard):
    """Score the rewritten versions of ``original`` and return one result row.

    Args:
        original: the source text.
        gpt_plain: GPT rewrite in plain text.
        gpt_markdown: GPT rewrite in markdown.
        gold_standard: the reference rewrite.
        Any of the three rewrites that is None or empty is replaced by the
        placeholder string 'Empty' before scoring.

    Returns:
        A 14-item list:
        ``[original, gpt_plain, gpt_markdown,
           plain BERT F1, markdown BERT F1, gold-standard BERT F1,
           FK original, FK plain, FK markdown, FK gold standard,
           plain missing entities, plain extra entities,
           markdown missing entities, markdown extra entities]``.

    Side effects:
        Prints progress and intermediate results. The helper functions it
        calls make the AWS Comprehend and OpenAI requests.
    """
    gpt_plain = _placeholder_if_empty(gpt_plain)
    gpt_markdown = _placeholder_if_empty(gpt_markdown)
    gold_standard = _placeholder_if_empty(gold_standard)

    ### BERT SCORE ###
    print("###      COMPUTING BERT SCORE        ###")
    gs_BERT = evaluate_BERT(original, gold_standard)
    gptplain_BERT = evaluate_BERT(original, gpt_plain)
    markdown_BERT = evaluate_BERT(original, gpt_markdown)

    ### FLESCH-KINCAID ###
    fk_ori = evaluate_flesch_kincaid(original)
    fk_gpt = evaluate_flesch_kincaid(gpt_plain)
    fk_mark = evaluate_flesch_kincaid(gpt_markdown)
    fk_gs = evaluate_flesch_kincaid(gold_standard)
    print("Flesch-Kincaid Scores, Original, Plain, Markdown, goldstandard are: ", fk_ori, fk_gpt, fk_mark, fk_gs)

    ### ENTITY CHECKING ###
    all_ori_entities, all_plain_entities, plain_missing_e, plain_extra_e = evaluate_entities(original, gpt_plain)
    all_ori_entities, _, markdown_missing_e, markdown_extra_e = evaluate_entities(original, gpt_markdown)
    # The gold-standard comparison is still run (it prints its findings), but
    # its missing/extra lists are not part of the returned row.
    all_ori_entities, _, _, _ = evaluate_entities(original, gold_standard)

    ### RELATION CHECKING ###
    # generate_relations prints the relations it produces; the returned text
    # is not included in the row.
    generate_relations(original, ";".join(all_ori_entities))
    generate_relations(gpt_plain, ";".join(all_plain_entities))

    return [
        original, gpt_plain, gpt_markdown,
        gptplain_BERT, markdown_BERT, gs_BERT,
        fk_ori, fk_gpt, fk_mark, fk_gs,
        plain_missing_e, plain_extra_e, markdown_missing_e, markdown_extra_e,
    ]