In [1]:
import pandas as pd 
import base64
import vertexai
from vertexai.generative_models import GenerativeModel, Part, SafetySetting, FinishReason
import vertexai.preview.generative_models as generative_models


def generate(
    input_text,
    safety_settings,
    generation_config,
    project="mtrx-wg2-modeling-dev-9yj",
    location="us-east1",
    model_name="gemini-1.5-flash-001",
):
    """Send one prompt to a Vertex AI generative model and return the reply text.

    The reply is requested in streaming mode and the ``text`` of every
    streamed chunk is concatenated, in arrival order, into a single string.

    Args:
        input_text: Prompt passed to the model as the only content item.
        safety_settings: Forwarded unchanged to ``generate_content``.
        generation_config: Forwarded unchanged to ``generate_content``.
        project: Project passed to ``vertexai.init``. Defaults to the
            project this notebook was written against.
        location: Location passed to ``vertexai.init``.
        model_name: Model identifier passed to ``GenerativeModel``.

    Returns:
        str: The concatenated text of all streamed chunks ("" if the stream
        yields no chunks).

    Raises:
        Nothing is caught here: any exception raised by the SDK while
        initialising, sending the request, or reading a chunk's ``text``
        propagates to the caller.
        NOTE(review): the SDK is understood to raise when ``.text`` is read
        from a chunk without text (e.g. a safety-blocked candidate) - confirm
        against the Vertex AI SDK reference.
    """
    # Re-initialised on every call, exactly as before, so the function stays
    # self-contained and callable from a fresh kernel.
    vertexai.init(project=project, location=location)
    model = GenerativeModel(model_name)
    responses = model.generate_content(
        [input_text],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )
    # join() replaces the repeated `+=` string building of the streamed chunks.
    return "".join(response.text for response in responses)


#############################################
## GEMINI STUFF #############################
#############################################
# Sampling parameters handed to generate() for every request.
generation_config = dict(
    max_output_tokens=8192,
    temperature=1,
    top_p=0.95,
)

# Harm categories to configure; each one gets the same BLOCK_ONLY_HIGH threshold.
_harm_categories = (
    SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
    SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
    SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
)
safety_settings = [
    SafetySetting(
        category=harm_category,
        threshold=SafetySetting.HarmBlockThreshold.BLOCK_ONLY_HIGH,
    )
    for harm_category in _harm_categories
]

# Source spreadsheet; the two columns read below are kept as parallel lists
# (same row order) for the main loop.
drugs_to_contraindications = pd.read_excel("../contraindicationList.xlsx")

contraindicationsData = list(drugs_to_contraindications["contraindications"])
activeIngredientsData = list(drugs_to_contraindications["active ingredient"])
print(len(contraindicationsData), ' contraindications sections found')




50378  contraindications sections found


In [None]:
#############################################
## MAIN SECTION #############################
#############################################
# For every contraindications text, ask the model for the list of
# contraindicated diseases, then write one row per processed text to Excel.

# Stored in the results column when a row could not be processed.
LLM_ERROR_MARKER = "LLM ingest returned error"

# Rows with an index below this are skipped (progress is still printed).
# 0 processes everything, as before.
# NOTE(review): presumably meant as a manual "resume from row N" knob - confirm.
START_INDEX = 0

PROMPT_PREFIX = "Produce a list of diseases contraindicated for in the following contraindications list:\n"
# Starts with a newline so the instructions are not glued onto the last word
# of the contraindications text.
PROMPT_SUFFIX = (
    "\nPlease format the list as ['item1', 'item2', ... ,'itemN']. "
    "Do not inlude any other text in the response. "
    "If no diseases are contraindicated for, return an empty list as '[]'. "
    "If the therapy is preventative, add the tag (preventative) to the item. "
    "If the drug is only used for diagnostic purposes, return 'diagnostic/contrast/radiolabel'."
)

diseasesContraindicated = []
therapyActiveIngredients = []
originalText = []
n_contraindications = len(contraindicationsData)

# Both lists come from the same DataFrame, so zip() pairs each text with the
# active ingredient of the same row.
for index, (item, active_ingredient) in enumerate(
    zip(contraindicationsData, activeIngredientsData)
):
    print(index, " / ", n_contraindications)
    if index < START_INDEX:
        continue

    try:
        # Plain concatenation on purpose: a non-string cell (e.g. an empty
        # spreadsheet cell read as NaN) raises TypeError here and is recorded
        # as an error instead of being sent to the model as the text "nan".
        input_text = PROMPT_PREFIX + item + PROMPT_SUFFIX
        response = generate(input_text, safety_settings, generation_config)
        print(response)
    except Exception as exc:
        # `Exception` (not a bare except) so that interrupting the kernel
        # actually stops the loop; the reason for the failure is surfaced
        # instead of being silently discarded.
        print("  failed:", repr(exc))
        response = LLM_ERROR_MARKER

    # Single append site keeps the three result lists the same length.
    diseasesContraindicated.append(response)
    therapyActiveIngredients.append(active_ingredient)
    originalText.append(item)


data = pd.DataFrame({'active ingredient(s)':therapyActiveIngredients,'original text':originalText, 'diseases contraindicated for ': diseasesContraindicated})
data.to_excel("drugs_to_contraindications.xlsx")



0  /  50378




['Anuria (preventative)'] 

1  /  50378
['Anuria (preventative)']

2  /  50378
['known hypersensitivity or allergy to TOLINASE', 'diabetic ketoacidosis (with or without coma)', 'Type I diabetes (preventative)'] 

3  /  50378
['hypersensitivity to salts of diatrizoic acid', 'anuria (preventative)', 'dehydration (preventative)', 'azotemia (preventative)'] 

4  /  50378
['hypersensitivity to meperidine', 'monoamine oxidase (MAO) inhibitor use (within 14 days)', 'preexisting hyperphenylalaninemia (preventative)'] 

5  /  50378
['carcinoma of the breast (preventative)', 'carcinoma of the prostate gland (preventative)'] 

6  /  50378
['myelography (preventative)', 'examination of dorsal cysts (preventative)', 'examination of sinuses that communicate with the subarachnoid space (preventative)', 'epidural injection (preventative)', 'urography (preventative)', 'large dose vascular procedures (preventative)'] 

7  /  50378
['Hepatitis (preventative)', 'Cirrhosis (preventative)']

8  /  50378
9  