In [None]:
# imports needed for google colab
!pip install -q langchain langchain_core langchain_openai

In [None]:
# import libraries
import os
import re

import pandas as pd

from google.colab import drive
from google.colab import userdata
from langchain.chains import SequentialChain, LLMChain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

In [None]:
# mount Google Drive at /content/drive; the data paths used later in this
# notebook all live under /content/drive/MyDrive
drive.mount('/content/drive')

In [None]:
# fetch the OpenAI API key stored under the name 'open_ai_project_nurse_notes'
# through google.colab.userdata, so the key itself never appears in the notebook
openai_api_key = userdata.get('open_ai_project_nurse_notes')

In [None]:
def generate_notes(system_role_prompt, note_query_prompt, input_data, openai_api_key, temperature, model, completions, filename):
  """Generate synthetic nurse notes with an OpenAI chat model and save them to CSV.

  The role prompt is sent as the system message and the note query as the human
  message of one chat request, so the role (e.g. nationality and language)
  conditions the generated notes. The request is repeated `completions` times,
  the replies are split into individual reports, and the reports are written to
  `<data_dir>/<filename>_<needs>.csv` with the columns 'report' and 'needs'.

  Args:
    system_role_prompt: dict with 'message' (template text containing
      {placeholders}) and 'inputs' (list of the placeholder names).
    note_query_prompt: dict with the same two keys, for the note request.
    input_data: dict supplying a value for every declared input; it must also
      contain 'needs', which is used in the file name and the CSV column.
    openai_api_key: API key passed to ChatOpenAI.
    temperature: sampling temperature passed to ChatOpenAI.
    model: model name passed to ChatOpenAI.
    completions: number of chat requests to make.
    filename: stem of the output file name (any value an f-string can format).

  Raises:
    ValueError: if input_data lacks one of the declared inputs or 'needs'.
  """

  def parse_and_clean_reports(completion_texts):
    # split the raw completion texts into individual, cleaned report strings
    reports = []
    for text in completion_texts:
      # the prompt asks the model to separate reports with "***"
      for chunk in text.split('***'):
        # double quotes act as a second delimiter: quoted reports are unwrapped,
        # and a report containing an inner quotation is split at the quote marks
        for fragment in chunk.split('"'):
          # turn line breaks into spaces so words are not glued together,
          # then drop the surrounding whitespace
          fragment = fragment.replace('\n', ' ').strip()
          # fragments with a colon are skipped (presumably labels such as
          # headings); note this also discards any report that contains a colon
          if ':' in fragment:
            continue
          # keep only fragments that contain at least one letter
          if re.search('[a-zA-Z]', fragment):
            reports.append(fragment)
    return reports

  def save_reports(reports, needs, filename):
    # save the reports to a CSV file, one row per report, tagged with the needs value
    df = pd.DataFrame(reports, columns=['report'])
    df['needs'] = needs
    try:
      df.to_csv(filename, index=False)
      print(f"Reports saved successfully to {filename}")
    except Exception as e:
      # best effort: report the failure and let the caller carry on with other runs
      print(f"Failed to save reports: {str(e)}")

  # fail fast, before any paid API call, if a required input is missing
  expected_inputs = system_role_prompt['inputs'] + note_query_prompt['inputs'] + ['needs']
  missing_inputs = [key for key in expected_inputs if key not in input_data]
  if missing_inputs:
    raise ValueError(f"input_data is missing required inputs: {missing_inputs}")

  data_dir = '/content/drive/MyDrive/Colab Notebooks/SyntheticNotes/SyntheticNotes/data/gpt3'
  report_filepath = os.path.join(data_dir, f'{filename}_{input_data["needs"]}.csv')
  # make sure the target directory exists up front, so the results of the API
  # calls below cannot be lost to a missing directory at save time
  os.makedirs(data_dir, exist_ok=True)

  # initialize openai model
  llm = ChatOpenAI(api_key=openai_api_key, temperature=temperature, model=model)

  # one chat prompt: the role is the system message, the note query the human message
  chat_prompt = ChatPromptTemplate.from_messages([
      ("system", system_role_prompt['message']),
      ("human", note_query_prompt['message']),
  ])
  # prompt -> model -> plain string reply
  chain = chat_prompt | llm | StrOutputParser()

  # generate a number of different responses
  completion_texts = [chain.invoke(input_data) for _ in range(completions)]

  # clean results
  reports = parse_and_clean_reports(completion_texts)
  # save results
  save_reports(reports, input_data["needs"], report_filepath)


In [None]:

# Prompt that sets the model's role. 'message' is the template text and 'inputs'
# lists the names of its {placeholders} (the nationality and language of the nurse).
system_role_prompt = {'message':
                      '''
                      You are a specialist in generating fictitious data for natural language processing projects in healthcare.
                      You speak the language of a nurse in an {nationality} nursing home. Namely, you speak {language}.
                      ''' ,
                      'inputs':["nationality", "language"]}

# Prompt that requests the notes. It shows one example note, lists topics that
# reports may cover, and asks for {number_of_reports} reports separated by "***".
# 'inputs' lists the names of the {placeholders} used in 'message'.
note_query_prompt = {'message':
                      '''
                      This is an example of a nurse note for a patient in a day: "{example_note}"

                      Other reports may include: washing, dressing, brushing teeth, getting ready for the day, getting ready for the night, showering, cleaning dental prostheses, or assistance after incontinence.
                      Other reports could include: what the client has or has not eaten, what help is needed with eating (full help, encouragement, adapted cutlery or cup), choking, keeping hydration and nutrition lists.
                      Other reports could include: Organised activities, getting visitors, browsing through a magazine, interacting with fellow residents. Keep in mind that these are reports from people in a nursing home, with severe disabilities, so social interaction and activities are limited. Usually it involves sociability, but not always.
                      Other reports may include, for example: oedema, pressure ulcers, peeling, redness and itching of the skin. Nails that are too long, blemishes.
                      Other reports could include, for example: care plan discussions, minor medical complaints, family requests, ordering medication.
                      Reports can be, for example, about: restlessness and wandering at night, sleeping well, going to the toilet at night, phoning, lying crookedly in bed.
                      Reports may include: agitation, restlessness, apathy, confusion; usually the confusion is subtle, but sometimes more intense.
                      Reports may include, for example: pain, tightness of breath, nausea, diarrhoea, back pain, palliative care; usually the complaints are subtle, but sometimes more severe.
                      Other reports can be about, for example: walking aids, the wheelchair, falls, fall incidents, transfers, lifts.
                      Most reports are about everyday things, so not everything is a serious incident.

                      Make up {number_of_reports} such reports for {number_of_reports} residents with {needs} palliative care needs. Return only the reports, with each report separated by "***" and nothing else. Vary the sentence structure and style.
                      ''' ,
                      'inputs':["example_note", "number_of_reports", "needs"]}

In [None]:
# location of the example notes on the mounted drive
DATA_LOADING_DIR = '/content/drive/MyDrive/Colab Notebooks/SyntheticNotes/SyntheticNotes'
dataframe = pd.read_excel(os.path.join(DATA_LOADING_DIR, 'fake_notes.xlsx'))

# generation settings shared by every run
completions = 25
model = 'gpt-3.5-turbo-0125'
temperature = 1.1

# one generation run per example note in the spreadsheet
for index, row in dataframe.iterrows():
  input_data = dict(
      nationality='Irish',
      language='Hiberno-English',
      example_note=row.Note,
      number_of_reports=25,
      needs=row.Needs,
  )
  # NOTE(review): the file-name stem is index % 5, so two rows whose indices differ
  # by a multiple of 5 and that share the same Needs value write to the same CSV;
  # confirm the spreadsheet layout rules that out.
  generate_notes(
      system_role_prompt,
      note_query_prompt,
      input_data,
      openai_api_key,
      temperature,
      model,
      completions,
      index % 5,
  )