<a href="https://colab.research.google.com/github/ekrombouts/GenCareAI/blob/main/scripts/120_GenerateClientScenarios.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/ekrombouts/GenCareAI/blob/main/scripts/120_GenerateClientScenarios.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# GenCare AI: Generating client scenarios

**Author:** Eva Rombouts  
**Date:**   13-06-2024  
**Version:** 1.0

### Description
This script generates client scenarios based on the profiles generated [here](https://colab.research.google.com/github/ekrombouts/GenCareAI/blob/main/scripts/110_GenerateClientProfiles.ipynb).

### Imports and constants

In [None]:
import os
def check_environment():
    """Report whether the notebook runs in Google Colab or locally.

    The check is a probe import of ``google.colab``: when that import
    succeeds the runtime is treated as Colab, otherwise as a local one.

    Returns:
        str: ``"Google Colab"`` or ``"Local Environment"``.
    """
    try:
        import google.colab  # noqa: F401 - imported only to test availability
    except ImportError:
        return "Local Environment"
    return "Google Colab"

In [None]:
# Environment-dependent setup.
# In Colab: install the LangChain packages, mount Google Drive, point DATA_DIR
# at the Drive folder and read the API keys from Colab's `userdata` store.
# Locally: point DATA_DIR at ../data and read the API keys from environment
# variables after calling load_dotenv().

env = check_environment()

if env == "Google Colab":
    print("Running in Google Colab")
    # IPython shell escape: this line only works inside a notebook/IPython session.
    !pip install -q langchain langchain_core langchain_openai langchain_community
    from google.colab import drive, userdata
    drive.mount('/content/drive')
    DATA_DIR = '/content/drive/My Drive/Colab Notebooks/GenCareAI/data'
    OPENAI_API_KEY = userdata.get('GCI_OPENAI_API_KEY')
    HF_TOKEN = userdata.get('HF_TOKEN')
else:
    print("Running in Local Environment")
    # Local prerequisites (install once, outside the notebook):
    #   pip install python-dotenv langchain langchain_core langchain-community langchain_openai
    DATA_DIR = '../data'
    from dotenv import load_dotenv
    # Presumably picks up a local .env file so the keys below resolve - confirm.
    load_dotenv()
    # os.getenv returns None when a variable is not set.
    OPENAI_API_KEY = os.getenv('GCI_OPENAI_API_KEY')
    HF_TOKEN = os.getenv('HF_TOKEN')

In [None]:
from typing import List

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_community.callbacks import get_openai_callback
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

import pandas as pd
from pprint import pprint
import random
import numpy as np

In [None]:
# Constants and configuration

# Ward whose profiles are read and whose scenarios are written.
WARD_NAME = "Tulip"
FN_PROFILES = os.path.join(DATA_DIR, f"gcai_client_profiles_{WARD_NAME}.csv")
FN_SCENARIOS = os.path.join(DATA_DIR, f"gcai_client_scenarios_{WARD_NAME}.csv")

# Chat model and sampling temperature used for scenario generation.
MODEL_SCENARIOS = "gpt-3.5-turbo-0125"
TEMP = 1.1

# Pool of (Dutch) complications; generate_scenarios samples from this list.
complications_library = [
    "gewichtsverlies",
    "algehele achteruitgang",
    "decubitus",
    "UWI",
    "pneumonie",
    "delier",
    "verergering van onderliggende lichamelijke klachten",
    "verbetering van de klachten",
    "overlijden",
    "valpartij",
]

### Data

In [None]:
# Load the client profiles for the configured ward from FN_PROFILES.
# display_profile / generate_scenarios read the columns 'naam', 'type_dementie',
# 'somatiek', 'adl', 'mobiliteit', 'gedrag' and 'client_id' from this frame.
df = pd.read_csv(FN_PROFILES)

In [None]:
# Pydantic models
# One step of a client's timeline as returned by the model.
# The Dutch `description` texts are runtime strings attached to the fields:
#   month   - "sequence number of the month"
#   journey - "description of the events and care"
class ClientScenario(BaseModel):
    month: str = Field(description="Volgnummer van de maand")
    journey: str = Field(description="Beschrijving van de gebeurtenissen en zorg")

# Top-level output structure handed to PydanticOutputParser:
# the complete timeline for one client, as a list of ClientScenario entries.
class ClientScenarios(BaseModel):
    scenario: List[ClientScenario]

### Functions

In [None]:
def display_profile(row):
    """Format one client profile as a labelled, multi-line text block.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            'naam', 'type_dementie', 'somatiek', 'adl', 'mobiliteit' and
            'gedrag'.

    Returns:
        str: One "Label: value" line per field, joined by newlines, with no
        trailing newline.
    """
    # (label shown in the prompt, key looked up in the row), in output order.
    labelled_fields = (
        ("Naam", "naam"),
        ("Type Dementie", "type_dementie"),
        ("Lichamelijke klachten", "somatiek"),
        ("ADL", "adl"),
        ("Mobiliteit", "mobiliteit"),
        ("Cognitie / gedrag", "gedrag"),
    )
    return "\n".join(f"{label}: {row[key]}" for label, key in labelled_fields)

In [None]:
def determine_duration(mean=6, std_dev=2, min_months=1):
    """Draw the number of months a scenario should cover.

    The value is sampled from a normal distribution and rounded to the
    nearest integer. Because a normal draw is unbounded, the rounded value
    can be zero or negative; it is therefore clamped to ``min_months`` so
    the prompt never asks for a timeline of "0" or "-1" months.

    Args:
        mean: Mean of the normal distribution, in months.
        std_dev: Standard deviation of the normal distribution, in months.
        min_months: Lower bound applied to the rounded draw.

    Returns:
        int: Scenario length in months, never below ``min_months``.
    """
    sampled_months = int(np.round(np.random.normal(mean, std_dev)))
    return max(min_months, sampled_months)

In [None]:
def generate_scenarios(df, chain):
    """Generate a month-by-month scenario for every client profile.

    For each row a scenario length is drawn with ``determine_duration`` and
    one or two complications are sampled from ``complications_library``;
    these are passed to the chain together with the formatted profile.

    Args:
        df: DataFrame of client profiles; needs the columns used by
            ``display_profile`` plus 'client_id'.
        chain: Object whose ``invoke`` accepts a dict with the keys
            'client_profile', 'num_months' and 'complications' and returns
            an object with a ``scenario`` list of items exposing ``month``
            and ``journey``.

    Returns:
        list[tuple]: One ``(client_id, month, journey, complications,
        num_months)`` tuple per generated timeline step.
    """
    records = []
    for _, client in df.iterrows():
        profile_text = display_profile(client)
        print(f"Generating scenario for client: {client['naam']}")

        # Randomise the scenario length and which complications to weave in.
        duration = determine_duration()
        picked = random.sample(complications_library, random.choice([1, 2]))
        complication_text = ", ".join(picked)

        payload = {
            "client_profile": profile_text,
            "num_months": str(duration),
            "complications": complication_text,
        }
        timeline = chain.invoke(payload)

        records.extend(
            (client['client_id'], step.month, step.journey, complication_text, duration)
            for step in timeline.scenario
        )
    return records

In [None]:
def save_data(df, file_path):
    """Write a DataFrame to CSV (without the index) and confirm on stdout.

    Args:
        df: DataFrame to persist.
        file_path: Destination path of the CSV file.

    Raises:
        Whatever ``df.to_csv`` raises; nothing is caught here.
    """
    df.to_csv(file_path, index=False)
    # Report success only after the write returned without raising.
    print(f"Data saved successfully to {file_path}.")

In [None]:
def main(df, chain, file_path):
    """Return the scenario table, generating it only when no CSV exists yet.

    If ``file_path`` exists it is loaded and returned as-is. Otherwise the
    scenarios are generated inside an OpenAI callback context (whose usage
    summary is printed), saved to ``file_path`` and returned.

    Args:
        df: DataFrame of client profiles passed on to ``generate_scenarios``.
        chain: Chain passed on to ``generate_scenarios``.
        file_path: CSV path used both as cache and as output location.

    Returns:
        pandas.DataFrame: Columns 'client_id', 'month', 'journey',
        'complications' and 'num_months' when freshly generated; whatever
        the CSV contains when loaded from disk.
    """
    if not os.path.exists(file_path):
        print("Data file not found. Generating new data...")
        with get_openai_callback() as usage:
            rows = generate_scenarios(df, chain)
            print(usage)
        column_names = ['client_id', 'month', 'journey', 'complications', 'num_months']
        generated = pd.DataFrame(rows, columns=column_names)
        save_data(generated, file_path)
        return generated

    print("Data file found. Loading data...")
    return pd.read_csv(file_path)

### Model initialization

In [None]:
# Initialize the OpenAI chat model used for scenario generation, configured
# with the API key, sampling temperature (TEMP) and model name (MODEL_SCENARIOS)
# defined earlier in the notebook.
model = ChatOpenAI(api_key=OPENAI_API_KEY, temperature=TEMP, model=MODEL_SCENARIOS)

In [None]:
# Output parser bound to the ClientScenarios model. It is used twice below:
# its format instructions are injected into the prompt template, and it is the
# final step of the chain that turns the model reply into a ClientScenarios object.
pyd_parser = PydanticOutputParser(pydantic_object=ClientScenarios)

### Prompt template

In [None]:
# Define the prompt template.
# The (Dutch) prompt shows the client profile and asks for a timeline of the
# client's nursing home stay over {num_months} months that incorporates the
# given complication(s), keeps changes subtle, avoids excessive drama and does
# not mention the client's name. {format_instructions} is pre-filled from the
# parser; the other three variables are supplied on every invocation.
PT_scenario = PromptTemplate(
    template="""
Dit is het profiel van een fictieve client in het verpleeghuis:
---
{client_profile}
---

Schrijf in een tijdlijn het beloop van zijn/haar verblijf in het verpleeghuis gedurende {num_months} maanden.
Verwerk de volgende complicatie(s) hierin: {complications}.
Hou wijzigingen subtiel. Vermijd al te grote dramatiek.
Vermijd het noemen van de naam.

{format_instructions}
""",
    input_variables=["client_profile", "num_months", "complications"],
    partial_variables={"format_instructions": pyd_parser.get_format_instructions()},
)

# Render the template once with placeholder values and print it, so the full
# prompt (including the parser's format instructions) can be inspected.
P_scenario = PT_scenario.format(client_profile="client profiel",
                                      num_months = 6,
                                      complications = "complicatie(s)")
print(P_scenario)

In [None]:
# Create a chain of operations: prompt template -> model -> output parser.
# generate_scenarios calls chain.invoke(...) with the three prompt variables
# and reads `.scenario` from the parsed result.
chain_scenario = PT_scenario | model | pyd_parser

### Main workflow

In [None]:
# Run the workflow: load the cached scenarios or generate and save new ones,
# then show the first 24 rows.
if __name__ == "__main__":
    df_scenarios = main(df=df, chain=chain_scenario, file_path=FN_SCENARIOS)
    # A bare expression inside an `if` body is not echoed by the notebook
    # (only a trailing top-level expression is), so print the preview explicitly.
    print(df_scenarios.head(24))