<a href="https://colab.research.google.com/github/ekrombouts/GenCareAI/blob/olympia/notebooks/100_note_generation/120_GenerateClientScenarios.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# GenCare AI: Generating subscenarios

**Author:** Eva Rombouts  
**Date:**   02-10-2024  
**Updated:**   
**Version:** 0.1

### Description
This notebook generates detailed subscenarios for clients in a nursing home setting based on their profiles and main scenarios. The subscenarios are derived by dividing each period in the main scenario into four shorter periods, each representing a week of care for the client. The output is intended for use in fictional care reports.

The notebook takes each client's pre-defined profile and main scenario as input, sends them to a language model, and outputs a list of subscenarios.

In [None]:
# Install the project package first; GenCareAISetup is imported from it on the next line.
!pip install GenCareAI
from GenCareAI.GenCareAIUtils import GenCareAISetup

# Shared setup helper: used below for the environment check, file paths and the OpenAI key.
setup = GenCareAISetup()

# Only when running on Colab: install the LangChain packages imported by this notebook.
if setup.environment == 'Colab':
        !pip install -q langchain langchain_core langchain_openai langchain_community

In [2]:
from typing import List

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_community.callbacks import get_openai_callback
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI

import os
import re
import pandas as pd
import pickle


In [3]:
# Constants and Configurations
# Ward whose data files are read and written by this notebook.
ward_name = 'Athena'
# Inputs (client profiles, main scenarios) and output (subscenarios); paths are
# resolved through the setup helper.
fn_profiles = setup.get_file_path(f'data/gcai_client_profiles_{ward_name}.csv')
fn_scenarios = setup.get_file_path(f'data/gcai_client_scenarios_{ward_name}.csv')
fn_subscenarios = setup.get_file_path(f'data/gcai_client_subscenarios_{ward_name}.csv')
# Pickle file in which the per-client chain results are stored.
# NOTE(review): unlike the CSV files this name does not contain ward_name, so
# runs for different wards share (and overwrite) the same pickle — confirm intended.
fn_raw_results = setup.get_file_path('data/raw_results.pkl')

# Name of the OpenAI chat model. NOTE: the model-initialisation cell rebinds
# `model` to the ChatOpenAI instance built from this name.
# model = 'gpt-4o-mini-2024-07-18'
model = 'gpt-3.5-turbo-0125'
# Sampling temperature passed to ChatOpenAI.
temp = 1.1

# When True, the sample cells print a profile, scenario and prompt and run one trial generation.
verbose = False
# Positional row index (iloc) into df_profiles used for those samples.
sample_client = 2

In [4]:
# Load the main scenarios and the client profiles from their CSV files
df_scenarios = pd.read_csv(fn_scenarios)
df_profiles = pd.read_csv(fn_profiles)

In [None]:
# Summarise both input tables (columns, dtypes, non-null counts)
for frame in (df_profiles, df_scenarios):
    frame.info()

In [6]:
# Pydantic models
# One sub-period of a client's scenario, as returned by the language model.
# Documented with comments instead of a docstring on purpose: pydantic copies a
# class docstring into the model's JSON schema, which would change the format
# instructions sent to the LLM.
class ClientSubScenario(BaseModel):
    # Sequence number of the period in the main scenario
    period: int = Field(description="Volgnummer van de periode")
    # Sequence number of the sub-period within that period
    sub_period: int = Field(description="Volgnummer van de sub-periode")
    # Free-text description of the events and care in this sub-period
    events_description: str = Field(description="Beschrijving van de gebeurtenissen en zorg")

# Container for all sub-scenarios of one client; this is the object the output
# parser is asked to produce. (Comment rather than docstring: a docstring would
# end up in the JSON schema used for the format instructions.)
class ClientSubScenarios(BaseModel):
    # The generated sub-scenarios, one entry per sub-period
    scenario: List[ClientSubScenario]

In [7]:
# Initialize model and parser
# `model` arrives here as the model *name* from the config cell and is rebound
# to the ChatOpenAI client below. Remember the name separately so that
# re-running this cell does not pass the client object back in as `model=`.
if isinstance(model, str):
    model_name = model
model = ChatOpenAI(api_key=setup.get_openai_key(), temperature=temp, model=model_name)
# Parser that turns the model reply into a ClientSubScenarios object; its
# format instructions are inserted into the prompt.
pyd_parser = PydanticOutputParser(pydantic_object=ClientSubScenarios)
format_instructions = pyd_parser.get_format_instructions()

In [8]:
def display_profile(row):
    """Return a client's profile as a multi-line text block.

    Args:
        row: Mapping-like record (e.g. one row of the profiles table) with the
            keys 'naam', 'type_dementie', 'somatiek', 'adl', 'mobiliteit'
            and 'gedrag'.

    Returns:
        str: One "Label: value" line per field, joined by newlines, without a
        trailing newline.
    """
    labelled_fields = (
        ("Naam", 'naam'),
        ("Type Dementie", 'type_dementie'),
        ("Lichamelijke klachten", 'somatiek'),
        ("ADL", 'adl'),
        ("Mobiliteit", 'mobiliteit'),
        ("Cognitie/gedrag", 'gedrag'),
    )
    return "\n".join(f"{label}: {row[key]}" for label, key in labelled_fields)

# Preview the formatted profile of the sample client (verbose mode only);
# sample_profile is reused by the later verbose cells.
if verbose:
    sample_row = df_profiles.iloc[sample_client]
    sample_profile = display_profile(row=sample_row)
    print(sample_profile)

In [9]:
def display_scenario(row, df_scenarios):
    """Return the main scenario of one client as a multi-line text block.

    Args:
        row: Mapping-like record with a 'client_id' key identifying the client.
        df_scenarios: DataFrame with the columns 'client_id', 'period' and
            'events_description'.

    Returns:
        str: One "period: events_description" line per scenario row of this
        client, in table order, joined by newlines; an empty string when the
        client has no rows.
    """
    is_this_client = df_scenarios['client_id'] == row['client_id']
    lines = []
    for _, period_row in df_scenarios[is_this_client].iterrows():
        lines.append(f"{period_row['period']}: {period_row['events_description']}")
    return "\n".join(lines)

# Preview the formatted main scenario of the sample client (verbose mode only);
# sample_scenario is reused by the later verbose cells.
if verbose:
    sample_row = df_profiles.iloc[sample_client]
    sample_scenario = display_scenario(row=sample_row, df_scenarios=df_scenarios)
    print(sample_scenario)

In [10]:
# Define the prompt template (Dutch). It asks the model to write a scenario for
# a fictitious nursing-home client, splitting every period of the main scenario
# into 4 shorter sub-periods of one week each, while avoiding drama and the
# client's name. Placeholders: {client_profile}, {scenario}, {format_instructions}.
template="""
Help me bij het schrijven van een scenario van een fictieve client die is opgenomen in het verpleeghuis. Deze client is ernstig beperkt, vanwege een dementie of vanwege onderliggend lichamelijk lijden. Het scenario speelt zich af in het verpleeghuis, dus over het algemeen kabbelt de zorg rustig door. Vermijd dramatiek. Vermijd het noemen van de naam. Maak de wijzigingen subtieler dan je normaal zou doen.

OPDRACHT: Schrijf een scenario van onderstaand profiel en hoofdscenario, waarbij je elke periode uit het hoofdscenario opdeelt in 4 kortere perioden (subscenario's) die elk een week van de client in het verpleeghuis beschrijven. Deze scenario's zullen later worden gebruikt om fictieve zorgrapportages te schrijven. 

PROFIEL
{client_profile}

HOOFDSCENARIO
{scenario}

{format_instructions}
"""

# Wrap the template: profile and scenario are supplied per call, while the
# parser's format instructions are filled in once here.
prompt_template = PromptTemplate(
    input_variables=["client_profile", "scenario"],
    partial_variables={"format_instructions": format_instructions},
    template=template,
)

# Show the fully rendered prompt for the sample client (verbose mode only)
if verbose:
    sample_prompt = prompt_template.format(
        client_profile=sample_profile,
        scenario=sample_scenario,
    )
    print(sample_prompt)


In [11]:
# Create a chain: the filled-in prompt goes to the model and the model's reply
# is parsed by pyd_parser into a ClientSubScenarios object.
chain = prompt_template | model | pyd_parser

In [12]:
# Trial run for the sample client (verbose mode only); afterwards print the
# summary collected by the OpenAI callback.
if verbose:
    sample_inputs = {"client_profile": sample_profile, "scenario": sample_scenario}
    with get_openai_callback() as cb:
        result = chain.invoke(sample_inputs)
    print(cb)

In [None]:
# Inspect the sub-scenarios from the trial run. `result` only exists when the
# verbose trial cell has run, so guard on `verbose`: an unguarded reference
# raises NameError on a fresh kernel with verbose = False. When not verbose the
# expression evaluates to None, which produces no cell output.
result.scenario if verbose else None

In [None]:
# Generate and save subscenarios
def generate_subscenarios(df_profiles, df_scenarios, chain):
    """Generate sub-scenarios for every client profile.

    Args:
        df_profiles: DataFrame with one row per client; must provide the
            columns read by display_profile plus 'client_id'.
        df_scenarios: DataFrame with the main scenarios, as expected by
            display_scenario.
        chain: Object whose ``invoke`` accepts a dict with the keys
            'client_profile' and 'scenario' and returns a result with a
            ``scenario`` list of sub-scenarios.

    Returns:
        tuple: ``(subscenario_list, raw_results_list)`` where the first item is
        a list of ``(client_id, period, sub_period, events_description)``
        tuples and the second the unmodified chain results, one per client.

    Raises:
        Exception: Whatever ``chain.invoke`` raises when the single retry for a
            client fails as well.
    """
    subscenario_list = []
    raw_results_list = []
    for _, row in df_profiles.iterrows():
        inputs = {
            "client_profile": display_profile(row=row),
            "scenario": display_scenario(row=row, df_scenarios=df_scenarios),
        }
        print(f"Generating detailed scenario for client: {row['naam']}")

        try:
            result = chain.invoke(inputs)
        except Exception as e:
            # Retry once; a second failure propagates to the caller.
            print(f"Error encountered: {e}. Retrying...")
            result = chain.invoke(inputs)
            print("Retry successful")
        raw_results_list.append(result)

        subscenario_list.extend(
            (row['client_id'], sub.period, sub.sub_period, sub.events_description)
            for sub in result.scenario
        )
    return subscenario_list, raw_results_list


# The subscenario CSV doubles as a cache: the model is only called when the
# file does not exist yet.
if not os.path.exists(fn_subscenarios):
    print("Data file not found. Generating new data...")

    with get_openai_callback() as cb:
        subscenario_data, raw_results_data = generate_subscenarios(
            df_profiles=df_profiles,
            df_scenarios=df_scenarios,
            chain=chain
        )
        print(cb)

    # Save the generated subscenarios
    df_subscenarios = pd.DataFrame(
        subscenario_data,
        columns=['client_id', 'period', 'sub_period', 'events_description']
    )
    df_subscenarios.to_csv(fn_subscenarios, index=False)
    # Report the file that was actually written (the subscenarios file).
    print(f"Data saved successfully to {fn_subscenarios}.")

    # Save raw results
    with open(fn_raw_results, 'wb') as f:
        pickle.dump(raw_results_data, f)
    print(f"Raw results saved successfully to {fn_raw_results}.")

else:
    print("Data file found. Loading data...")
    df_subscenarios = pd.read_csv(fn_subscenarios)
    # NOTE: pickle.load can execute arbitrary code; only load a raw-results
    # file that was written by this notebook.
    with open(fn_raw_results, 'rb') as f:
        raw_results_data = pickle.load(f)


In [None]:
# Sub-scenarios stored in the first entry of the raw results
raw_results_data[0].scenario

In [None]:
def extract_period_number(period_str):
    """Extract a period number from a period label.

    The label is converted to a lower-case string. The first run of digits, if
    any, is returned as an int. Otherwise the label is searched for the Dutch
    ordinals 'eerste' to 'tiende' (checked in ascending order, as substrings),
    and the matching number 1-10 is returned.

    Args:
        period_str: Period label; any value accepted by ``str()``.

    Returns:
        int | None: The period number, or None when neither digits nor a known
        ordinal are found.
    """
    text = str(period_str).lower()

    digits = re.search(r'\d+', text)
    if digits is not None:
        return int(digits.group())

    # Position in this tuple (starting at 1) is the numeric value of the ordinal
    ordinals = (
        'eerste', 'tweede', 'derde', 'vierde', 'vijfde',
        'zesde', 'zevende', 'achtste', 'negende', 'tiende',
    )
    for number, word in enumerate(ordinals, start=1):
        if word in text:
            return number
    return None

# Derive a numeric period key in both tables so that they can be matched
df_scenarios['period_num'] = df_scenarios['period'].apply(extract_period_number)
df_subscenarios['period_num'] = df_subscenarios['period'].astype(int)

# Number of generated subscenarios for each (client, period) pair
subscenario_counts = (
    df_subscenarios
    .groupby(['client_id', 'period_num'])
    .size()
    .reset_index(name='subscenario_count')
)

# Attach the counts to the main scenarios; periods without any subscenario get 0
df_merged = df_scenarios.merge(subscenario_counts, on=['client_id', 'period_num'], how='left')
df_merged['subscenario_count'] = df_merged['subscenario_count'].fillna(0).astype(int)

# Keep the periods whose subscenario count deviates from the expected 4
has_four = df_merged['subscenario_count'] == 4
incorrect_counts = df_merged[~has_four]

if not incorrect_counts.empty:
    print("De volgende periodes hebben geen 4 subscenario's:")
    print(incorrect_counts[['client_id', 'period', 'subscenario_count']])
else:
    print("Alle periodes hebben precies 4 subscenario's in df_subscenarios.")

In [None]:
# Number of subscenario rows per client
df_subscenarios['client_id'].value_counts()