# Berkeley DBT
### Pioneering the future of mental health virtual assistants
Hector Rincon | Robert Mueller | John Van | Andrew Loeber

## Imports

In [2]:
import pandas as pd
import os
import json
import ast

from evaluation.evaluation_pipeline import run_evaluations

## Generate Conversations
Steps:
- Open a terminal
- Navigate to the `/dbt-llm/src/convo_generation` directory
- Run `generate_conversations.py` from the command line

As an example, here's the terminal command you'd use to generate 10-message conversations for the first 5 patient profiles, then save the results to `my_test.pickle`:

`python generate_conversations.py --limit 5 --n_msgs 10 --output my_test.pickle`


## Evaluations

### Read in generated conversations

In [3]:
# Base name of the pickle file produced by generate_conversations.py,
# WITHOUT the '.pickle' extension (it is appended when the path is built).
CONVO_GEN_FILENAME = 'result'

# Generated conversations are read from ./convo_generation/output/<name>.pickle
CONVO_GEN_DIRPATH = os.path.join('.', 'convo_generation', 'output')
CONVO_GEN_PICKLE_PATH = os.path.join(
    CONVO_GEN_DIRPATH,
    '{}.pickle'.format(CONVO_GEN_FILENAME),
)
print(CONVO_GEN_PICKLE_PATH)

.\convo_generation\output\result.pickle


In [4]:
# Map each conversation id to its 'eval_messages' transcript. Only the
# pickle's values are needed; its keys are not used.
convo_dict = {
    entry['id']: entry['result']['eval_messages']
    for entry in pd.read_pickle(CONVO_GEN_PICKLE_PATH).values()
}

# Two-column table (id, conversation) saved as the CSV that the evaluations
# read. The default row index is written too, as an unnamed first column.
convo_df = pd.DataFrame(
    list(convo_dict.items()),
    columns=['conversation_id', 'conversation'],
)
CONVO_GEN_CSV_PATH = os.path.join(CONVO_GEN_DIRPATH, CONVO_GEN_FILENAME + '.csv')
convo_df.to_csv(CONVO_GEN_CSV_PATH)

### Adherence

In [5]:
# Evaluation result files are kept under ./evaluation/output
EVAL_DIRPATH = os.path.join('.', 'evaluation', 'output')
ADHERENCE_EVAL_FILEPATH = os.path.join(
    EVAL_DIRPATH,
    CONVO_GEN_FILENAME + '_adherence_results.json',
)

# run_evaluations expects a csv with the following format in each row:
# {id, text conversation}
run_evaluations(
    CONVO_GEN_CSV_PATH,
    ADHERENCE_EVAL_FILEPATH,
    'adherence',
)

2024-04-14 19:21:23,810 - evaluation_pipeline - INFO: Running conversation: (1/2)
2024-04-14 19:21:23,810 - adherence_evaluation - INFO: Running iteration 0
2024-04-14 19:21:23,811 - adherence_evaluation - INFO: Running criterion: Organize by Targets (1/23)
2024-04-14 19:21:26,357 - adherence_evaluation - INFO: Running criterion: Emotion Focus (2/23)
2024-04-14 19:21:34,225 - adherence_evaluation - INFO: Running criterion: Describe Specifically (3/23)
2024-04-14 19:21:35,931 - adherence_evaluation - INFO: Running criterion: Chain Analysis (4/23)
2024-04-14 19:21:37,356 - adherence_evaluation - INFO: Running criterion: Teach New Information (5/23)
2024-04-14 19:21:40,653 - adherence_evaluation - INFO: Running criterion: Generate Solutions (6/23)
2024-04-14 19:21:44,014 - adherence_evaluation - INFO: Running criterion: Activate New Behavior (7/23)
2024-04-14 19:21:44,543 - adherence_evaluation - INFO: Running criterion: Provide Coaching Feedback (8/23)
2024-04-14 19:21:47,073 - adherence

In [6]:
# Load the adherence results file that was passed to run_evaluations.
with open(ADHERENCE_EVAL_FILEPATH) as adherence_results_json:
    # NOTE: despite the "_dict" suffix, this is iterated below as a sequence
    # of records, each with a 'conversation_id' and a 'result' mapping.
    adherence_results_dict = json.load(adherence_results_json)

# Flatten to one row per (conversation, criterion) pair. All rows are
# collected first and the frame is built once: appending with
# `df.loc[len(df)] = ...` inside a loop re-allocates the frame on every row.
adherence_records = [
    {
        'conversation_id': item['conversation_id'],
        'criterion': criterion,
        'score': score,
    }
    for item in adherence_results_dict
    for criterion, score in item['result'].items()
]

# `columns=` keeps the three expected columns even when there are no records.
adherence_results_df = pd.DataFrame(
    adherence_records,
    columns=['conversation_id', 'criterion', 'score'],
)

adherence_results_df

Unnamed: 0,conversation_id,criterion,score
0,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Organize by Targets,1.0
1,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Emotion Focus,1.0
2,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Describe Specifically,1.0
3,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Chain Analysis,0.0
4,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Teach New Information,1.0
5,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Generate Solutions,0.8
6,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Activate New Behavior,1.0
7,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Provide Coaching Feedback,1.0
8,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Generalize New Learning,1.0
9,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Commitment and Troubleshooting,1.0


### Skill Presence

In [7]:
# Results file for the skill-presence evaluation, next to the other
# evaluation outputs.
SKILL_EVAL_FILEPATH = os.path.join(
    EVAL_DIRPATH,
    CONVO_GEN_FILENAME + '_skill_presence_results.json',
)

run_evaluations(
    CONVO_GEN_CSV_PATH,
    SKILL_EVAL_FILEPATH,
    'skill_presence',
)

2024-04-14 19:41:27,554 - skill_presence_evaluation - INFO: Launching skill presence evaluation with following skill ids: ['M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'IE1', 'IE2', 'IE3', 'IE4', 'IE5', 'IE6', 'IE7', 'IE8', 'IE9', 'IE10', 'IE11', 'ER1', 'ER2', 'ER3', 'ER4', 'ER5', 'ER6', 'ER7', 'ER8', 'ER9', 'ER10', 'DT1', 'DT2', 'DT3', 'DT4', 'DT5', 'DT6', 'DT7', 'DT8', 'DT9', 'DT10', 'DT11', 'DT12', 'DT13', 'DT14', 'DT15', 'DT16']
2024-04-14 19:41:27,555 - evaluation_pipeline - INFO: Running conversation: (1/2)
2024-04-14 19:41:27,557 - evaluation_pipeline - INFO: Running conversation: (2/2)


In [8]:
# Load the skill-presence results file.
with open(SKILL_EVAL_FILEPATH) as skill_presence_results_json:
    skill_presence_results_dict = json.load(skill_presence_results_json)

# One row per result record, indexed by conversation id. The
# 'evaluation_name' column is dropped and 'result' is renamed to
# 'detected_skill_ids'.
skills_detected_df = (
    pd.DataFrame.from_records(skill_presence_results_dict)
    .set_index('conversation_id')
    .drop(columns='evaluation_name')
    .rename(columns={'result': 'detected_skill_ids'})
)

skills_detected_df

Unnamed: 0_level_0,detected_skill_ids
conversation_id,Unnamed: 1_level_1
bc97e560-2f17-40c6-bbbc-cf1455e5536b,[DT5]
817c2b6f-51bf-401c-8b06-cdf9f83b5948,[DT11]


In [9]:
# Load the tab-separated prompt table, rename its headers to snake_case,
# index it by conversation id, and parse the target-skill cells with
# ast.literal_eval (the displayed values are lists of skill ids).
prompts_df = (
    pd.read_csv('./data/prompts_with_ids.tsv', sep='\t')
    .rename(columns={
        'Initial Message': 'initial_message',
        'Target Skill IDs': 'target_skill_ids',
        'id': 'conversation_id',
    })
    .set_index('conversation_id')
    .assign(
        target_skill_ids=lambda frame: frame['target_skill_ids'].apply(ast.literal_eval)
    )
)
prompts_df

Unnamed: 0_level_0,Title,initial_message,target_skill_ids
conversation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bc97e560-2f17-40c6-bbbc-cf1455e5536b,Solo Wake Planning,My grandmother passed away two days ago and ev...,[M1]
817c2b6f-51bf-401c-8b06-cdf9f83b5948,Midnight Mind Marathons,I have a hard time falling asleep at night. I ...,[M2]
a26af177-cf8b-41f0-b286-3a1f536544af,Awaking Exhausted,I have been ruminating before falling asleep a...,"[M3, ER8]"
4f57e28b-1c11-437c-a067-a725c3b3dd32,Distracted Amongst Friends,"Oftentimes when I'm with my friends, I am sitt...",[M4]
8ad366a9-643d-4a93-8716-f7df4447d79e,Tears Trump Talk,Everytime I try to bring up a concern with my ...,[M5]
...,...,...,...
1551bb94-6070-498f-a011-9407ed545c45,Sick of Sedentary,My partner and I are pretty sedentary. The doc...,[DT15]
6ac0e128-495a-4244-b60e-642e11cca7ea,Exam Theft Auto,I was running late for my final so instead of ...,[IE9]
711d32a0-4594-498f-9a2b-5f93f2346da7,Reading Pace Pressure,I wanted to pick up the habit of reading. I fo...,[IE10]
31ee6726-f6d0-453f-9482-4e36b8dbdc04,Looming Layoff,There have been rounds of layoffs happening at...,"[ER4, DT7]"


In [11]:
# Re-read the conversations CSV, this time indexed by conversation id.
# NOTE: this rebinds `convo_df`, which was first assigned when the CSV was
# created. 'Unnamed: 0' is the row index that was written to the CSV as an
# unnamed first column; it is not needed here.
convo_df = (
    pd.read_csv(CONVO_GEN_CSV_PATH)
    .set_index('conversation_id')
    .drop(columns='Unnamed: 0')
)
convo_df

Unnamed: 0_level_0,conversation
conversation_id,Unnamed: 1_level_1
bc97e560-2f17-40c6-bbbc-cf1455e5536b,Therapist: Hello! It's wonderful to have you h...
817c2b6f-51bf-401c-8b06-cdf9f83b5948,Therapist: Hello there! I’m glad you’ve come t...


In [12]:
# Attach the prompt metadata and conversation text to the detected skills
# (joined on the shared conversation_id index), then keep only the columns
# needed to compare target skills with detected skills.
skills_presence_df = skills_detected_df.join([prompts_df, convo_df]).loc[
    :,
    ['initial_message', 'conversation', 'target_skill_ids', 'detected_skill_ids'],
]
skills_presence_df

Unnamed: 0_level_0,initial_message,conversation,target_skill_ids,detected_skill_ids
conversation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bc97e560-2f17-40c6-bbbc-cf1455e5536b,My grandmother passed away two days ago and ev...,Therapist: Hello! It's wonderful to have you h...,[M1],[DT5]
817c2b6f-51bf-401c-8b06-cdf9f83b5948,I have a hard time falling asleep at night. I ...,Therapist: Hello there! I’m glad you’ve come t...,[M2],[DT11]
