# Berkeley DBT
### A mental health chatbot
Hector Rincon | Robert Mueller | John Van | Andrew Loeber

## Imports

In [17]:
# General imports
import pandas as pd
import pickle
import os
import logging
from openai import AzureOpenAI
import time
import tiktoken
from dotenv import load_dotenv
import sys
import json
import ast
sys.path.append(os.path.join(sys.path[0], '..'))

# DBT imports
from utils.openai_api import get_openai_response, get_openai_response_content
from evaluation.evaluation_pipeline import run_evaluations

## Generate Conversations
- Run `generate_conversations.py` from the command line

In [2]:
# Credentials and deployment settings are read from the environment (or from a
# local, git-ignored `.env` file) so that no secret is stored in the notebook.
# SECURITY: an API key was previously hardcoded in this cell; treat that key as
# compromised and rotate it.
load_dotenv()

# Non-secret defaults; a value already present in the environment/.env wins.
os.environ.setdefault('MODEL_DEPLOYMENT', 'gpt4-1106')
os.environ.setdefault('OAI_ENDPOINT', 'dbt-openai-usea2-assistants')

# Fail fast with an actionable message if the key is missing.
if not os.environ.get('OPENAI_API_KEY'):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set. Export it in your shell or add it to a "
        "local .env file before running the evaluations."
    )
# Uncomment for verbose logging while debugging:
#logging.basicConfig(level=logging.DEBUG)

## Evaluations

In [3]:
# Load the generated conversations and map each conversation id to its eval messages.
# NOTE: unpickling can execute arbitrary code; only load pickle files you produced yourself.
convo_records = pd.read_pickle('./convo_generation/output/20240306_convos_full.pickle')
convo_dict = {
    record['id']: record['result']['eval_messages']
    for _, record in convo_records.items()
}

# Reshape to one row per conversation and write the csv the evaluations run on
convo_df = pd.DataFrame([convo_dict]).T
convo_df = convo_df.reset_index()
convo_df = convo_df.rename(columns={'index': 'conversation_id', 0: 'conversation'})
convo_df.to_csv('./convo_generation/output/20240306_convos_full.csv')

### Adherence

In [6]:
# run_evaluations expects a csv in which each row has the format: {id, text conversation}
convos_csv_path = './convo_generation/output/20240306_convos_full.csv'
adherence_json_path = './evaluation/output/20240306_adherence_results.json'

run_evaluations(convos_csv_path, adherence_json_path, 'adherence')

2024-03-29 16:02:38,785 - evaluation_pipeline - INFO: Running conversation: (1/81)
2024-03-29 16:02:38,785 - adherence_evaluation - INFO: Running iteration 0
2024-03-29 16:02:38,786 - adherence_evaluation - INFO: Running criterion: Organize by Targets (1/23)
2024-03-29 16:02:39,550 - adherence_evaluation - INFO: Running criterion: Emotion Focus (2/23)
2024-03-29 16:02:40,161 - adherence_evaluation - INFO: Running criterion: Describe Specifically (3/23)
2024-03-29 16:02:40,675 - adherence_evaluation - INFO: Running criterion: Chain Analysis (4/23)
2024-03-29 16:02:41,312 - adherence_evaluation - INFO: Running criterion: Teach New Information (5/23)
2024-03-29 16:02:41,799 - adherence_evaluation - INFO: Running criterion: Generate Solutions (6/23)
2024-03-29 16:02:42,269 - adherence_evaluation - INFO: Running criterion: Activate New Behavior (7/23)
2024-03-29 16:02:43,237 - adherence_evaluation - INFO: Running criterion: Provide Coaching Feedback (8/23)
2024-03-29 16:02:43,754 - adherenc

In [55]:
# Load the adherence scores from the JSON file produced by the adherence evaluation
adherence_results = './evaluation/output/20240306_adherence_results.json'
with open(adherence_results) as adherence_results_json:
    adherence_results_dict = json.load(adherence_results_json)

# Flatten the nested results into long format: one row per
# (conversation, criterion) pair. The records are collected first and the
# DataFrame is built in a single call, instead of appending row by row with
# `.loc[len(df)]`, which gets slower as the frame grows.
adherence_records = [
    {'conversation_id': item['conversation_id'], 'criterion': criterion, 'score': score}
    for item in adherence_results_dict
    for criterion, score in item['result'].items()
]
# Passing `columns` keeps the three expected columns even when there are no records.
adherence_results_df = pd.DataFrame(
    adherence_records,
    columns=['conversation_id', 'criterion', 'score'],
)

In [56]:
# Preview the long-format adherence results (columns: conversation_id, criterion, score)
adherence_results_df

Unnamed: 0,conversation_id,criterion,score
0,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Organize by Targets,1.0
1,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Emotion Focus,1.0
2,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Describe Specifically,1.0
3,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Chain Analysis,0.0
4,bc97e560-2f17-40c6-bbbc-cf1455e5536b,Teach New Information,1.0
...,...,...,...
1858,feed36a3-d156-49b7-8f8d-d174472daa0d,Direct Confrontation,0.4
1859,feed36a3-d156-49b7-8f8d-d174472daa0d,Unorthodox Irreverence,1.0
1860,feed36a3-d156-49b7-8f8d-d174472daa0d,Balanced Style and Strategies,1.0
1861,feed36a3-d156-49b7-8f8d-d174472daa0d,Model Dialectical Thinking,0.8


### Skill Presence

In [4]:
# Run the skill presence evaluation on the conversations csv; results go to the json path
skill_presence_input_csv = './convo_generation/output/20240306_convos_full.csv'
skill_presence_output_json = './evaluation/output/20240306_skill_presence_results.json'

run_evaluations(skill_presence_input_csv, skill_presence_output_json, 'skill_presence')

2024-04-02 01:58:30,547 - skill_presence_evaluation - INFO: Launching skill presence evaluation with following skill ids: ['M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'IE1', 'IE2', 'IE3', 'IE4', 'IE5', 'IE6', 'IE7', 'IE8', 'IE9', 'IE10', 'IE11', 'ER1', 'ER2', 'ER3', 'ER4', 'ER5', 'ER6', 'ER7', 'ER8', 'ER9', 'ER10', 'DT1', 'DT2', 'DT3', 'DT4', 'DT5', 'DT6', 'DT7', 'DT8', 'DT9', 'DT10', 'DT11', 'DT12', 'DT13', 'DT14', 'DT15', 'DT16']
2024-04-02 01:58:30,548 - evaluation_pipeline - INFO: Running conversation: (1/81)
2024-04-02 01:58:30,551 - evaluation_pipeline - INFO: Running conversation: (2/81)
2024-04-02 01:58:30,553 - evaluation_pipeline - INFO: Running conversation: (3/81)
2024-04-02 01:58:30,556 - evaluation_pipeline - INFO: Running conversation: (4/81)
2024-04-02 01:58:30,557 - evaluation_pipeline - INFO: Running conversation: (5/81)
2024-04-02 01:58:30,560 - evaluation_pipeline - INFO: Running conversation: (6/81)
2024-04-02 01:58:30,563 - evaluation_pipeline - INFO: Ru

In [6]:
# Parse the skill presence evaluation output back into Python objects
skill_presence_results = './evaluation/output/20240306_skill_presence_results.json'
with open(skill_presence_results) as skill_presence_results_json:
    skill_presence_results_text = skill_presence_results_json.read()
    skill_presence_results_dict = json.loads(skill_presence_results_text)

In [7]:
# Inspect the parsed skill presence results.
# NOTE(review): the captured output shows a list of per-conversation records
# (conversation_id, evaluation_name, result), so the `_dict` suffix is a misnomer.
skill_presence_results_dict

[{'conversation_id': 'bc97e560-2f17-40c6-bbbc-cf1455e5536b',
  'evaluation_name': 'Skill Presence',
  'result': ['DT3']},
 {'conversation_id': '817c2b6f-51bf-401c-8b06-cdf9f83b5948',
  'evaluation_name': 'Skill Presence',
  'result': []},
 {'conversation_id': 'a26af177-cf8b-41f0-b286-3a1f536544af',
  'evaluation_name': 'Skill Presence',
  'result': []},
 {'conversation_id': '4f57e28b-1c11-437c-a067-a725c3b3dd32',
  'evaluation_name': 'Skill Presence',
  'result': ['M8']},
 {'conversation_id': '8ad366a9-643d-4a93-8716-f7df4447d79e',
  'evaluation_name': 'Skill Presence',
  'result': ['M3', 'IE2']},
 {'conversation_id': 'b652dc24-20e4-413c-b2b0-e9729d7d5c8c',
  'evaluation_name': 'Skill Presence',
  'result': ['M8']},
 {'conversation_id': '9c8ef3c4-2935-4ca8-b4a6-16984ae499be',
  'evaluation_name': 'Skill Presence',
  'result': ['ER7']},
 {'conversation_id': '011948a4-a9e8-4259-85b3-e2e50fd42e83',
  'evaluation_name': 'Skill Presence',
  'result': ['DT3']},
 {'conversation_id': 'c2289ef0

In [33]:
# Build a frame indexed by conversation id from the skill presence records,
# dropping the evaluation name and giving the result column a descriptive name
skills_detected_df = (
    pd.DataFrame.from_records(skill_presence_results_dict)
    .set_index('conversation_id')
    .drop(columns='evaluation_name')
    .rename(columns={'result': 'detected_skill_ids'})
)

skills_detected_df

Unnamed: 0_level_0,detected_skill_ids
conversation_id,Unnamed: 1_level_1
bc97e560-2f17-40c6-bbbc-cf1455e5536b,[DT3]
817c2b6f-51bf-401c-8b06-cdf9f83b5948,[]
a26af177-cf8b-41f0-b286-3a1f536544af,[]
4f57e28b-1c11-437c-a067-a725c3b3dd32,[M8]
8ad366a9-643d-4a93-8716-f7df4447d79e,"[M3, IE2]"
...,...
1551bb94-6070-498f-a011-9407ed545c45,"[M3, IE2]"
6ac0e128-495a-4244-b60e-642e11cca7ea,[IE3]
711d32a0-4594-498f-9a2b-5f93f2346da7,[DT7]
31ee6726-f6d0-453f-9482-4e36b8dbdc04,[ER4]


In [18]:
# Load the tab-separated prompts table and parse each stringified list of
# target skill ids into a real Python object in a new `target_skill_ids` column
prompts_df = pd.read_csv('./data/prompts_with_ids.tsv', sep='\t').assign(
    target_skill_ids=lambda frame: frame['Target Skill IDs'].apply(ast.literal_eval)
)
prompts_df

Unnamed: 0,Initial Message,Target Skill IDs,id,target_skill_ids
0,My grandmother passed away two days ago and ev...,['M1'],bc97e560-2f17-40c6-bbbc-cf1455e5536b,[M1]
1,I have a hard time falling asleep at night. I ...,['M2'],817c2b6f-51bf-401c-8b06-cdf9f83b5948,[M2]
2,I have been ruminating before falling asleep a...,"['M3', 'ER8']",a26af177-cf8b-41f0-b286-3a1f536544af,"[M3, ER8]"
3,"Oftentimes when I'm with my friends, I am sitt...",['M4'],4f57e28b-1c11-437c-a067-a725c3b3dd32,[M4]
4,Everytime I try to bring up a concern with my ...,['M5'],8ad366a9-643d-4a93-8716-f7df4447d79e,[M5]
...,...,...,...,...
77,My partner and I are pretty sedentary. The doc...,['DT15'],1551bb94-6070-498f-a011-9407ed545c45,[DT15]
78,I was running late for my final so instead of ...,['IE9'],6ac0e128-495a-4244-b60e-642e11cca7ea,[IE9]
79,I wanted to pick up the habit of reading. I fo...,['IE10'],711d32a0-4594-498f-9a2b-5f93f2346da7,[IE10]
80,There have been rounds of layoffs happening at...,"['ER4', 'DT7']",31ee6726-f6d0-453f-9482-4e36b8dbdc04,"[ER4, DT7]"


## Analysis

In [57]:
# Summary statistics of the adherence results; by default `describe()` reports
# only numeric columns, which is why only `score` appears in the output
adherence_results_df.describe()

Unnamed: 0,score
count,1863.0
mean,0.68964
std,0.434811
min,0.0
25%,0.2
50%,1.0
75%,1.0
max,1.0
