In [1]:
import os

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "4"

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from openai import OpenAI
from tqdm import tqdm
import pandas as pd
import json
import pickle
import numpy as np
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


### Get the CounselChat Dataset

In [4]:
dataset_name = "nbertagnolli/counsel-chat"
dataset = load_dataset(dataset_name, split="all")
dataset = dataset.shuffle(seed=42)

Repo card metadata block was not found. Setting CardData to empty.


In [5]:
dataset_df = dataset.to_pandas()
dataset_df.head()

Unnamed: 0,questionID,questionTitle,questionText,questionLink,topic,therapistInfo,therapistURL,answerText,upvotes,views
0,569,How do you get over someone cheating on you wh...,My boyfriend of five years told me he cheated ...,https://counselchat.com/questions/how-do-you-g...,intimacy,"Lynda MartensMarriage & Family Therapist, MSc,...",https://counselchat.com/therapists/ms-lynda-ma...,"Hi Michigan,This is a common issue. How do you...",2,130
1,787,Is it acceptable for me to find another home f...,My boyfriend moved in with me a few months ago...,https://counselchat.com/questions/is-it-accept...,relationships,"Sherry Katz, LCSWCouples and Family Therapist,...",https://counselchat.com/therapists/sherry-katz...,Every living creature needs time and support t...,2,66
2,229,Am I somehow stressing myself out?,"I think I'm annoying and bothersome, but my gi...",https://counselchat.com/questions/am-i-somehow...,anxiety,"Lynda MartensMarriage & Family Therapist, MSc,...",https://counselchat.com/therapists/ms-lynda-ma...,"""Where you put your attention is where you wil...",2,271
3,933,How would I know if I have the right therapist?,How do you know you have the right therapist f...,https://counselchat.com/questions/how-would-i-...,counseling-fundamentals,"Jane M Floyd, PsyDOur life is shaped by our mi...",https://counselchat.com/therapists/jane-m-floy...,A therapist should be willing to have an initi...,0,29
4,929,Why can't I stop these thoughts?,I keep having these random thoughts that I don...,https://counselchat.com/questions/why-can-t-i-...,diagnosis,"Sherry Katz, LCSWCouples and Family Therapist,...",https://counselchat.com/therapists/sherry-katz...,Those critical thoughts most likely came into...,0,740


In [6]:
dataset_df_top_votes = dataset_df.groupby('questionID').apply(lambda x: x.sort_values('upvotes', ascending=False).iloc[0], include_groups=False).reset_index()
dataset_df_top_votes

Unnamed: 0,questionID,questionTitle,questionText,questionLink,topic,therapistInfo,therapistURL,answerText,upvotes,views
0,0,Do I have too many issues for counseling?,I have so many issues to address. I have a his...,https://counselchat.com/questions/do-i-have-to...,depression,Jennifer MolinariHypnotherapist & Licensed Cou...,https://counselchat.com/therapists/jennifer-mo...,It is very common for people to have multiple ...,3,1971
1,1,My apartment manager won't let me keep an emot...,I have been diagnosed with general anxiety and...,https://counselchat.com/questions/my-apartment...,depression,"Eric Ström, JD, MA, LMHCAttorney & Licensed Me...",https://counselchat.com/therapists/eric-str-m-...,"This can be a difficult situation. Typically,...",2,1026
2,2,I feel like my mother doesn't support me,My mother is combative with me when I say I do...,https://counselchat.com/questions/i-feel-like-...,depression,Dr. Meredyth LawrynceServing Clients Nationwide,https://counselchat.com/therapists/dr-meredyth...,Do you live with your mom and have constant in...,2,187
3,3,Why do I feel like I don't belong anywhere?,There are many people willing to lovingly prov...,https://counselchat.com/questions/why-do-i-fee...,depression,Dr. Meredyth LawrynceServing Clients Nationwide,https://counselchat.com/therapists/dr-meredyth...,I truly understand what you are saying. I want...,1,62
4,4,How can I help my girlfriend?,My girlfriend just quit drinking and she becam...,https://counselchat.com/questions/how-can-i-he...,depression,"Kristi King-Morgan, LMSWSocial Worker, Psychot...",https://counselchat.com/therapists/kristi-king...,You're probably not going to like my answer.Yo...,3,824
...,...,...,...,...,...,...,...,...,...,...
935,935,How does counseling help people?,Does counseling really do anything that can he...,https://counselchat.com/questions/how-does-cou...,counseling-fundamentals,Traci LowenthalLGBTQAffirmative Therapist,https://counselchat.com/therapists/traci-lowen...,Hi! Great question. I believe counseling doe...,3,1007
936,936,How does someone approach a counselor?,How does a person start the counseling process?,https://counselchat.com/questions/how-does-som...,counseling-fundamentals,Cimberly R. NeskerRegistered Psychotherapist (...,https://counselchat.com/therapists/cimberly-r-...,"Hello, There are many ways to approach a couns...",3,2597
937,937,What happens in a counseling session?,"After first meeting the client, what is the pr...",https://counselchat.com/questions/what-happens...,counseling-fundamentals,2nd Chance Counseling Service Online Addiction...,https://counselchat.com/therapists/2nd-chance-...,A good therapist will discuss what brought you...,0,36
938,938,What are the skills needed by a counselor?,What are the basic skills a good counselor nee...,https://counselchat.com/questions/what-are-the...,counseling-fundamentals,Kevin MimmsBuilding Better Connections,https://counselchat.com/therapists/kevin-mimms,"Here is a list of skills I use every session, ...",0,123


In [7]:
dataset_df_top_votes['question'] = dataset_df_top_votes['questionText'] + " " + dataset_df_top_votes['questionTitle']
dataset_df_top_votes

Unnamed: 0,questionID,questionTitle,questionText,questionLink,topic,therapistInfo,therapistURL,answerText,upvotes,views,question
0,0,Do I have too many issues for counseling?,I have so many issues to address. I have a his...,https://counselchat.com/questions/do-i-have-to...,depression,Jennifer MolinariHypnotherapist & Licensed Cou...,https://counselchat.com/therapists/jennifer-mo...,It is very common for people to have multiple ...,3,1971,I have so many issues to address. I have a his...
1,1,My apartment manager won't let me keep an emot...,I have been diagnosed with general anxiety and...,https://counselchat.com/questions/my-apartment...,depression,"Eric Ström, JD, MA, LMHCAttorney & Licensed Me...",https://counselchat.com/therapists/eric-str-m-...,"This can be a difficult situation. Typically,...",2,1026,I have been diagnosed with general anxiety and...
2,2,I feel like my mother doesn't support me,My mother is combative with me when I say I do...,https://counselchat.com/questions/i-feel-like-...,depression,Dr. Meredyth LawrynceServing Clients Nationwide,https://counselchat.com/therapists/dr-meredyth...,Do you live with your mom and have constant in...,2,187,My mother is combative with me when I say I do...
3,3,Why do I feel like I don't belong anywhere?,There are many people willing to lovingly prov...,https://counselchat.com/questions/why-do-i-fee...,depression,Dr. Meredyth LawrynceServing Clients Nationwide,https://counselchat.com/therapists/dr-meredyth...,I truly understand what you are saying. I want...,1,62,There are many people willing to lovingly prov...
4,4,How can I help my girlfriend?,My girlfriend just quit drinking and she becam...,https://counselchat.com/questions/how-can-i-he...,depression,"Kristi King-Morgan, LMSWSocial Worker, Psychot...",https://counselchat.com/therapists/kristi-king...,You're probably not going to like my answer.Yo...,3,824,My girlfriend just quit drinking and she becam...
...,...,...,...,...,...,...,...,...,...,...,...
935,935,How does counseling help people?,Does counseling really do anything that can he...,https://counselchat.com/questions/how-does-cou...,counseling-fundamentals,Traci LowenthalLGBTQAffirmative Therapist,https://counselchat.com/therapists/traci-lowen...,Hi! Great question. I believe counseling doe...,3,1007,Does counseling really do anything that can he...
936,936,How does someone approach a counselor?,How does a person start the counseling process?,https://counselchat.com/questions/how-does-som...,counseling-fundamentals,Cimberly R. NeskerRegistered Psychotherapist (...,https://counselchat.com/therapists/cimberly-r-...,"Hello, There are many ways to approach a couns...",3,2597,How does a person start the counseling process...
937,937,What happens in a counseling session?,"After first meeting the client, what is the pr...",https://counselchat.com/questions/what-happens...,counseling-fundamentals,2nd Chance Counseling Service Online Addiction...,https://counselchat.com/therapists/2nd-chance-...,A good therapist will discuss what brought you...,0,36,"After first meeting the client, what is the pr..."
938,938,What are the skills needed by a counselor?,What are the basic skills a good counselor nee...,https://counselchat.com/questions/what-are-the...,counseling-fundamentals,Kevin MimmsBuilding Better Connections,https://counselchat.com/therapists/kevin-mimms,"Here is a list of skills I use every session, ...",0,123,What are the basic skills a good counselor nee...


In [8]:
dataset_df_final = dataset_df_top_votes[['topic', 'question', 'answerText']]
dataset_df_final

Unnamed: 0,topic,question,answerText
0,depression,I have so many issues to address. I have a his...,It is very common for people to have multiple ...
1,depression,I have been diagnosed with general anxiety and...,"This can be a difficult situation. Typically,..."
2,depression,My mother is combative with me when I say I do...,Do you live with your mom and have constant in...
3,depression,There are many people willing to lovingly prov...,I truly understand what you are saying. I want...
4,depression,My girlfriend just quit drinking and she becam...,You're probably not going to like my answer.Yo...
...,...,...,...
935,counseling-fundamentals,Does counseling really do anything that can he...,Hi! Great question. I believe counseling doe...
936,counseling-fundamentals,How does a person start the counseling process...,"Hello, There are many ways to approach a couns..."
937,counseling-fundamentals,"After first meeting the client, what is the pr...",A good therapist will discuss what brought you...
938,counseling-fundamentals,What are the basic skills a good counselor nee...,"Here is a list of skills I use every session, ..."


### OpenAI Configuration and Responses

In [9]:
with open("../../api.key", 'r') as file:
    openai_api_key = file.read()

openai_client = OpenAI(api_key=openai_api_key)

### OpenAI Base Model

In [10]:
system_prompt_qa = 'You are an expert mental-health counsellor'
user_prompt_qa = '''A patient is suffering from ill mental health. The patient writes the following thoughts on a social media platform:

{question}

You need to respond to the user in a way that improves their overall mental health. You must return response in a json serializable format as following {{response: response_text}}
'''

In [11]:
def get_openai_response(system_prompt: str, user_prompt: str) -> str:
        
    completion = openai_client.chat.completions.create(
    model="gpt-4o",
    temperature=0,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
        ]
    )

    openai_response = completion.choices[0].message.content
    
    return openai_response

Get GPT Responses

In [None]:
# gpt_responses = []
# for index, row in tqdm(dataset_df_final.iterrows(), total=len(dataset_df_final)):
#     question_input = row['question']
#     gpt_resp = get_openai_response(system_prompt=system_prompt_qa, user_prompt_qa.format(question = question_input))
#     try:
#         gpt_answer = json.loads(gpt_resp.split("```")[1].replace('json',''))['response']
#         gpt_responses.append(gpt_answer)
#     except:
#         gpt_responses.append(gpt_resp)

# with open('response_generation_data/openai_que_resp.pkl', 'wb') as file:
#     pickle.dump(gpt_responses, file)

In [None]:
with open('response_generation_data/openai_que_resp.pkl', 'rb') as file:
    gpt_responses = pickle.load(file)

In [14]:
dataset_df_final['gpt_responses'] = gpt_responses
dataset_df_final.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset_df_final['gpt_responses'] = gpt_responses


Unnamed: 0,topic,question,answerText,gpt_responses
0,depression,I have so many issues to address. I have a his...,It is very common for people to have multiple ...,"First, I want to acknowledge your incredible s..."
1,depression,I have been diagnosed with general anxiety and...,"This can be a difficult situation. Typically,...",I'm sorry to hear about the difficulties you'r...
2,depression,My mother is combative with me when I say I do...,Do you live with your mom and have constant in...,I'm sorry to hear that you're going through th...
3,depression,There are many people willing to lovingly prov...,I truly understand what you are saying. I want...,Thank you for sharing your feelings. It's not ...
4,depression,My girlfriend just quit drinking and she becam...,You're probably not going to like my answer.Yo...,It's great that you want to support your girlf...


### OpenAI Fine-Tuned Model Response

In [15]:
def get_openai_response_finetuned(system_prompt: str, user_prompt: str) -> str:
        
    completion = openai_client.chat.completions.create(
    model="ft:gpt-4o-2024-08-06:university-of-texas-at-austin:counselchat-clean:BE3PqwuO",
    temperature=0,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
        ],
    max_tokens=2048
    )

    openai_response = completion.choices[0].message.content
    
    return openai_response

In [None]:
# gpt_responses_ft = []
# system_prompt_qa = 'You are an expert mental health professional trained to counsel and guide patients suffering from ill mental-health'
# for index, row in tqdm(dataset_df_final.iterrows(), total=len(dataset_df_final)):
#     question_input = row['question']
#     try:
#         gpt_resp = get_openai_response_finetuned(system_prompt=system_prompt_qa, user_prompt=question_input)
#         gpt_responses_ft.append(gpt_resp)
#     except:
#         gpt_responses_ft.append('')
        
# with open('response_generation_data/openai_ft_que_resp.pkl', 'wb') as file:
#     pickle.dump(gpt_responses_ft, file)

In [None]:
with open('response_generation_data/openai_ft_que_resp.pkl', 'rb') as file:
    gpt_responses_ft = pickle.load(file)

In [18]:
dataset_df_final['gpt_responses_ft'] = gpt_responses_ft
dataset_df_final.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset_df_final['gpt_responses_ft'] = gpt_responses_ft


Unnamed: 0,topic,question,answerText,gpt_responses,gpt_responses_ft
0,depression,I have so many issues to address. I have a his...,It is very common for people to have multiple ...,"First, I want to acknowledge your incredible s...","Hi, and thanks for your question. I think it's..."
1,depression,I have been diagnosed with general anxiety and...,"This can be a difficult situation. Typically,...",I'm sorry to hear about the difficulties you'r...,I would suggest that you contact your local fa...
2,depression,My mother is combative with me when I say I do...,Do you live with your mom and have constant in...,I'm sorry to hear that you're going through th...,"Hi, and thanks for your question. I can see ho..."
3,depression,There are many people willing to lovingly prov...,I truly understand what you are saying. I want...,Thank you for sharing your feelings. It's not ...,I wonder if you are feeling like you don't bel...
4,depression,My girlfriend just quit drinking and she becam...,You're probably not going to like my answer.Yo...,It's great that you want to support your girlf...,"Hi, and thanks for your question. I can unders..."


### Inferencing from LLAMA Base Model

In [19]:
with open('hf_token.key', 'r') as f:
    hf_token = f.read()

model_id = "meta-llama/Llama-3.2-1B-Instruct"

In [20]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
tokenizer.model_max_length = 2048

In [21]:
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto") # Must be float32 for MacBooks!
model.config.pad_token_id = tokenizer.pad_token_id # Updating the model config to use the special pad token

In [22]:
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

In [23]:
def get_llama_response(question_inputs: str):
    
    llama_inputs = [[{"role": "user", "content": question}] for question in question_inputs]

    texts = tokenizer.apply_chat_template(llama_inputs, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(texts, padding="longest", truncation=True, return_tensors="pt")
    inputs = {key: val.to(model.device) for key, val in inputs.items()}
    temp_texts = tokenizer.batch_decode(inputs['input_ids'], skip_special_tokens=True)
    
    gen_tokens = model.generate(
        **inputs, 
        max_new_tokens=2048, 
        pad_token_id=tokenizer.pad_token_id, 
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.7,
        # top_p=0.9
    )

    gen_text = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)
    gen_text = [i[len(temp_texts[idx]):] for idx, i in enumerate(gen_text)]
    
    return gen_text

In [24]:
batch_size = 100
question_list = dataset_df_final['question'].to_list()
batch_indices = np.arange(0, len(question_list), batch_size)
if batch_indices[-1] != len(question_list):
    batch_indices = np.append(batch_indices, len(question_list))

In [None]:
# llama_responses_base = []
# for i in tqdm(range(0, len(batch_indices) - 1)):
#     questions_input = question_list[batch_indices[i]:batch_indices[i+1]]
#     llama_resp = get_llama_response(questions_input)
#     llama_responses_base = llama_responses_base + llama_resp

# with open('response_generation_data/llama_que_resp_base.pkl', 'wb') as file:
#     pickle.dump(llama_responses_base, file)

In [None]:
with open('response_generation_data/llama_que_resp_base.pkl', 'rb') as file:
    llama_responses_base = pickle.load(file)

In [27]:
dataset_df_final['llama_responses_base'] = llama_responses_base
dataset_df_final.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset_df_final['llama_responses_base'] = llama_responses_base


Unnamed: 0,topic,question,answerText,gpt_responses,gpt_responses_ft,llama_responses_base
0,depression,I have so many issues to address. I have a his...,It is very common for people to have multiple ...,"First, I want to acknowledge your incredible s...","Hi, and thanks for your question. I think it's...",I'm so sorry to hear that you're going through...
1,depression,I have been diagnosed with general anxiety and...,"This can be a difficult situation. Typically,...",I'm sorry to hear about the difficulties you'r...,I would suggest that you contact your local fa...,I can sense your frustration and concern. It's...
2,depression,My mother is combative with me when I say I do...,Do you live with your mom and have constant in...,I'm sorry to hear that you're going through th...,"Hi, and thanks for your question. I can see ho...",It's not uncommon for family members to strugg...
3,depression,There are many people willing to lovingly prov...,I truly understand what you are saying. I want...,Thank you for sharing your feelings. It's not ...,I wonder if you are feeling like you don't bel...,Feeling like you don't belong in your current ...
4,depression,My girlfriend just quit drinking and she becam...,You're probably not going to like my answer.Yo...,It's great that you want to support your girlf...,"Hi, and thanks for your question. I can unders...",It's great that you want to support your girlf...


### Inferencing from LLaMA Fine-Tune Model

In [28]:
model_id = "llama32-sft-fine-tune-counselchat"

In [29]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = "left"
tokenizer.model_max_length = 2048

In [30]:
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto") # Must be float32 for MacBooks!
model.config.pad_token_id = tokenizer.pad_token_id # Updating the model config to use the special pad token

In [31]:
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

In [32]:
def get_llama_response(question_inputs: str):
    
    llama_inputs = [[{"role": "user", "content": question}] for question in question_inputs]

    texts = tokenizer.apply_chat_template(llama_inputs, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(texts, padding="longest", truncation=True, return_tensors="pt")
    inputs = {key: val.to(model.device) for key, val in inputs.items()}
    temp_texts = tokenizer.batch_decode(inputs['input_ids'], skip_special_tokens=True)
    
    gen_tokens = model.generate(
        **inputs, 
        max_new_tokens=2048, 
        pad_token_id=tokenizer.pad_token_id, 
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.7,
        # top_p=0.9
    )

    gen_text = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)
    gen_text = [i[len(temp_texts[idx]):] for idx, i in enumerate(gen_text)]
    
    return gen_text

In [33]:
batch_size = 100
question_list = dataset_df_final['question'].to_list()
batch_indices = np.arange(0, len(question_list), batch_size)
if batch_indices[-1] != len(question_list):
    batch_indices = np.append(batch_indices, len(question_list))

In [None]:
# llama_responses = []
# for i in tqdm(range(0, len(batch_indices) - 1)):
#     questions_input = question_list[batch_indices[i]:batch_indices[i+1]]
#     llama_resp = get_llama_response(questions_input)
#     llama_responses = llama_responses + llama_resp

# with open('response_generation_data/llama_que_resp.pkl', 'wb') as file:
#     pickle.dump(llama_responses, file)

In [None]:
with open('response_generation_data/llama_que_resp.pkl', 'rb') as file:
    llama_responses = pickle.load(file)

In [36]:
dataset_df_final['llama_responses'] = llama_responses
dataset_df_final.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset_df_final['llama_responses'] = llama_responses


Unnamed: 0,topic,question,answerText,gpt_responses,gpt_responses_ft,llama_responses_base,llama_responses
0,depression,I have so many issues to address. I have a his...,It is very common for people to have multiple ...,"First, I want to acknowledge your incredible s...","Hi, and thanks for your question. I think it's...",I'm so sorry to hear that you're going through...,No. I think that's a common misconception tha...
1,depression,I have been diagnosed with general anxiety and...,"This can be a difficult situation. Typically,...",I'm sorry to hear about the difficulties you'r...,I would suggest that you contact your local fa...,I can sense your frustration and concern. It's...,I can understand how frustrating it must be to...
2,depression,My mother is combative with me when I say I do...,Do you live with your mom and have constant in...,I'm sorry to hear that you're going through th...,"Hi, and thanks for your question. I can see ho...",It's not uncommon for family members to strugg...,I am sorry to hear about the difficulties you ...
3,depression,There are many people willing to lovingly prov...,I truly understand what you are saying. I want...,Thank you for sharing your feelings. It's not ...,I wonder if you are feeling like you don't bel...,Feeling like you don't belong in your current ...,Hello! I'm so sorry you're feeling this way. F...
4,depression,My girlfriend just quit drinking and she becam...,You're probably not going to like my answer.Yo...,It's great that you want to support your girlf...,"Hi, and thanks for your question. I can unders...",It's great that you want to support your girlf...,I am sorry to hear that you and your girlfrien...
