### Response Generation

This notebook generates responses to CounselChat questions using four models:

1. Base GPT (gpt-4o)
2. Fine-Tuned GPT (gpt-4o)
3. Base LLaMA (LLaMA-3.2 3B Instruct)
4. Fine-Tuned LLaMA

In [1]:
import os

In [None]:
# Expose only GPU 0 to CUDA. This is set before `torch` is imported in the next cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
import json
import pickle

import numpy as np
import pandas as pd
import torch
from datasets import load_dataset
from openai import OpenAI
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

### Read the Processed CounselChat Dataset

In [None]:
# Load every split of CounselChat and shuffle with a fixed seed so that the
# row order is the same on every run.
dataset_name = "nbertagnolli/counsel-chat"
dataset = load_dataset(dataset_name, split="all").shuffle(seed=42)

In [None]:
# Convert the Hugging Face dataset to a pandas DataFrame and preview the first rows.
dataset_df = dataset.to_pandas()
dataset_df.head()

In [None]:
# Keep a single answer per question: within each `questionID` group, sort the
# answers by `upvotes` (descending) and take the first row.
# `include_groups=False` keeps the grouping column out of the frame handed to
# the lambda; `reset_index()` turns `questionID` back into a regular column.
dataset_df_top_votes = dataset_df.groupby('questionID').apply(lambda x: x.sort_values('upvotes', ascending=False).iloc[0], include_groups=False).reset_index()
dataset_df_top_votes

In [None]:
# Build the text sent to the models: question body followed by the title.
# String concatenation with a missing value yields NaN for the whole row, so
# missing fields are treated as empty strings.
# NOTE(review): assumes `questionText`/`questionTitle` can be null for some
# rows - confirm against the dataset.
dataset_df_top_votes['question'] = (
    dataset_df_top_votes['questionText'].fillna('')
    + " "
    + dataset_df_top_votes['questionTitle'].fillna('')
)
dataset_df_top_votes

In [None]:
# Keep only the columns needed downstream. `.copy()` makes this an independent
# frame: later cells add response columns to it, and assigning into a slice of
# `dataset_df_top_votes` would trigger pandas' SettingWithCopy behaviour.
dataset_df_final = dataset_df_top_votes[['topic', 'question', 'answerText']].copy()
dataset_df_final

### OpenAI Configuration and Responses

In [None]:
# Read the OpenAI API key from a file kept outside the repository tree.
# `.strip()` removes the trailing newline most editors append; without it the
# newline would be sent as part of the credential.
with open("../../api.key", 'r') as file:
    openai_api_key = file.read().strip()

openai_client = OpenAI(api_key=openai_api_key)

### OpenAI Base Model

In [None]:
# System message used for the base GPT model.
system_prompt_qa = 'You are an expert mental-health counsellor'
# User-message template. `{question}` is filled in with `str.format`; the
# doubled braces `{{ ... }}` are escapes that render as literal `{ ... }`.
user_prompt_qa = '''A patient is suffering from ill mental health. The patient writes the following thoughts on a social media platform:

{question}

You need to respond to the user in a way that improves their overall mental health. You must return response in a json serializable format as following {{response: response_text}}
'''

In [None]:
def get_openai_response(
    system_prompt: str,
    user_prompt: str,
    model: str = "gpt-4o",
    temperature: float = 0,
    client=None,
) -> str:
    """Send one system + user message pair to an OpenAI chat model and return the reply.

    Args:
        system_prompt: Content of the ``system`` message.
        user_prompt: Content of the ``user`` message.
        model: Chat model to query. Defaults to the base ``gpt-4o``.
        temperature: Sampling temperature. Defaults to 0.
        client: Object exposing ``chat.completions.create``. When omitted, the
            notebook-level ``openai_client`` is used.

    Returns:
        The ``content`` of the first choice's message, exactly as returned by
        the API.
    """
    # Resolve the client at call time so the notebook global is only needed
    # when no explicit client is supplied.
    active_client = openai_client if client is None else client

    completion = active_client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )

    return completion.choices[0].message.content

Get GPT Responses

In [None]:
# NOTE: Generation is disabled because the responses are cached in
# response_generation_data/openai_que_resp.pkl, which the next cell loads.
# Uncomment to regenerate. `json` must be imported first: the bare `except`
# below would otherwise hide the NameError and store every raw reply.
# gpt_responses = []
# for index, row in tqdm(dataset_df_final.iterrows(), total=len(dataset_df_final)):
#     question_input = row['question']
#     gpt_resp = get_openai_response(system_prompt=system_prompt_qa, user_prompt=user_prompt_qa.format(question=question_input))
#     try:
#         # Take the text between the first pair of ``` fences and read its 'response' field.
#         gpt_answer = json.loads(gpt_resp.split("```")[1].replace('json',''))['response']
#         gpt_responses.append(gpt_answer)
#     except:
#         # Fall back to the raw reply when it is not in the expected format.
#         gpt_responses.append(gpt_resp)

# with open('response_generation_data/openai_que_resp.pkl', 'wb') as file:
#     pickle.dump(gpt_responses, file)

In [None]:
# Load the cached base-GPT responses written by the (commented-out) cell above.
# NOTE: pickle.load should only be used on files you produced yourself.
with open('response_generation_data/openai_que_resp.pkl', 'rb') as file:
    gpt_responses = pickle.load(file)

In [None]:
# Attach the base-GPT responses as a new column.
# NOTE(review): assumes the pickled list is in the same row order and has the
# same length as `dataset_df_final` - confirm.
dataset_df_final['gpt_responses'] = gpt_responses
dataset_df_final.head()

### OpenAI Fine-Tuned Model Response

In [None]:
def get_openai_response_finetuned(
    system_prompt: str,
    user_prompt: str,
    model: str = "ft:gpt-4o-2024-08-06:university-of-texas-at-austin:counselchat-clean:BE3PqwuO",
    temperature: float = 0,
    max_tokens: int = 2048,
    client=None,
) -> str:
    """Send one system + user message pair to the fine-tuned GPT model and return the reply.

    Args:
        system_prompt: Content of the ``system`` message.
        user_prompt: Content of the ``user`` message.
        model: Fine-tuned model identifier. Defaults to the CounselChat
            fine-tune of ``gpt-4o-2024-08-06``.
        temperature: Sampling temperature. Defaults to 0.
        max_tokens: Upper bound on the number of generated tokens.
        client: Object exposing ``chat.completions.create``. When omitted, the
            notebook-level ``openai_client`` is used.

    Returns:
        The ``content`` of the first choice's message, exactly as returned by
        the API.
    """
    # Resolve the client at call time so the notebook global is only needed
    # when no explicit client is supplied.
    active_client = openai_client if client is None else client

    completion = active_client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=max_tokens,
    )

    return completion.choices[0].message.content

In [None]:
# NOTE: Generation is disabled because the responses are cached in
# response_generation_data/openai_ft_que_resp.pkl, which the next cell loads.
# Uncommenting this cell rebinds `system_prompt_qa`, replacing the prompt
# defined for the base model. The raw question is sent as the user message
# (no `user_prompt_qa` template). The bare `except` stores '' for any failed
# request, so failures are silent.
# gpt_responses_ft = []
# system_prompt_qa = 'You are an expert mental health professional trained to counsel and guide patients suffering from ill mental-health'
# for index, row in tqdm(dataset_df_final.iterrows(), total=len(dataset_df_final)):
#     question_input = row['question']
#     try:
#         gpt_resp = get_openai_response_finetuned(system_prompt=system_prompt_qa, user_prompt=question_input)
#         gpt_responses_ft.append(gpt_resp)
#     except:
#         gpt_responses_ft.append('')

# with open('response_generation_data/openai_ft_que_resp.pkl', 'wb') as file:
#     pickle.dump(gpt_responses_ft, file)

In [None]:
# Load the cached fine-tuned-GPT responses written by the (commented-out) cell above.
# NOTE: pickle.load should only be used on files you produced yourself.
with open('response_generation_data/openai_ft_que_resp.pkl', 'rb') as file:
    gpt_responses_ft = pickle.load(file)

In [None]:
# Attach the fine-tuned-GPT responses as a new column.
# NOTE(review): assumes the pickled list matches `dataset_df_final` in order
# and length - confirm.
dataset_df_final['gpt_responses_ft'] = gpt_responses_ft
dataset_df_final.head()

### Inference with the LLaMA Base Model

In [None]:
# Read the Hugging Face access token. `.strip()` removes the trailing newline
# that would otherwise become part of the token.
with open('hf_token.key', 'r') as f:
    hf_token = f.read().strip()

# Base (not fine-tuned) instruct model.
model_id = "meta-llama/Llama-3.2-3B-Instruct"

In [None]:
# Pass the token read from hf_token.key explicitly; previously it was loaded
# but never used, so access depended on an ambient Hugging Face login.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token.strip())
# Reuse EOS as the padding token so prompts can be padded for batching.
tokenizer.pad_token = tokenizer.eos_token
# Pad on the left so every prompt ends at the same position and generation
# continues directly from the prompt.
tokenizer.padding_side = "left"
tokenizer.model_max_length = 2048

In [None]:
# Pass the token read from hf_token.key explicitly so the download does not
# depend on an ambient Hugging Face login.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto", token=hf_token.strip()) # Must be float32 for MacBooks!
model.config.pad_token_id = tokenizer.pad_token_id # Updating the model config to use the special pad token

In [None]:
# Token ids passed to `generate` as `eos_token_id`: the tokenizer's EOS token
# and the `<|eot_id|>` token.
terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

In [None]:
def get_llama_response(
    question_inputs: list[str],
    max_new_tokens: int = 2048,
    temperature: float = 0.7,
) -> list[str]:
    """Generate one sampled reply per question with the currently loaded Llama model.

    Uses the notebook-level ``tokenizer``, ``model`` and ``terminators``.

    Args:
        question_inputs: Questions to answer as one batch. A single string is
            treated as a batch of one question.
        max_new_tokens: Maximum number of tokens generated per question.
        temperature: Sampling temperature.

    Returns:
        The generated replies, in the same order as ``question_inputs``.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(question_inputs, str):
        question_inputs = [question_inputs]
    questions = list(question_inputs)
    if not questions:
        # Avoid sending an empty batch to the tokenizer.
        return []

    conversations = [[{"role": "user", "content": question}] for question in questions]
    prompts = tokenizer.apply_chat_template(conversations, tokenize=False, add_generation_prompt=True)

    # The chat template already writes the special tokens into the prompt
    # text, so the tokenizer must not add them a second time.
    inputs = tokenizer(
        prompts,
        padding="longest",
        truncation=True,
        add_special_tokens=False,
        return_tensors="pt",
    )
    inputs = {key: val.to(model.device) for key, val in inputs.items()}

    gen_tokens = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        # top_p=0.9
    )

    # `generate` returns prompt + continuation. Every padded prompt has the
    # same length, so slicing at that length leaves only the new tokens. This
    # is more reliable than cutting the decoded string by prompt length.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.batch_decode(gen_tokens[:, prompt_len:], skip_special_tokens=True)

In [None]:
batch_size = 100
question_list = dataset_df_final['question'].to_list()
# Slice boundaries for batching: 0, batch_size, 2 * batch_size, ..., len(question_list).
batch_indices = np.arange(0, len(question_list), batch_size)
# np.arange excludes the stop value, so append the final boundary. The length
# check keeps an empty question list from raising IndexError on [-1].
if len(batch_indices) == 0 or batch_indices[-1] != len(question_list):
    batch_indices = np.append(batch_indices, len(question_list))

In [None]:
# NOTE: Generation is disabled because the responses are cached in
# response_generation_data/llama_que_resp_base.pkl, which the next cell loads.
# Uncomment to regenerate: each consecutive pair of `batch_indices` delimits
# one batch of questions.
# llama_responses_base = []
# for i in tqdm(range(0, len(batch_indices) - 1)):
#     questions_input = question_list[batch_indices[i]:batch_indices[i+1]]
#     llama_resp = get_llama_response(questions_input)
#     llama_responses_base = llama_responses_base + llama_resp

# with open('response_generation_data/llama_que_resp_base.pkl', 'wb') as file:
#     pickle.dump(llama_responses_base, file)

In [None]:
# Load the cached base-LLaMA responses written by the (commented-out) cell above.
# NOTE: pickle.load should only be used on files you produced yourself.
with open('response_generation_data/llama_que_resp_base.pkl', 'rb') as file:
    llama_responses_base = pickle.load(file)

In [None]:
# Attach the base-LLaMA responses as a new column.
# NOTE(review): assumes the pickled list matches `dataset_df_final` in order
# and length - confirm.
dataset_df_final['llama_responses_base'] = llama_responses_base
dataset_df_final.head()

### Inference with the Fine-Tuned LLaMA Model

In [None]:
# Rebind `model_id` to the fine-tuned checkpoint.
# NOTE(review): presumably a local directory produced by the fine-tuning run - verify.
model_id = "llama32-sft-fine-tune-counselchat"

In [None]:
# Replace the base tokenizer with the one saved alongside the fine-tuned model.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Batched generation pads the prompts. If the checkpoint did not save a pad
# token, fall back to EOS, as is done for the base tokenizer.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Pad on the left so every prompt ends at the same position.
tokenizer.padding_side = "left"
tokenizer.model_max_length = 2048

In [None]:
# Load the fine-tuned checkpoint; this rebinds `model`, replacing the base model.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto") # Must be float32 for MacBooks!
model.config.pad_token_id = tokenizer.pad_token_id # Updating the model config to use the special pad token

In [None]:
# Recompute the stop-token ids from the fine-tuned tokenizer: its EOS token
# and the `<|eot_id|>` token. They are passed to `generate` as `eos_token_id`.
terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

In [None]:
def get_llama_response(
    question_inputs: list[str],
    max_new_tokens: int = 2048,
    temperature: float = 0.7,
) -> list[str]:
    """Generate one sampled reply per question with the currently loaded Llama model.

    Uses the notebook-level ``tokenizer``, ``model`` and ``terminators``. These
    are looked up at call time, so at this point in the notebook they refer to
    the fine-tuned checkpoint.

    Args:
        question_inputs: Questions to answer as one batch. A single string is
            treated as a batch of one question.
        max_new_tokens: Maximum number of tokens generated per question.
        temperature: Sampling temperature.

    Returns:
        The generated replies, in the same order as ``question_inputs``.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(question_inputs, str):
        question_inputs = [question_inputs]
    questions = list(question_inputs)
    if not questions:
        # Avoid sending an empty batch to the tokenizer.
        return []

    conversations = [[{"role": "user", "content": question}] for question in questions]
    prompts = tokenizer.apply_chat_template(conversations, tokenize=False, add_generation_prompt=True)

    # The chat template already writes the special tokens into the prompt
    # text, so the tokenizer must not add them a second time.
    # NOTE(review): assumes the fine-tuned tokenizer's chat template emits the
    # BOS token like the base Llama 3 template - confirm.
    inputs = tokenizer(
        prompts,
        padding="longest",
        truncation=True,
        add_special_tokens=False,
        return_tensors="pt",
    )
    inputs = {key: val.to(model.device) for key, val in inputs.items()}

    gen_tokens = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        # top_p=0.9
    )

    # `generate` returns prompt + continuation. Every padded prompt has the
    # same length, so slicing at that length leaves only the new tokens. This
    # is more reliable than cutting the decoded string by prompt length.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.batch_decode(gen_tokens[:, prompt_len:], skip_special_tokens=True)

In [None]:
batch_size = 100
question_list = dataset_df_final['question'].to_list()
# Slice boundaries for batching: 0, batch_size, 2 * batch_size, ..., len(question_list).
batch_indices = np.arange(0, len(question_list), batch_size)
# np.arange excludes the stop value, so append the final boundary. The length
# check keeps an empty question list from raising IndexError on [-1].
if len(batch_indices) == 0 or batch_indices[-1] != len(question_list):
    batch_indices = np.append(batch_indices, len(question_list))

In [None]:
# NOTE: Generation is disabled because the responses are cached in
# response_generation_data/llama_que_resp.pkl, which the next cell loads.
# Uncomment to regenerate: each consecutive pair of `batch_indices` delimits
# one batch of questions.
# llama_responses = []
# for i in tqdm(range(0, len(batch_indices) - 1)):
#     questions_input = question_list[batch_indices[i]:batch_indices[i+1]]
#     llama_resp = get_llama_response(questions_input)
#     llama_responses = llama_responses + llama_resp

# with open('response_generation_data/llama_que_resp.pkl', 'wb') as file:
#     pickle.dump(llama_responses, file)

In [None]:
# Load the cached fine-tuned-LLaMA responses written by the (commented-out) cell above.
# NOTE: pickle.load should only be used on files you produced yourself.
with open('response_generation_data/llama_que_resp.pkl', 'rb') as file:
    llama_responses = pickle.load(file)

In [None]:
# Attach the fine-tuned-LLaMA responses as a new column.
# NOTE(review): assumes the pickled list matches `dataset_df_final` in order
# and length - confirm.
dataset_df_final['llama_responses'] = llama_responses
dataset_df_final.head()