# Importing required libraries

In [None]:
!pip install transformers torch accelerate tensorflow-hub bert-tensorflow tensorflow tqdm bert-score

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM, Trainer, TrainingArguments, BertTokenizer, BertForSequenceClassification, MarianMTModel, MarianTokenizer, BertConfig
import torch
from transformers import BertTokenizer, BertForSequenceClassification, TFBertForSequenceClassification
from transformers import LlamaTokenizer, LlamaForCausalLM
from transformers import pipeline
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import resample
import pandas as pd
import tensorflow_hub as hub
import tensorflow as tf
from datetime import datetime
from torch.utils.data import DataLoader
import re
import nltk
from nltk.corpus import wordnet
import random
from tqdm import tqdm
import concurrent.futures
from nltk.tokenize import sent_tokenize
import matplotlib.pyplot as plt
import json
import numpy as np
from bert_score import score
from google.colab import drive
drive.mount("/content/drive")


# Importing user summaries and risk levels

In [None]:
# Per-user summaries; later cells read the 'userid' and 'summary' columns.
summary_path = "/content/drive/My Drive/Diss_Dataset/summary_by_user.csv"
summary_df = pd.read_csv(summary_path)
summary_df.head()  # NOTE(review): not the last expression of the cell, so the preview is discarded

# Per-user risk table; create_summary reads 'userid', 'risk_rating' and
# 'supportiveness_ratio' from it.
user_risk_supportiveness_path = "/content/drive/My Drive/Diss_Dataset/user_risk_with_supportiveness.csv"
risk_supportiveness_df = pd.read_csv(user_risk_supportiveness_path)

# Formatting the summary with the risk level and the percentage of supportive sentences

In [None]:
def create_summary(summary, userid, risk_supportiveness_df):
    """Prefix a user's summary with their risk level and supportiveness share.

    Args:
        summary: Free-text summary of the user's posts.
        userid: Identifier matched against the ``userid`` column.
        risk_supportiveness_df: DataFrame with ``userid``, ``risk_rating`` and
            ``supportiveness_ratio`` columns. Only the first matching row is
            used.

    Returns:
        ``summary`` preceded by one sentence stating the risk label and the
        percentage of supportive sentences. Both read "Unknown" when the
        user has no row in ``risk_supportiveness_df``.
    """
    # Ratings outside 1-5 (including 0) fall through to "No".
    risk_labels = {5: "Very High", 4: "High", 3: "Medium", 2: "Low", 1: "Very Low"}

    risk_info = risk_supportiveness_df[risk_supportiveness_df['userid'] == userid]
    if risk_info.empty:
        # No arithmetic on the placeholder: multiplying/rounding the string
        # "Unknown" would raise instead of producing a summary.
        risk = "Unknown"
        supportive_percentage = "Unknown"
    else:
        risk_val = risk_info['risk_rating'].values[0]
        risk = risk_labels.get(risk_val, "No")
        supportiveness_ratio = risk_info['supportiveness_ratio'].values[0]
        supportive_percentage = round(supportiveness_ratio * 100, 2)

    full_summary = f"The author indicates {risk} suicidal risk, with {supportive_percentage}% of their sentences supporting other users. {summary}"
    return full_summary


# Clearing GPU RAM between models

In [None]:
import gc

# Drop the notebook's references to the current model and tokenizer. Using
# pop() instead of `del` keeps the cell re-runnable when nothing is loaded.
for _name in ("model1", "tokenizer1"):
    globals().pop(_name, None)

# Collect first so unreachable tensors are actually freed, then hand the
# now-unused cached blocks back to the GPU driver.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Stand-alone cell: releases PyTorch's cached, currently unused GPU memory.
torch.cuda.empty_cache()

# Initialising a model

In [None]:
# Load the tokenizer and causal LM stored under the DPO7b directory on Drive.
# NOTE(review): no torch_dtype is passed here, whereas the Mistral cell passes
# torch.bfloat16 — confirm the difference is intended.
model_name1 = "/content/drive/My Drive/Diss_Dataset/DPO7b"
tokenizer1 = AutoTokenizer.from_pretrained(model_name1)
model1 = AutoModelForCausalLM.from_pretrained(model_name1, device_map="auto")

In [None]:
# Load the tokenizer and causal LM stored under the Mistral7b directory on
# Drive, in bfloat16. Rebinds model_name1 / tokenizer1 / model1, so whichever
# model cell ran last is the one used by the generation loop.
model_name1 = "/content/drive/My Drive/Diss_Dataset/Mistral7b"
tokenizer1 = AutoTokenizer.from_pretrained(model_name1)
model1 = AutoModelForCausalLM.from_pretrained(model_name1, torch_dtype=torch.bfloat16, device_map="auto")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

# Prompts for Tulu-2-DPO-7b

In [None]:
# Zero-shot prompt using <|user|> / <|assistant|> turn markers.
# `{summary}` is filled via str.format in the generation loop.
# The literal "Response:" doubles as the marker that
# extract_assistant_response splits the decoded output on.
# The string is not raw, so each `\n` is a real newline in addition to the
# line break that follows it.
Zero_prompt_model1 = """
<|user|> In one paragraph, produce a response to the patient (referred to as the author), based on the provided summary.\n
Assume the patient is based in the UK with the following helplines:\n
Emergency Services: 999.
Samaritans helpline: 116 123.
SHOUT texting helpline: 85258.

Input summary to be used for response: {summary}\n

<|assistant|> Response:\n

"""

In [None]:
# Strategy-guided ("CoT") prompt using <|user|> / <|assistant|> turn markers.
# It lists candidate response strategies per message characteristic and the
# UK helplines to refer to. `{summary}` is filled via str.format in the
# generation loop; "Response:" is the marker extract_assistant_response
# splits on.
# NOTE(review): the prompt text contains spelling slips ("supportative",
# "stragegies", "potiential", "patients's", "Cogitive", "attribition") and
# says "customer" where "patient" is used elsewhere. Correcting them changes
# the model input, so confirm before editing.
CoT_prompt_model1 = """
<|user|>
As a mental health expert, your task is to provide a positive, empathetic and supportative message to the patient (referred to as the author), directly from the provided summary.

Consider:
**
Consider these suggested response stragegies by potiential characteristics of the message, only include the response strategies most related to the patients's needs:
Loss of control:  Advising, Interpretation, Offering Group Support, Emotional Support.
Acute loneliness: Offering Group Support, Cogitive Change Inducement, Interpretation.
Emptiness: Persuasion, Interpretation, Cognitive Change Inducement, Advising.
Narcissistic wound: Empowerment, Interpretation, Persuasion, Emotional Support.
Irreversibility: Emotional Support, Advising, Persuasion, Offering Group Support.
Loss of energy: Empowerment, Emotional Support, Advising.
Emotional flooding: Persuasion, Empowerment, Advising.
Cognitive attribition: Cognitive change inducement, Persuasion, Empowerment.
Level of risk: Persuasion, Offering group support, Advising.

Also, referring the customer to get further help may be useful in extreme cases.
For this task, all referrals must go to UK helplines:

For urgent risk cases - Emergency Services: 999.
For high risk cases - Samaritans: 116 123.
For high risk cases where the patient might not want to call - Text SHOUT: 85258.
**

Input summary to be used for response: {summary}\n

<|assistant|> Response:\n

"""

# Creating responses for chosen model

In [None]:
def extract_assistant_response(text):
    """Pull the generated reply out of a decoded prompt-plus-completion string.

    The text between the first and second occurrence of "Response:" is taken
    (or everything after the first when there is no second). Without the
    marker the whole text is used. The result is cut at the first run of
    three newlines and stripped of surrounding whitespace.
    """
    assistant_marker = "Response:"
    _, found_marker, after_marker = text.partition(assistant_marker)
    if found_marker:
        # Keep only the segment up to the next marker, if the marker repeats.
        candidate = after_marker.partition(assistant_marker)[0]
    else:
        candidate = text
    first_section = candidate.strip().partition('\n\n\n')[0]
    return first_section.strip()

results = []

# For every user, build the risk-annotated summary, generate one reply per
# prompt template with the currently loaded model, and collect the replies.
for user_id in summary_df['userid'].unique():
    print(user_id)  # progress indicator
    summary = summary_df[summary_df['userid'] == user_id]['summary'].values[0]
    full_summary = create_summary(summary, user_id, risk_supportiveness_df)

    generated = {}
    for response_key, prompt_template in (
        ('response_zero', Zero_prompt_model1),
        ('response_cot', CoT_prompt_model1),
    ):
        prompt_text = prompt_template.format(summary=full_summary)
        model_inputs = tokenizer1(prompt_text, return_tensors="pt").to("cuda")
        output_ids = model1.generate(**model_inputs, max_new_tokens=500)
        # The decoded sequence still contains the prompt; the extractor
        # keeps only what follows the "Response:" marker.
        decoded = tokenizer1.decode(output_ids[0], skip_special_tokens=True)
        generated[response_key] = extract_assistant_response(decoded)

    results.append({
        'user_id': user_id,
        'summary': full_summary,
        'response_zero': generated['response_zero'],
        'response_cot': generated['response_cot'],
    })

In [None]:
# Dump the raw list of per-user result dicts for a quick visual check.
print(results)

In [None]:
!pip install textstat

# Initialising and running response tests for Tulu Model


In [None]:
import textstat
from concurrent.futures import ThreadPoolExecutor
# Sentiment classifier, built directly from the model name.
sentiment_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
sentiment_pipeline = pipeline("sentiment-analysis", model=sentiment_model_name)
# Empathy classifier: tokenizer and model are loaded explicitly and then
# wrapped in a text-classification pipeline.
empathy_model_name = "bdotloh/roberta-base-empathy"
empathy_tokenizer = AutoTokenizer.from_pretrained(empathy_model_name)
empathy_model = AutoModelForSequenceClassification.from_pretrained(empathy_model_name)
empathy_pipeline = pipeline("text-classification", model=empathy_model, tokenizer=empathy_tokenizer)

def flesch(response):
    """Return ``(reading_ease, kincaid_grade)`` for ``response`` via textstat."""
    reading_ease = textstat.flesch_reading_ease(response)
    kincaid_grade = textstat.flesch_kincaid_grade(response)
    return reading_ease, kincaid_grade

def bert_score(summary, response):
    """Score one response against its summary with BERTScore.

    The response is passed as the candidate and the summary as the reference.

    Returns:
        ``(precision, recall, f1)`` as plain Python floats.
    """
    candidates, references = [response], [summary]
    metric_tensors = score(candidates, references, lang="en", verbose=False)
    # Each tensor holds a single value here; mean() collapses it to a scalar.
    return tuple(tensor.mean().item() for tensor in metric_tensors)

def sentiment_analysis_batch(responses):
    """Run the sentiment classifier over all responses in one batch.

    Args:
        responses: List of response strings.

    Returns:
        One float per response: the ``score`` field that
        ``sentiment_pipeline`` reports for its predicted label. The label
        itself is not returned. An empty input yields an empty list.
    """
    if not responses:
        # Avoid asking the pipeline for a batch of size zero.
        return []
    predictions = sentiment_pipeline(
        responses,
        batch_size=len(responses),
        # Generated replies can run to 500 new tokens; clip them so they
        # cannot overflow the encoder's input window (assumed 512 tokens
        # for this RoBERTa-base checkpoint — TODO confirm).
        truncation=True,
        max_length=512,
    )
    return [prediction['score'] for prediction in predictions]

def empathy_detection_batch(responses):
    """Run the empathy classifier over all responses in one batch.

    Args:
        responses: List of response strings.

    Returns:
        One float per response: the ``score`` field that
        ``empathy_pipeline`` reports for its predicted label. The label
        itself is not returned. An empty input yields an empty list.
    """
    if not responses:
        # Avoid asking the pipeline for a batch of size zero.
        return []
    predictions = empathy_pipeline(
        responses,
        batch_size=len(responses),
        # Generated replies can run to 500 new tokens; clip them so they
        # cannot overflow the encoder's input window (assumed 512 tokens
        # for this RoBERTa-base checkpoint — TODO confirm).
        truncation=True,
        max_length=512,
    )
    return [prediction['score'] for prediction in predictions]


In [None]:
def _evaluate_responses(responses, summaries):
    """Compute every automatic metric for one list of generated responses.

    Args:
        responses: Generated response strings, one per user.
        summaries: The summaries the responses were generated from, aligned
            index-for-index with ``responses``.

    Returns:
        Dict mapping metric name to a list with one value per response.
    """
    metric_names = ('flesch_readability', 'flesch_grade', 'bert_precision',
                    'bert_recall', 'sentiment', 'empathy')
    if not responses:
        # Nothing generated yet: skip the scorers rather than call them
        # with an empty batch.
        return {name: [] for name in metric_names}

    # textstat is pure Python, so a plain loop is as fast as a thread pool.
    readability, grade = zip(*(flesch(response) for response in responses))
    # One batched call scores every (response, summary) pair while loading
    # the BERTScore model a single time; F1 is not reported.
    precision, recall, _f1 = score(responses, summaries, lang="en", verbose=False)
    return {
        'flesch_readability': list(readability),
        'flesch_grade': list(grade),
        'bert_precision': precision.tolist(),
        'bert_recall': recall.tolist(),
        'sentiment': sentiment_analysis_batch(responses),
        'empathy': empathy_detection_batch(responses),
    }


responses_zero = [result['response_zero'] for result in results]
responses_cot = [result['response_cot'] for result in results]
summaries = [result['summary'] for result in results]

metrics_zero = _evaluate_responses(responses_zero, summaries)
metrics_cot = _evaluate_responses(responses_cot, summaries)

# Module-level names read by the score-printing cell.
flesch_readability_zero = metrics_zero['flesch_readability']
flesch_grade_zero = metrics_zero['flesch_grade']
bert_precision_zero = metrics_zero['bert_precision']
bert_recall_zero = metrics_zero['bert_recall']
sentiment_zero = metrics_zero['sentiment']
empathy_zero = metrics_zero['empathy']
flesch_readability_cot = metrics_cot['flesch_readability']
flesch_grade_cot = metrics_cot['flesch_grade']
bert_precision_cot = metrics_cot['bert_precision']
bert_recall_cot = metrics_cot['bert_recall']
sentiment_cot = metrics_cot['sentiment']
empathy_cot = metrics_cot['empathy']

# Attach the per-user values to each result dict. The order fixes the column
# order of the DataFrame built from `results`.
metrics_by_prompt = {'zero': metrics_zero, 'cot': metrics_cot}
column_order = (
    ('flesch_readability', 'zero'), ('flesch_grade', 'zero'),
    ('flesch_readability', 'cot'), ('flesch_grade', 'cot'),
    ('bert_precision', 'zero'), ('bert_precision', 'cot'),
    ('bert_recall', 'zero'), ('bert_recall', 'cot'),
    ('sentiment', 'zero'), ('sentiment', 'cot'),
    ('empathy', 'zero'), ('empathy', 'cot'),
)
for i, result in enumerate(results):
    for metric, prompt_kind in column_order:
        result[f'{metric}_{prompt_kind}'] = metrics_by_prompt[prompt_kind][metric][i]


In [None]:
# Mean of every metric, printed per prompt type, then the per-user results
# are written to CSV on Drive.
score_report = (
    ("Zero Prompt Scores:", (
        ("Flesch Readability Score:", flesch_readability_zero),
        ("Flesch Grade Level Score:", flesch_grade_zero),
        ("BERT Precision Score:", bert_precision_zero),
        ("BERT Recall Score:", bert_recall_zero),
        ("Sentiment Score:", sentiment_zero),
        ("Empathy Score:", empathy_zero),
    )),
    ("\nCoT Prompt Scores:", (
        ("Flesch Readability Score:", flesch_readability_cot),
        ("Flesch Grade Level Score:", flesch_grade_cot),
        ("BERT Precision Score:", bert_precision_cot),
        ("BERT Recall Score:", bert_recall_cot),
        ("Sentiment Score:", sentiment_cot),
        ("Empathy Score:", empathy_cot),
    )),
)
for heading, metric_rows in score_report:
    print(heading)
    for label, values in metric_rows:
        print(label, sum(values)/len(values))

results_df = pd.DataFrame(results)
results_df.to_csv('/content/drive/My Drive/Diss_Dataset/results_chatbot3.csv', index=False)

# Prompts for Mistral-7b-instruct-v0.3 model

In [None]:
# Zero-shot prompt wrapped in [INST] ... [/INST]. Rebinds the same name as
# the earlier zero-shot prompt, so the generation loop uses whichever prompt
# cell ran last. `{summary}` is filled via str.format; "Response:" is the
# marker extract_assistant_response splits on.
# Lines starting with '#' inside the string are prompt text, not comments.
Zero_prompt_model1 = """
[INST]
In one paragraph, produce a response to the patient (referred to as the author), based on the provided summary.\n
Assume the patient is based in the UK with the following helplines:\n
Emergency Services: 999.
Samaritans helpline: 116 123.
SHOUT texting helpline: 85258.

#Input summary to be used for response: {summary}\n

Response:[/INST]

"""

In [None]:
# Strategy-guided ("CoT") prompt wrapped in [INST] ... [/INST]. Rebinds the
# same name as the earlier CoT prompt, so the generation loop uses whichever
# prompt cell ran last. `{summary}` is filled via str.format; "Response:" is
# the marker extract_assistant_response splits on.
# Lines starting with '#' inside the string are prompt text, not comments.
# NOTE(review): the prompt text contains spelling slips ("supportative",
# "stragegies", "potiential", "patients's", "Cogitive", "attribition") and
# says "customer" where "patient" is used elsewhere. Correcting them changes
# the model input, so confirm before editing.
CoT_prompt_model1 = """
[INST]
As a mental health expert, your task is to provide a positive, empathetic and supportative message to the patient (referred to as the author), directly from the provided summary.

#Consider:
**
Consider these suggested response stragegies by potiential characteristics of the message, only include the response strategies most related to the patients's needs:
Loss of control:  Advising, Interpretation, Offering Group Support, Emotional Support.
Acute loneliness: Offering Group Support, Cogitive Change Inducement, Interpretation.
Emptiness: Persuasion, Interpretation, Cognitive Change Inducement, Advising.
Narcissistic wound: Empowerment, Interpretation, Persuasion, Emotional Support.
Irreversibility: Emotional Support, Advising, Persuasion, Offering Group Support.
Loss of energy: Empowerment, Emotional Support, Advising.
Emotional flooding: Persuasion, Empowerment, Advising.
Cognitive attribition: Cognitive change inducement, Persuasion, Empowerment.
Level of risk: Persuasion, Offering group support, Advising.

Also, referring the customer to get further help may be useful in extreme cases.
For this task, all referrals must go to UK helplines:

For urgent risk cases - Emergency Services: 999.
For high risk cases - Samaritans: 116 123.
For high risk cases where the patient might not want to call - Text SHOUT: 85258.
**

#Input summary to be used for response:
{summary}\n
#Response: [/INST]

"""

# Initialising and running response tests for Mistral model

In [None]:
def _evaluate_responses(responses, summaries):
    """Compute every automatic metric for one list of generated responses.

    Args:
        responses: Generated response strings, one per user.
        summaries: The summaries the responses were generated from, aligned
            index-for-index with ``responses``.

    Returns:
        Dict mapping metric name to a list with one value per response.
    """
    metric_names = ('flesch_readability', 'flesch_grade', 'bert_precision',
                    'bert_recall', 'sentiment', 'empathy')
    if not responses:
        # Nothing generated yet: skip the scorers rather than call them
        # with an empty batch.
        return {name: [] for name in metric_names}

    # textstat is pure Python, so a plain loop is as fast as a thread pool.
    readability, grade = zip(*(flesch(response) for response in responses))
    # One batched call scores every (response, summary) pair while loading
    # the BERTScore model a single time; F1 is not reported.
    precision, recall, _f1 = score(responses, summaries, lang="en", verbose=False)
    return {
        'flesch_readability': list(readability),
        'flesch_grade': list(grade),
        'bert_precision': precision.tolist(),
        'bert_recall': recall.tolist(),
        'sentiment': sentiment_analysis_batch(responses),
        'empathy': empathy_detection_batch(responses),
    }


responses_zero = [result['response_zero'] for result in results]
responses_cot = [result['response_cot'] for result in results]
summaries = [result['summary'] for result in results]

metrics_zero = _evaluate_responses(responses_zero, summaries)
metrics_cot = _evaluate_responses(responses_cot, summaries)

# Module-level names read by the score-printing cell.
flesch_readability_zero = metrics_zero['flesch_readability']
flesch_grade_zero = metrics_zero['flesch_grade']
bert_precision_zero = metrics_zero['bert_precision']
bert_recall_zero = metrics_zero['bert_recall']
sentiment_zero = metrics_zero['sentiment']
empathy_zero = metrics_zero['empathy']
flesch_readability_cot = metrics_cot['flesch_readability']
flesch_grade_cot = metrics_cot['flesch_grade']
bert_precision_cot = metrics_cot['bert_precision']
bert_recall_cot = metrics_cot['bert_recall']
sentiment_cot = metrics_cot['sentiment']
empathy_cot = metrics_cot['empathy']

# Attach the per-user values to each result dict. The order fixes the column
# order of the DataFrame built from `results`.
metrics_by_prompt = {'zero': metrics_zero, 'cot': metrics_cot}
column_order = (
    ('flesch_readability', 'zero'), ('flesch_grade', 'zero'),
    ('flesch_readability', 'cot'), ('flesch_grade', 'cot'),
    ('bert_precision', 'zero'), ('bert_precision', 'cot'),
    ('bert_recall', 'zero'), ('bert_recall', 'cot'),
    ('sentiment', 'zero'), ('sentiment', 'cot'),
    ('empathy', 'zero'), ('empathy', 'cot'),
)
for i, result in enumerate(results):
    for metric, prompt_kind in column_order:
        result[f'{metric}_{prompt_kind}'] = metrics_by_prompt[prompt_kind][metric][i]


In [None]:
# Mean of every metric, printed per prompt type, then the per-user results
# are written to CSV on Drive.
score_report = (
    ("Zero Prompt Scores:", (
        ("Flesch Readability Score:", flesch_readability_zero),
        ("Flesch Grade Level Score:", flesch_grade_zero),
        ("BERT Precision Score:", bert_precision_zero),
        ("BERT Recall Score:", bert_recall_zero),
        ("Sentiment Score:", sentiment_zero),
        ("Empathy Score:", empathy_zero),
    )),
    ("\nCoT Prompt Scores:", (
        ("Flesch Readability Score:", flesch_readability_cot),
        ("Flesch Grade Level Score:", flesch_grade_cot),
        ("BERT Precision Score:", bert_precision_cot),
        ("BERT Recall Score:", bert_recall_cot),
        ("Sentiment Score:", sentiment_cot),
        ("Empathy Score:", empathy_cot),
    )),
)
for heading, metric_rows in score_report:
    print(heading)
    for label, values in metric_rows:
        print(label, sum(values)/len(values))

results_df = pd.DataFrame(results)
results_df.to_csv('/content/drive/My Drive/Diss_Dataset/results_chatbot2.csv', index=False)