In [16]:
import pandas as pd
import openai
import sacrebleu
from rouge_score import rouge_scorer
import matplotlib.pyplot as plt
from collections import Counter
############ 1. Data Preparation: load the cleaned dataset and prepare it for generation ############
import os

# Load cleaned Counsel-Chat dataset
counsel_chat_data = pd.read_csv(
    '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/counsel_chat_data_after_data_preparation.csv')
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in display() only adds a stray "None" to the cell output.
counsel_chat_data.info()
display(counsel_chat_data.head(4))
# Rename columns for consistency with previous combined data format
counsel_chat_data.rename(
    columns={'questionText': 'prompt', 'answerText': 'response'}, inplace=True)
############ 1. Generate Responses Using GPT-4 Model via OpenAI API ############

# Never commit the API key: read it from the environment instead.
# NOTE(review): any key that was previously hard-coded here must be treated as
# compromised and revoked.
openai.api_key = os.environ.get('OPENAI_API_KEY')
if not openai.api_key:
    # Fail fast: without a key every request below would fail and be recorded
    # as an empty response.
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before generating responses.")


def generate_responses(data, model_name='gpt-4', output_path=None,
                       system_prompt="You are a mental health counselor.",
                       max_tokens=150, temperature=0.7):
    """Generate one chat-completion reply per prompt and save the result as CSV.

    Args:
        data: DataFrame with a 'prompt' column. A 'generated_response'
            column is added to it in place.
        model_name: Model identifier passed to the chat-completion call.
        output_path: Destination CSV. Defaults to the project's v3
            ``generated_responses.csv`` file.
        system_prompt: Content of the system message sent with every prompt.
        max_tokens: Completion length limit passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The same DataFrame object. Rows whose request raised an exception
        get an empty string as their generated response.
    """
    import os

    if output_path is None:
        output_path = '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/generated_responses.csv'

    # Create the output directory before calling the API, so a missing folder
    # is detected before any requests are spent rather than after all of them.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    responses = []
    for index, prompt in data['prompt'].items():
        try:
            response = openai.ChatCompletion.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=max_tokens,
                temperature=temperature
            )
            responses.append(response.choices[0].message['content'].strip())
        except Exception as e:
            # Best effort: report the failure and keep one entry per row so
            # the list stays aligned with the DataFrame.
            print(f"Error generating response for index {index}: {e}")
            responses.append("")

    data['generated_response'] = responses
    data.to_csv(output_path, index=False)
    return data


# Run generation over every row of the dataset and keep the returned frame
counsel_chat_data = generate_responses(data=counsel_chat_data)

############ 2. Evaluation of Generated Responses ############


def evaluate_responses(data):
    """Average sentence-level BLEU and ROUGE F-measures over all rows.

    Args:
        data: DataFrame with a 'response' column (reference text) and a
            'generated_response' column (hypothesis text).

    Returns:
        dict with keys 'bleu', 'rouge1', 'rouge2' and 'rougeL'. Each value is
        the mean over the rows that were scored without error, or 0 when no
        row was scored. BLEU is divided by 100; the ROUGE values are
        F-measures.
    """
    rouge_types = ['rouge1', 'rouge2', 'rougeL']
    bleu_scores = []
    rouge_scores = {name: [] for name in rouge_types}

    if len(data) > 0:
        # Missing text (an empty generation becomes NaN after a CSV round
        # trip) is scored as an empty string instead of being dropped from
        # the averages by the error handler below.
        references = data['response'].fillna('').astype(str)
        hypotheses = data['generated_response'].fillna('').astype(str)

        # The scorer does not depend on the row, so build it once.
        scorer = rouge_scorer.RougeScorer(rouge_types, use_stemmer=True)

        for index, reference, hypothesis in zip(data.index, references, hypotheses):
            try:
                # BLEU Score, normalized by 100
                bleu_score = sacrebleu.sentence_bleu(
                    hypothesis, [reference]).score / 100
                # ROUGE Score
                scores = scorer.score(reference, hypothesis)
            except Exception as e:
                print(f"Error processing index {index}: {e}")
                continue

            # Record a row only after both metrics succeeded, so the BLEU and
            # ROUGE lists always describe the same rows.
            bleu_scores.append(bleu_score)
            for key in rouge_types:
                rouge_scores[key].append(scores[key].fmeasure)

            print(
                f"Processed index {index}: BLEU = {bleu_score}, "
                f"ROUGE-1 = {scores['rouge1'].fmeasure}, "
                f"ROUGE-2 = {scores['rouge2'].fmeasure}, "
                f"ROUGE-L = {scores['rougeL'].fmeasure}")

    def _mean(values):
        # Mean of a list, or 0 when nothing was scored.
        return sum(values) / len(values) if values else 0

    evaluation_results = {
        'bleu': _mean(bleu_scores),
        'rouge1': _mean(rouge_scores['rouge1']),
        'rouge2': _mean(rouge_scores['rouge2']),
        'rougeL': _mean(rouge_scores['rougeL'])
    }

    return evaluation_results


# Evaluate the model
# evaluate_responses() iterates over DataFrame rows, so load the saved CSV
# first instead of passing the file path string.
generated_data = pd.read_csv(
    '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/generated_responses.csv')
# Empty generated responses are read back as NaN; restore them to empty text
# so they can be scored like any other row.
generated_data['generated_response'] = generated_data['generated_response'].fillna('')
evaluation_results = evaluate_responses(generated_data)
print("Evaluation Results:", evaluation_results)

############ 3. Analyze BLEU Score Distribution ############


def calculate_bleu_scores(data):
    """Add a per-row sentence-level BLEU score column to ``data``.

    Args:
        data: DataFrame with a 'response' column (reference text) and a
            'generated_response' column (hypothesis text).

    Returns:
        The same DataFrame object with a new 'bleu_score' column holding the
        raw sacrebleu score. Unlike evaluate_responses, the score is not
        divided by 100 here.
    """
    # Missing text (an empty generation becomes NaN after a CSV round trip)
    # is scored as an empty string; a non-string value would otherwise abort
    # the whole run because this loop has no error handling.
    references = data['response'].fillna('').astype(str)
    hypotheses = data['generated_response'].fillna('').astype(str)

    data['bleu_score'] = [
        sacrebleu.sentence_bleu(hypothesis, [reference]).score
        for hypothesis, reference in zip(hypotheses, references)
    ]
    return data


# calculate_bleu_scores() expects a DataFrame, so load the saved CSV first
# instead of passing the file path string.
evaluated_data = pd.read_csv(
    '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/generated_responses.csv')
# Empty generated responses are read back as NaN; restore them to empty text.
evaluated_data['generated_response'] = evaluated_data['generated_response'].fillna('')
evaluated_data = calculate_bleu_scores(evaluated_data)

# Plot Distribution:
plt.figure(figsize=(10, 6))
# NOTE(review): the figure is saved as EPS below; transparency (alpha) may not
# be preserved in that format - confirm the saved figure looks as intended.
plt.hist(evaluated_data['bleu_score'], bins=50,
         color='skyblue', edgecolor='black', alpha=0.5)
plt.title('Distribution of BLEU Scores')
plt.xlabel('BLEU Score')
plt.ylabel('Frequency')
plt.savefig('/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/bleu_score_distribution.eps', format='eps', dpi=600)
plt.show()

############ 4. Length Analysis of Generated Responses ############

# Missing text (an empty generation becomes NaN after a CSV round trip) would
# make len() and str.join() raise TypeError, so normalise both columns first.
reference_text = evaluated_data['response'].fillna('').astype(str)
generated_text = evaluated_data['generated_response'].fillna('').astype(str)

# Lengths are measured in characters.
evaluated_data['reference_length'] = reference_text.str.len()
evaluated_data['generated_length'] = generated_text.str.len()

plt.figure(figsize=(10, 6))
plt.scatter(evaluated_data.index,
            evaluated_data['reference_length'], color='blue', label='Reference Response Length')
plt.scatter(evaluated_data.index,
            evaluated_data['generated_length'], color='orange', label='Generated Response Length')

plt.xlabel('Data Index')
plt.ylabel('Response Length')
plt.title('Length Comparison of Reference and Generated Responses')
plt.legend()
plt.savefig('/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/length_analysis.eps', format='eps', dpi=600)
plt.show()

############ 5. Content Analysis of Generated Responses ############

# Words are split on whitespace only: case and attached punctuation are kept.
generated_responses = generated_text.tolist()
word_counts = Counter(" ".join(generated_responses).split())

common_words = word_counts.most_common(20)
print("Most common words in generated responses:")
for word, count in common_words:
    print(f"{word}: {count}")

train_data: (1190, 2)
val_data: (173, 2)
test_data: (338, 2)


In [None]:
import pandas as pd
import openai
import sacrebleu
from rouge_score import rouge_scorer
import matplotlib.pyplot as plt
from collections import Counter
############ 1. Data Preparation: load the cleaned dataset and prepare it for generation ############
import os

# Load cleaned Counsel-Chat dataset
counsel_chat_data = pd.read_csv(
    '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/counsel_chat_data_after_data_preparation.csv')
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in display() only adds a stray "None" to the cell output.
counsel_chat_data.info()
display(counsel_chat_data.head(4))
# Rename columns for consistency with previous combined data format
counsel_chat_data.rename(
    columns={'questionText': 'prompt', 'answerText': 'response'}, inplace=True)
############ 1. Generate Responses Using GPT-4 Model via OpenAI API ############

# Never put the API key in the notebook: read it from the environment instead.
openai.api_key = os.environ.get('OPENAI_API_KEY')
if not openai.api_key:
    # Fail fast: without a key every request below would fail and be recorded
    # as an empty response.
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before generating responses.")


def generate_responses(data, model_name='gpt-4', output_path=None,
                       system_prompt="You are a mental health counselor.",
                       max_tokens=150, temperature=0.7):
    """Generate one chat-completion reply per prompt and save the result as CSV.

    Args:
        data: DataFrame with a 'prompt' column. A 'generated_response'
            column is added to it in place.
        model_name: Model identifier passed to the chat-completion call.
        output_path: Destination CSV. Defaults to the project's v3
            ``generated_responses.csv`` file.
        system_prompt: Content of the system message sent with every prompt.
        max_tokens: Completion length limit passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The same DataFrame object. Rows whose request raised an exception
        get an empty string as their generated response.
    """
    import os

    if output_path is None:
        output_path = '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/generated_responses.csv'

    # Create the output directory before calling the API, so a missing folder
    # is detected before any requests are spent rather than after all of them.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    responses = []
    for index, prompt in data['prompt'].items():
        try:
            response = openai.ChatCompletion.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=max_tokens,
                temperature=temperature
            )
            responses.append(response.choices[0].message['content'].strip())
        except Exception as e:
            # Best effort: report the failure and keep one entry per row so
            # the list stays aligned with the DataFrame.
            print(f"Error generating response for index {index}: {e}")
            responses.append("")

    data['generated_response'] = responses
    data.to_csv(output_path, index=False)
    return data


# Run generation over every row of the dataset and keep the returned frame
counsel_chat_data = generate_responses(data=counsel_chat_data)

############ 2. Evaluation of Generated Responses ############


def evaluate_responses(data):
    """Average sentence-level BLEU and ROUGE F-measures over all rows.

    Args:
        data: DataFrame with a 'response' column (reference text) and a
            'generated_response' column (hypothesis text).

    Returns:
        dict with keys 'bleu', 'rouge1', 'rouge2' and 'rougeL'. Each value is
        the mean over the rows that were scored without error, or 0 when no
        row was scored. BLEU is divided by 100; the ROUGE values are
        F-measures.
    """
    rouge_types = ['rouge1', 'rouge2', 'rougeL']
    bleu_scores = []
    rouge_scores = {name: [] for name in rouge_types}

    if len(data) > 0:
        # Missing text (an empty generation becomes NaN after a CSV round
        # trip) is scored as an empty string instead of being dropped from
        # the averages by the error handler below.
        references = data['response'].fillna('').astype(str)
        hypotheses = data['generated_response'].fillna('').astype(str)

        # The scorer does not depend on the row, so build it once.
        scorer = rouge_scorer.RougeScorer(rouge_types, use_stemmer=True)

        for index, reference, hypothesis in zip(data.index, references, hypotheses):
            try:
                # BLEU Score, normalized by 100
                bleu_score = sacrebleu.sentence_bleu(
                    hypothesis, [reference]).score / 100
                # ROUGE Score
                scores = scorer.score(reference, hypothesis)
            except Exception as e:
                print(f"Error processing index {index}: {e}")
                continue

            # Record a row only after both metrics succeeded, so the BLEU and
            # ROUGE lists always describe the same rows.
            bleu_scores.append(bleu_score)
            for key in rouge_types:
                rouge_scores[key].append(scores[key].fmeasure)

            print(
                f"Processed index {index}: BLEU = {bleu_score}, "
                f"ROUGE-1 = {scores['rouge1'].fmeasure}, "
                f"ROUGE-2 = {scores['rouge2'].fmeasure}, "
                f"ROUGE-L = {scores['rougeL'].fmeasure}")

    def _mean(values):
        # Mean of a list, or 0 when nothing was scored.
        return sum(values) / len(values) if values else 0

    evaluation_results = {
        'bleu': _mean(bleu_scores),
        'rouge1': _mean(rouge_scores['rouge1']),
        'rouge2': _mean(rouge_scores['rouge2']),
        'rougeL': _mean(rouge_scores['rougeL'])
    }

    return evaluation_results


# Evaluate the model
# evaluate_responses() iterates over DataFrame rows, so load the saved CSV
# first instead of passing the file path string.
generated_data = pd.read_csv(
    '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/generated_responses.csv')
# Empty generated responses are read back as NaN; restore them to empty text
# so they can be scored like any other row.
generated_data['generated_response'] = generated_data['generated_response'].fillna('')
evaluation_results = evaluate_responses(generated_data)
print("Evaluation Results:", evaluation_results)

############ 3. Analyze BLEU Score Distribution ############


def calculate_bleu_scores(data):
    """Add a per-row sentence-level BLEU score column to ``data``.

    Args:
        data: DataFrame with a 'response' column (reference text) and a
            'generated_response' column (hypothesis text).

    Returns:
        The same DataFrame object with a new 'bleu_score' column holding the
        raw sacrebleu score. Unlike evaluate_responses, the score is not
        divided by 100 here.
    """
    # Missing text (an empty generation becomes NaN after a CSV round trip)
    # is scored as an empty string; a non-string value would otherwise abort
    # the whole run because this loop has no error handling.
    references = data['response'].fillna('').astype(str)
    hypotheses = data['generated_response'].fillna('').astype(str)

    data['bleu_score'] = [
        sacrebleu.sentence_bleu(hypothesis, [reference]).score
        for hypothesis, reference in zip(hypotheses, references)
    ]
    return data


# calculate_bleu_scores() expects a DataFrame, so load the saved CSV first
# instead of passing the file path string.
evaluated_data = pd.read_csv(
    '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/generated_responses.csv')
# Empty generated responses are read back as NaN; restore them to empty text.
evaluated_data['generated_response'] = evaluated_data['generated_response'].fillna('')
evaluated_data = calculate_bleu_scores(evaluated_data)

# Plot Distribution:
plt.figure(figsize=(10, 6))
# NOTE(review): the figure is saved as EPS below; transparency (alpha) may not
# be preserved in that format - confirm the saved figure looks as intended.
plt.hist(evaluated_data['bleu_score'], bins=50,
         color='skyblue', edgecolor='black', alpha=0.5)
plt.title('Distribution of BLEU Scores')
plt.xlabel('BLEU Score')
plt.ylabel('Frequency')
plt.savefig('/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/bleu_score_distribution.eps', format='eps', dpi=600)
plt.show()

############ 4. Length Analysis of Generated Responses ############

# Missing text (an empty generation becomes NaN after a CSV round trip) would
# make len() and str.join() raise TypeError, so normalise both columns first.
reference_text = evaluated_data['response'].fillna('').astype(str)
generated_text = evaluated_data['generated_response'].fillna('').astype(str)

# Lengths are measured in characters.
evaluated_data['reference_length'] = reference_text.str.len()
evaluated_data['generated_length'] = generated_text.str.len()

plt.figure(figsize=(10, 6))
plt.scatter(evaluated_data.index,
            evaluated_data['reference_length'], color='blue', label='Reference Response Length')
plt.scatter(evaluated_data.index,
            evaluated_data['generated_length'], color='orange', label='Generated Response Length')

plt.xlabel('Data Index')
plt.ylabel('Response Length')
plt.title('Length Comparison of Reference and Generated Responses')
plt.legend()
plt.savefig('/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/length_analysis.eps', format='eps', dpi=600)
plt.show()

############ 5. Content Analysis of Generated Responses ############

# Words are split on whitespace only: case and attached punctuation are kept.
generated_responses = generated_text.tolist()
word_counts = Counter(" ".join(generated_responses).split())

common_words = word_counts.most_common(20)
print("Most common words in generated responses:")
for word, count in common_words:
    print(f"{word}: {count}")

#### 1.


In [1]:
import pandas as pd
import openai
import sacrebleu
from rouge_score import rouge_scorer
import matplotlib.pyplot as plt
from collections import Counter
############ 1. Data Preparation: load the cleaned dataset and prepare it for generation ############
# Load cleaned Counsel-Chat dataset
counsel_chat_data = pd.read_csv(
    '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/counsel_chat_data_after_data_preparation.csv')
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in display() only adds a stray "None" to the cell output.
counsel_chat_data.info()
display(counsel_chat_data.head(4))
# Rename columns for consistency with previous combined data format
counsel_chat_data.rename(
    columns={'questionText': 'prompt', 'answerText': 'response'}, inplace=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1383 entries, 0 to 1382
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   questionID     1383 non-null   object
 1   questionTitle  1383 non-null   object
 2   questionText   1383 non-null   object
 3   questionUrl    1383 non-null   object
 4   topics         1376 non-null   object
 5   therapistName  1383 non-null   object
 6   therapistUrl   1383 non-null   object
 7   answerText     1383 non-null   object
 8   upvotes        1383 non-null   int64 
dtypes: int64(1), object(8)
memory usage: 97.4+ KB


None

Unnamed: 0,questionID,questionTitle,questionText,questionUrl,topics,therapistName,therapistUrl,answerText,upvotes
0,5566fab2a64752d71ec3ca69,Escalating disagreements between mother and wife,my wife and mother are having tense disagreeme...,https://counselchat.com/questions/escalating-d...,Family Conflict,"Kristi King-Morgan, LMSW",https://counselchat.com/therapists/kristi-king...,<p>what you are describing is something psycho...,0
1,5566f94fa64752d71ec3ca64,I'm addicted to smoking. How can I stop?,"i'm planning to have baby, so i have to quit s...",https://counselchat.com/questions/i-m-addicted...,"Substance Abuse,Addiction",Rebecca Duellman,https://counselchat.com/therapists/rebecca-due...,<p>hi. good for you in planning ahead to do wh...,0
2,5567d26887a1cc0c3f3d8f46,Keeping secrets from my family,"i have secrets in my mind, and i don't know wh...",https://counselchat.com/questions/keeping-secr...,Family Conflict,Jeevna Bajaj,https://counselchat.com/therapists/jeevna-bajaj,<p>it sounds like keeping the secrets has beco...,0
3,556bed15c969ba5861709df5,The Underlying Causes of Being Possessive,i am extremely possessive in my relationships ...,https://counselchat.com/questions/the-underlyi...,"Behavioral Change,Social Relationships",Rebecca Duellman,https://counselchat.com/therapists/rebecca-due...,<p>hi there. it's great you are able to realiz...,0


#### 2.


In [None]:
############ 2. Generate Responses Using GPT-4 Model via OpenAI API ############
import os

# Never commit the API key: read it from the environment instead.
# NOTE(review): any key that was previously hard-coded here must be treated as
# compromised and revoked.
openai.api_key = os.environ.get('OPENAI_API_KEY')
if not openai.api_key:
    # Fail fast: without a key every request below would fail and be recorded
    # as an empty response.
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before generating responses.")


def generate_responses(data, model_name='gpt-4', output_path=None,
                       system_prompt="You are a mental health counselor. Your goal is to provide empathetic, supportive, and reflective responses to clients' questions. Focus on understanding the clients' concerns and offering thoughtful and compassionate guidance.",
                       max_tokens=150, temperature=0.7):
    """Generate one chat-completion reply per prompt and save the result as CSV.

    Args:
        data: DataFrame with a 'prompt' column. A 'generated_response'
            column is added to it in place.
        model_name: Model identifier passed to the chat-completion call.
        output_path: Destination CSV. Defaults to the project's v3
            ``generated_responses.csv`` file.
        system_prompt: Content of the system message sent with every prompt.
        max_tokens: Completion length limit passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The same DataFrame object. Rows whose request raised an exception
        get an empty string as their generated response.
    """
    import os

    if output_path is None:
        output_path = '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/generated_responses.csv'

    # Create the output directory before calling the API, so a missing folder
    # is detected before any requests are spent rather than after all of them.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    responses = []
    for index, prompt in data['prompt'].items():
        try:
            response = openai.ChatCompletion.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=max_tokens,
                temperature=temperature
            )
            responses.append(response.choices[0].message['content'].strip())
        except Exception as e:
            # Best effort: report the failure and keep one entry per row so
            # the list stays aligned with the DataFrame.
            print(f"Error generating response for index {index}: {e}")
            responses.append("")

    data['generated_response'] = responses
    data.to_csv(output_path, index=False)
    return data


# Run generation over every row of the dataset and keep the returned frame
counsel_chat_data = generate_responses(data=counsel_chat_data)

#### 3.


In [None]:
############ 2. Evaluation of Generated Responses ############

def evaluate_responses(data):
    """Average sentence-level BLEU and ROUGE F-measures over all rows.

    Args:
        data: DataFrame with a 'response' column (reference text) and a
            'generated_response' column (hypothesis text).

    Returns:
        dict with keys 'bleu', 'rouge1', 'rouge2' and 'rougeL'. Each value is
        the mean over the rows that were scored without error, or 0 when no
        row was scored. BLEU is divided by 100; the ROUGE values are
        F-measures.
    """
    rouge_types = ['rouge1', 'rouge2', 'rougeL']
    bleu_scores = []
    rouge_scores = {name: [] for name in rouge_types}

    if len(data) > 0:
        # Missing text (an empty generation becomes NaN after a CSV round
        # trip) is scored as an empty string instead of being dropped from
        # the averages by the error handler below.
        references = data['response'].fillna('').astype(str)
        hypotheses = data['generated_response'].fillna('').astype(str)

        # The scorer does not depend on the row, so build it once.
        scorer = rouge_scorer.RougeScorer(rouge_types, use_stemmer=True)

        for index, reference, hypothesis in zip(data.index, references, hypotheses):
            try:
                # BLEU Score, normalized by 100
                bleu_score = sacrebleu.sentence_bleu(
                    hypothesis, [reference]).score / 100
                # ROUGE Score
                scores = scorer.score(reference, hypothesis)
            except Exception as e:
                print(f"Error processing index {index}: {e}")
                continue

            # Record a row only after both metrics succeeded, so the BLEU and
            # ROUGE lists always describe the same rows.
            bleu_scores.append(bleu_score)
            for key in rouge_types:
                rouge_scores[key].append(scores[key].fmeasure)

            print(
                f"Processed index {index}: BLEU = {bleu_score}, "
                f"ROUGE-1 = {scores['rouge1'].fmeasure}, "
                f"ROUGE-2 = {scores['rouge2'].fmeasure}, "
                f"ROUGE-L = {scores['rougeL'].fmeasure}")

    def _mean(values):
        # Mean of a list, or 0 when nothing was scored.
        return sum(values) / len(values) if values else 0

    evaluation_results = {
        'bleu': _mean(bleu_scores),
        'rouge1': _mean(rouge_scores['rouge1']),
        'rouge2': _mean(rouge_scores['rouge2']),
        'rougeL': _mean(rouge_scores['rougeL'])
    }

    return evaluation_results


# Evaluate the model
# evaluate_responses() iterates over DataFrame rows, so load the saved CSV
# first instead of passing the file path string.
generated_data = pd.read_csv(
    '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/generated_responses.csv')
# Empty generated responses are read back as NaN; restore them to empty text
# so they can be scored like any other row.
generated_data['generated_response'] = generated_data['generated_response'].fillna('')
evaluation_results = evaluate_responses(generated_data)
print("Evaluation Results:", evaluation_results)

#### 4.


In [None]:
############ 3. Analyze BLEU Score Distribution ############

def calculate_bleu_scores(data):
    """Add a per-row sentence-level BLEU score column to ``data``.

    Args:
        data: DataFrame with a 'response' column (reference text) and a
            'generated_response' column (hypothesis text).

    Returns:
        The same DataFrame object with a new 'bleu_score' column holding the
        raw sacrebleu score. Unlike evaluate_responses, the score is not
        divided by 100 here.
    """
    # Missing text (an empty generation becomes NaN after a CSV round trip)
    # is scored as an empty string; a non-string value would otherwise abort
    # the whole run because this loop has no error handling.
    references = data['response'].fillna('').astype(str)
    hypotheses = data['generated_response'].fillna('').astype(str)

    data['bleu_score'] = [
        sacrebleu.sentence_bleu(hypothesis, [reference]).score
        for hypothesis, reference in zip(hypotheses, references)
    ]
    return data


# calculate_bleu_scores() expects a DataFrame, so load the saved CSV first
# instead of passing the file path string.
evaluated_data = pd.read_csv(
    '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/generated_responses.csv')
# Empty generated responses are read back as NaN; restore them to empty text.
evaluated_data['generated_response'] = evaluated_data['generated_response'].fillna('')
evaluated_data = calculate_bleu_scores(evaluated_data)

# Plot Distribution:
plt.figure(figsize=(10, 6))
# NOTE(review): the figure is saved as EPS below; transparency (alpha) may not
# be preserved in that format - confirm the saved figure looks as intended.
plt.hist(evaluated_data['bleu_score'], bins=50,
         color='skyblue', edgecolor='black', alpha=0.5)
plt.title('Distribution of BLEU Scores')
plt.xlabel('BLEU Score')
plt.ylabel('Frequency')
plt.savefig('/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/bleu_score_distribution.eps', format='eps', dpi=600)
plt.show()

#### 5.


In [None]:
############ 4. Length Analysis of Generated Responses ############

# Missing text (an empty generation becomes NaN after a CSV round trip) would
# make len() and str.join() raise TypeError, so normalise both columns first.
reference_text = evaluated_data['response'].fillna('').astype(str)
generated_text = evaluated_data['generated_response'].fillna('').astype(str)

# Lengths are measured in characters.
evaluated_data['reference_length'] = reference_text.str.len()
evaluated_data['generated_length'] = generated_text.str.len()

plt.figure(figsize=(10, 6))
plt.scatter(evaluated_data.index,
            evaluated_data['reference_length'], color='blue', label='Reference Response Length')
plt.scatter(evaluated_data.index,
            evaluated_data['generated_length'], color='orange', label='Generated Response Length')

plt.xlabel('Data Index')
plt.ylabel('Response Length')
plt.title('Length Comparison of Reference and Generated Responses')
plt.legend()
plt.savefig('/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v3/length_analysis.eps', format='eps', dpi=600)
plt.show()

############ 5. Content Analysis of Generated Responses ############

# Words are split on whitespace only: case and attached punctuation are kept.
generated_responses = generated_text.tolist()
word_counts = Counter(" ".join(generated_responses).split())

common_words = word_counts.most_common(20)
print("Most common words in generated responses:")
for word, count in common_words:
    print(f"{word}: {count}")

In [None]:
Revised Article Sections:

Abstract: Include the sentiment analysis and emotion detection results, highlighting their importance in evaluating the emotional tone and specific emotions in the generated responses.

Introduction: Mention the addition of sentiment analysis and emotion detection as part of the evaluation process to better understand the emotional quality of the AI-generated responses.

Methods: Detail the implementation of sentiment analysis and emotion detection, including the libraries and methods used.

Results: Present the findings from the sentiment analysis and emotion detection, discussing how they complement the BLEU and ROUGE scores in assessing the quality of the generated responses.

Discussion: Reflect on the implications of the sentiment and emotion analysis results for the use of AI in mental health counseling. Discuss how these results align with the call for papers’ focus on clinically applicable and ethically sound AI research.

Limitations and Future Work: Acknowledge the limitations of the current study, such as the need for more nuanced measures of empathy and understanding beyond sentiment and emotion analysis. Suggest future research directions, including the development of models that can adapt to individual client needs and provide personalized responses.

Conclusion: Summarize the study’s findings, including the sentiment and emotion analysis results, and emphasize the potential of AI to support mental health counseling while highlighting the need for further research to enhance its effectiveness.

Ensure that all references and citations are correctly formatted according to the Nature Portfolio submission guidelines. By incorporating these changes, the article will be more comprehensive and aligned with the call for papers’ requirements. Remember to include a discussion on the ethical considerations of using AI in mental health care, as emphasized in the call for papers.

In [None]:
from textblob import TextBlob
import pandas as pd
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
nltk.download('vader_lexicon')

############ 6. Sentiment Analysis and Emotion Detection ############

# Function to perform sentiment analysis


def analyze_sentiment(text):
    """Return the TextBlob sentiment polarity of ``text``."""
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    return polarity

# Function to perform emotion detection


def detect_emotions(text):
    """Return the SentimentIntensityAnalyzer polarity scores for ``text``.

    The result is whatever ``polarity_scores`` returns; callers in this file
    read its 'pos', 'neu', 'neg' and 'compound' entries.
    """
    # Building the analyzer on every call repeats its setup work for each
    # row, so create it once and reuse it.
    analyzer = getattr(detect_emotions, '_analyzer', None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        detect_emotions._analyzer = analyzer
    return analyzer.polarity_scores(text)

# Adding sentiment analysis and emotion detection to the evaluation


def evaluate_responses_with_sentiment(data):
    """Return a copy of ``data`` extended with sentiment and polarity-score columns.

    Args:
        data: DataFrame with a 'generated_response' text column.

    Returns:
        A new DataFrame containing the original columns, a 'sentiment'
        column (from analyze_sentiment) and one column per key returned by
        detect_emotions. The input frame is left unchanged.
    """
    # Work on a copy: previously 'sentiment' was written into the caller's
    # frame while the score columns only existed on the returned one.
    result = data.copy()

    # Missing text (an empty generation becomes NaN after a CSV round trip)
    # is analysed as an empty string instead of raising inside the analyzers.
    texts = result['generated_response'].fillna('').astype(str)

    # Perform sentiment analysis
    result['sentiment'] = [analyze_sentiment(text) for text in texts]

    # Perform emotion detection; build the score columns in one step from the
    # list of per-row dicts rather than expanding each dict via apply(pd.Series).
    emotion_scores = pd.DataFrame(
        [detect_emotions(text) for text in texts], index=result.index)

    return pd.concat([result, emotion_scores], axis=1)


# Load previously generated responses from the v2 results folder
evaluated_data = pd.read_csv(
    '/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v2/test_generated_responses.csv')
# Apply the extended evaluation function
evaluated_data_with_sentiment = evaluate_responses_with_sentiment(
    evaluated_data)

# Display some sentiment and emotion analysis results
print(evaluated_data_with_sentiment[[
      'generated_response', 'sentiment', 'pos', 'neu', 'neg', 'compound']].head())

############ 7. Summarize Sentiment and Emotion Scores ############

# Calculate overall sentiment score (mean of the per-response 'sentiment' column)
overall_sentiment = evaluated_data_with_sentiment['sentiment'].mean()
print(f"Overall Sentiment Score: {overall_sentiment}")

# Calculate average emotion scores (column-wise mean of the four score columns)
average_emotions = evaluated_data_with_sentiment[[
    'pos', 'neu', 'neg', 'compound']].mean()
print(f"Average Emotion Scores:\n{average_emotions}")

# Plotting the emotion distributions: one bar per averaged score column
plt.figure(figsize=(10, 6))
average_emotions.plot(kind='bar', color=['green', 'blue', 'red', 'purple'])
plt.title('Average Emotion Scores')
plt.ylabel('Score')
plt.xlabel('Emotion')
plt.xticks(rotation=0)
# plt.savefig('/Users/dipendrapant/Library/CloudStorage/OneDrive-NTNU/ForFun/npj_digital_medicine/code/data/result/v2/emotion_scores.eps', format='eps', dpi=600)
plt.show()