# **Psychology Chatbot**

## **Import necessary modules**

In [7]:
# Deep learning framework for neural network operations
import tensorflow as tf
# High-level neural networks API (runs on top of TensorFlow)
from tensorflow import keras

# JSON handling for data serialization/deserialization
import json
# Facebook's FAISS library for efficient similarity search
import faiss
# Numerical computing library for array/matrix operations
import numpy as np

# Client library for interacting with Ollama's LLM API
import ollama
# Sentence embedding models and similarity utilities
from sentence_transformers import SentenceTransformer, util

# Speech recognition library for microphone input
import speech_recognition as sr
# Text-to-speech conversion library
import pyttsx3

# Operating system interfaces for file/path operations
import os
# HTTP requests library for web API calls
import requests

# Text feature extraction (for bag-of-words/tf-idf)
from sklearn.feature_extraction.text import CountVectorizer

# GPT-2 model and tokenizer for perplexity evaluation
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

# PyTorch deep learning framework
import torch
# Mathematical operations and constants
import math

# Evaluation metrics library
import evaluate
# ROUGE metric for text generation evaluation
rouge = evaluate.load("rouge")
# BLEU score metric for machine translation evaluation
bleu = evaluate.load("bleu")

## **Data Loading, Embedding Generation, and FAISS Indexing**

In [13]:
# Load the dataset. The location can be overridden with the
# PSYCHOLOGY_DATASET_PATH environment variable so the notebook is not tied to
# one machine; the default is the path originally hard-coded here.
DATASET_PATH = os.environ.get(
    "PSYCHOLOGY_DATASET_PATH",
    r"C:\Users\Vidhi\ML Chatbot\data\unified_dataset.json",
)
with open(DATASET_PATH, "r", encoding="utf-8") as file:
    data = json.load(file)

if not data:
    # An empty dataset would otherwise fail later with an opaque shape error.
    raise ValueError(f"No entries found in dataset: {DATASET_PATH}")

# Initialize embedding model (Use a model optimized for retrieval)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Build one searchable text per entry by joining its title and content
texts = [entry["Title"] + " " + entry["Content"] for entry in data]

# Generate embeddings; FAISS works on C-contiguous float32 matrices
embeddings = embedding_model.encode(texts, convert_to_numpy=True)
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

# Store in FAISS
dimension = embeddings.shape[1]  # Get embedding size
index = faiss.IndexFlatL2(dimension)  # Exact (brute-force) L2 index
index.add(embeddings)  # Row i of the index corresponds to data[i]

# Save FAISS index
faiss.write_index(index, "faiss_index.bin")

# Save data mapping (to retrieve original texts by FAISS row number)
with open("data_mapping.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=4)

print("FAISS index and data mapping saved!")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


FAISS index and data mapping saved!


## **RAG-Powered Chatbot with Conversation Memory**

In [4]:
# Configure Ollama client (used by psychology_chatbot to call the model)
tt_client = ollama.Client(host='http://localhost:11435')  # Change port if needed

# Load FAISS index and data.
# Both files are the ones written by the indexing cell ("faiss_index.bin" and
# "data_mapping.json"); search_faiss uses FAISS row numbers to index into `data`.
index = faiss.read_index("faiss_index.bin")
with open("data_mapping.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Load embedding model (same model name as the one used to build the index)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# File to store chat memory (read by load_chat_memory, written by save_chat_memory)
MEMORY_FILE = "chat_memory.json"

def load_chat_memory():
    """Load the persisted conversation history from ``MEMORY_FILE``.

    Returns:
        list: The stored entries, or an empty list when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON list.
    """
    if not os.path.exists(MEMORY_FILE):
        return []
    try:
        with open(MEMORY_FILE, "r", encoding="utf-8") as f:
            memory = json.load(f)
    except (ValueError, OSError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        # A damaged memory file should not stop the chatbot from starting.
        print(f"Warning: could not read {MEMORY_FILE} ({exc}); starting with empty chat memory.")
        return []
    # The rest of the notebook slices and appends to this value, so anything
    # other than a list is unusable.
    return memory if isinstance(memory, list) else []

def save_chat_memory():
    """Persist the module-level ``chat_memory`` list to ``MEMORY_FILE`` as JSON.

    The JSON is written to a temporary sibling file and then moved into place,
    so a failure part-way through serialisation cannot leave ``MEMORY_FILE``
    truncated.
    """
    tmp_path = MEMORY_FILE + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(chat_memory, f, indent=4)
    # os.replace overwrites an existing destination on every platform.
    os.replace(tmp_path, MEMORY_FILE)

# Initialize memory: module-level list shared by psychology_chatbot(),
# save_chat_memory() and get_chat_memory()
chat_memory = load_chat_memory()

def search_faiss(query, top_k=3):
    """Return the dataset entries whose embeddings are closest to ``query``.

    Args:
        query (str): Text to embed with ``embedding_model``.
        top_k (int): Maximum number of entries to return.

    Returns:
        list: Entries of ``data`` in the order returned by ``index.search``.
        May contain fewer than ``top_k`` items.
    """
    if top_k <= 0:
        return []
    query_embedding = embedding_model.encode([query], convert_to_numpy=True)
    _, indices = index.search(query_embedding, top_k)
    # FAISS fills missing neighbours with -1 when the index holds fewer than
    # top_k vectors; data[-1] would silently return the last entry instead.
    # The upper bound also protects against a mapping file shorter than the index.
    retrieved_texts = [data[i] for i in indices[0] if 0 <= i < len(data)]
    return retrieved_texts

def psychology_chatbot(query):
    """Answer ``query`` using retrieved passages and recent conversation turns.

    Retrieves passages with ``search_faiss``, builds a single prompt from them
    and from the last three ``chat_memory`` entries, and sends it to the
    "mistral" model through ``tt_client``.

    Side effects: appends the new question/answer pair to ``chat_memory`` and
    calls ``save_chat_memory()``.

    Args:
        query (str): The user's question.

    Returns:
        The ``content`` field of the message returned by the chat call.
    """
    passages = search_faiss(query)

    # Each passage contributes its title and the first 300 characters of content.
    context_lines = []
    for passage in passages:
        context_lines.append(passage["Title"] + ": " + passage["Content"][:300] + "...")
    context = "\n".join(context_lines)

    # Only the three most recent exchanges are replayed to the model.
    recent_turns = chat_memory[-3:]
    memory_context = "\n".join(
        f"Q: {turn['question']}\nA: {turn['answer']}" for turn in recent_turns
    )

    # The four-space indentation and the whitespace-only lines are part of the
    # prompt text and are kept exactly.
    prompt = (
        "You are an AI psychology assistant. Answer based on the given knowledge:\n"
        "    Previous Conversation:\n"
        f"    {memory_context}\n"
        "    \n"
        "    Current Context:\n"
        f"    {context}\n"
        "    \n"
        f"    User Query: {query}\n"
        "    Response:"
    )

    user_message = {"role": "user", "content": prompt}
    reply = tt_client.chat(model="mistral", messages=[user_message])
    answer = reply["message"]["content"]

    chat_memory.append({"question": query, "answer": answer})
    save_chat_memory()

    return answer

def get_chat_memory():
    """Return the module-level ``chat_memory`` list (the list itself, not a copy)."""
    return chat_memory

# Speech-to-Text (STT)
def speech_to_text():
    """Record one utterance from the microphone and transcribe it.

    Returns:
        str: The text returned by ``recognize_google``, or an empty string
        when transcription fails. Failures are reported on stdout instead of
        being returned, so a caller cannot mistake an error message for
        something the user said.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Could not understand audio.")
    except sr.RequestError as exc:
        print(f"Error with speech recognition service. ({exc})")
    return ""

# Text-to-Speech (TTS)
def text_to_speech(text):
    """Speak ``text`` aloud using a freshly initialised pyttsx3 engine."""
    speaker = pyttsx3.init()
    speaker.say(text)
    # Process the queued utterance before returning.
    speaker.runAndWait()

## **User Input Processing and Response Generation**

In [8]:
if __name__ == "__main__":
    # Interactive loop. Commands: 'exit', 'show memory', 'voice input'.
    # Anything else is sent to the chatbot as a query.
    while True:
        user_input = input("Enter your query (or say 'voice input' to speak, 'exit' to quit): ").strip()

        # Transcribe first so that spoken text goes through the same command
        # handling as typed text (a spoken "exit" now ends the loop).
        if user_input.lower() == "voice input":
            user_input = speech_to_text().strip()
            print("You said:", user_input)

        command = user_input.lower()
        if command == "exit":
            break
        if command == "show memory":
            for i, entry in enumerate(get_chat_memory(), 1):
                print(f"{i}. Q: {entry['question']}\n   A: {entry['answer']}\n")
            continue
        if not user_input:
            # Nothing typed or nothing transcribed: ask again.
            continue

        response = psychology_chatbot(user_input)
        print("Bot:", response)
        text_to_speech(response)
        print("\n")

Enter your query (or say 'voice input' to speak, 'exit' to quit):  what is mental health


Bot:  Mental health refers to a state of well-being in which an individual can realize their own abilities, cope with the normal stresses of life, work productively and fruitfully, and make contributions to their community. It involves having beliefs, thoughts, feelings, and behaviors that allow you to live in harmony with yourself and others. Mental health is not just the absence of mental disorders or illnesses; it's a positive state that encompasses emotional, psychological, and social well-being.




Enter your query (or say 'voice input' to speak, 'exit' to quit):  exit


## **Semantic Similarity Scoring**

In [5]:
def score_retrieval(query):
    """Score each passage retrieved for ``query`` by cosine similarity.

    Args:
        query (str): The question to retrieve passages for.

    Returns:
        list[tuple]: One ``(rank, similarity, title)`` tuple per passage
        returned by ``search_faiss``, with ``rank`` starting at 1.
    """
    query_vec = embedding_model.encode(query, convert_to_tensor=True)

    def _similarity(doc):
        # Embed the passage's title and content and compare it with the query.
        passage = doc["Title"] + " " + doc["Content"]
        passage_vec = embedding_model.encode(passage, convert_to_tensor=True)
        return util.pytorch_cos_sim(query_vec, passage_vec).item()

    return [
        (rank, _similarity(doc), doc["Title"])
        for rank, doc in enumerate(search_faiss(query), start=1)
    ]

In [6]:
# Print retrieval similarity scores for every evaluation question.
# NOTE(review): "chatbot_eval_output.json" is written by the answer-generation
# cell further down, so on a fresh kernel that cell has to run before this one.
with open("chatbot_eval_output.json", "r", encoding="utf-8") as f:
    eval_data = json.load(f)

for item in eval_data:
    query = item["question"]
    # One (rank, similarity, title) tuple per retrieved passage
    scores = score_retrieval(query)
    for i, score, title in scores:
        print(f"#{i} | Title: {title} | Similarity Score: {score:.2f}")
    print()

#1 | Title: WHAT IS PSYCHOLOGY? | Similarity Score: 0.74
#2 | Title: PSYCHOLOGY AND OTHER DISCIPLINES | Similarity Score: 0.70
#3 | Title: Psychology as a Natural Science | Similarity Score: 0.67

#1 | Title: Nature of Stress | Similarity Score: 0.73
#2 | Title: Nature, Types, and Sources of Stress | Similarity Score: 0.68
#3 | Title: Psychological Stress | Similarity Score: 0.67

#1 | Title: What is mental health? | Similarity Score: 0.80
#2 | Title: What's the difference between mental health and mental illness? | Similarity Score: 0.69
#3 | Title: What does it mean to have a mental illness? | Similarity Score: 0.65

#1 | Title: Behaviorism: Focus on Observable Behavior | Similarity Score: 0.73
#2 | Title: What is behaviour? | Similarity Score: 0.61
#3 | Title: Situationism and Behavioural Variations | Similarity Score: 0.58

#1 | Title: Cognitive Psychology | Similarity Score: 0.71
#2 | Title: Psychology as a Natural Science | Similarity Score: 0.57
#3 | Title: The Rise of Cognitive

## **Keyword Overlap**
Extract keywords from the query and check how many appear in the retrieved chunk.

In [8]:
def keyword_overlap_score(query):
    """Print how many query words each retrieved passage shares with ``query``.

    The vocabulary is built from ``query`` alone. For each passage the score
    is the sum, over that vocabulary, of min(count in query, count in passage).
    One line is printed per passage, followed by a blank line.

    Args:
        query (str): The question to retrieve passages for.
    """
    retrieved_docs = search_faiss(query)
    if retrieved_docs:
        # The vocabulary and the query counts depend only on the query, so
        # build them once rather than once per passage.
        vectorizer = CountVectorizer().fit([query])
        query_vector = vectorizer.transform([query]).toarray()
        for doc in retrieved_docs:
            doc_text = doc["Title"] + " " + doc["Content"]
            doc_vector = vectorizer.transform([doc_text]).toarray()
            overlap_score = np.minimum(query_vector, doc_vector).sum()
            print(f"Title: {doc['Title']}, Overlap Score: {overlap_score}")
    print()

In [9]:
# Print keyword-overlap scores for every evaluation question.
# NOTE(review): "chatbot_eval_output.json" is written by the answer-generation
# cell further down, so on a fresh kernel that cell has to run before this one.
with open("chatbot_eval_output.json", "r", encoding="utf-8") as f:
    eval_data = json.load(f)

for item in eval_data:
    query = item["question"]
    keyword_overlap_score(query)

Title: WHAT IS PSYCHOLOGY?, Overlap Score: 3
Title: PSYCHOLOGY AND OTHER DISCIPLINES, Overlap Score: 1
Title: Psychology as a Natural Science, Overlap Score: 3

Title: Nature of Stress, Overlap Score: 2
Title: Nature, Types, and Sources of Stress, Overlap Score: 2
Title: Psychological Stress, Overlap Score: 2

Title: What is mental health?, Overlap Score: 4
Title: What's the difference between mental health and mental illness?, Overlap Score: 4
Title: What does it mean to have a mental illness?, Overlap Score: 4

Title: Behaviorism: Focus on Observable Behavior, Overlap Score: 2
Title: What is behaviour?, Overlap Score: 1
Title: Situationism and Behavioural Variations, Overlap Score: 1

Title: Cognitive Psychology, Overlap Score: 2
Title: Psychology as a Natural Science, Overlap Score: 2
Title: The Rise of Cognitive Psychology and Constructivism, Overlap Score: 2

Title: Motivation, Overlap Score: 2
Title: NATURE OF MOTIVATION, Overlap Score: 3
Title: The Motivational Cycle, Overlap Sc

#### Generating answers for the questions in "evaluation_set.json" using the chatbot

In [9]:
# Load the evaluation set (a list of entries with "question" and "answer" keys)
with open("evaluation_set.json", "r", encoding="utf-8") as f:
    dataset = json.load(f)

# Lists to store results
questions = []
expected_answers = []
generated_answers = []

# Process each entry.
# NOTE(review): psychology_chatbot() puts the last three chat_memory entries
# into its prompt and appends every new exchange to chat_memory, so each answer
# here is conditioned on the preceding evaluation questions and the persisted
# chat memory grows with every run. Consider isolating evaluation from memory.
for entry in dataset:
    question = entry["question"]
    expected_answer = entry["answer"]
    generated_answer = psychology_chatbot(question)

    questions.append(question)
    expected_answers.append(expected_answer)
    generated_answers.append(generated_answer)

    # Show only the first 100 characters of each answer to keep the output short
    print(f"Q: {question}\nExpected: {expected_answer[:100]}...\nGenerated: {generated_answer[:100]}...\n")

# Save the results to a JSON file.
# This file is the input of the retrieval-scoring, keyword-overlap, accuracy,
# ROUGE/BLEU and perplexity cells.
results = [
    {"question": q, "expected_answer": e, "generated_answer": g}
    for q, e, g in zip(questions, expected_answers, generated_answers)
]

with open("chatbot_eval_output.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=4)

print("\nStored all results in 'chatbot_eval_output.json'")

Q: what is psychology
Expected: Psychology is a scientific discipline that studies the behavior, thoughts, emotions, and mental proc...
Generated:  Psychology is a knowledge discipline that focuses on understanding human behavior, thoughts, and fe...

Q: what is stress
Expected: Stress is a pattern of responses that an organism makes to events that disturb its equilibrium......
Generated:  Stress refers to the pattern of responses an organism makes to events that disturb its equilibrium ...

Q: what is mental health
Expected: Mental health refers to a person’s emotional, psychological, and social well-being. It influences ho...
Generated:  Mental health refers to an individual's beliefs, thoughts, feelings, and behaviors regarding their ...

Q: define behaviorism in psychology
Expected: Behaviorism is a theoretical orientation in psychology that focuses on observable behaviors rather t...
Generated:  In psychology, Behaviorism is a theoretical perspective developed by John Watson aroun

## **Similarity-Based Accuracy Function**
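* Use cosine similarity between:
  * The chatbot’s response (`generated_answer`)
  * The ground-truth answer (`expected_answer`)
* A response counts as correct when its similarity is at least the threshold (0.7 by default); accuracy is the percentage of such responses.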

In [27]:
def compute_accuracy(eval_set_path="chatbot_eval_output.json", threshold=0.7):
    """Print the share of generated answers that are close to the expected ones.

    For every item, the expected and generated answers are embedded with
    ``embedding_model`` and compared by cosine similarity. An item counts as
    correct when the similarity is at least ``threshold``.

    Args:
        eval_set_path (str): JSON file holding a list of items with
            "question", "expected_answer" and "generated_answer" keys.
        threshold (float): Minimum cosine similarity for an answer to count
            as correct.

    Returns:
        None. Per-item details and the overall percentage are printed.
    """
    with open(eval_set_path, "r", encoding="utf-8") as f:
        eval_data = json.load(f)

    total = len(eval_data)
    if total == 0:
        # Avoid dividing by zero on an empty evaluation file.
        print("\nNo evaluation items found; accuracy is undefined.")
        return

    score_sum = 0

    for item in eval_data:
        question = item["question"]
        expected_answer = item["expected_answer"]
        generated_answer = item["generated_answer"]

        # Convert to embeddings
        emb_expected = embedding_model.encode(expected_answer, convert_to_tensor=True)
        emb_generated = embedding_model.encode(generated_answer, convert_to_tensor=True)

        # Compute cosine similarity
        similarity = util.pytorch_cos_sim(emb_expected, emb_generated).item()

        print(f"\nQ: {question}\nExpected: {expected_answer[:100]}...\nGenerated: {generated_answer[:100]}...\nSimilarity: {similarity:.2f}")

        # Accuracy = % of responses at or above the threshold
        if similarity >= threshold:
            score_sum += 1

    accuracy = (score_sum / total) * 100
    print(f"\nOverall Accuracy: {accuracy:.2f}%")

In [28]:
# Run the similarity-based accuracy evaluation; results are printed below
compute_accuracy()


Q: what is psychology
Expected: Psychology is the scientific study of the mind, behavior, and mental processes. It seeks to understa...
Generated:  Psychology is a knowledge discipline that focuses on understanding human behavior, thoughts, and fe...
Similarity: 0.77

Q: what is stress
Expected: Stress is the body's physiological and psychological response to perceived threats or demands, known...
Generated:  Stress refers to the pattern of responses an organism makes to events that disturb its equilibrium ...
Similarity: 0.79

Q: what is mental health
Expected: Mental health encompasses emotional, psychological, and social well-being, influencing how individua...
Generated:  Mental health refers to an individual's beliefs, thoughts, feelings, and behaviors regarding their ...
Similarity: 0.79

Q: define behaviorism in psychology
Expected: Behaviorism is a psychological theory emphasizing observable behaviors over internal mental states. ...
Generated:  In psychology, Behaviorism is a

## **ROUGE / BLEU for Deeper Evaluation**

In [29]:
def evaluate_nlp_metrics(eval_set_path="chatbot_eval_output.json"):
    """Print ROUGE and BLEU scores of the generated answers.

    Args:
        eval_set_path (str): JSON file holding a list of items with
            "expected_answer" (reference) and "generated_answer" (prediction)
            keys.

    Returns:
        None. The metric dictionaries are printed.
    """
    with open(eval_set_path, "r", encoding="utf-8") as f:
        eval_data = json.load(f)

    if not eval_data:
        # Corpus-level metrics are meaningless without any pairs.
        print("No evaluation items found; skipping ROUGE/BLEU.")
        return

    refs = [item["expected_answer"] for item in eval_data]
    preds = [item["generated_answer"] for item in eval_data]

    print("ROUGE:", rouge.compute(predictions=preds, references=refs))
    # Each prediction is given as a one-element list of references.
    print("\nBLEU:", bleu.compute(predictions=preds, references=[[ref] for ref in refs]))

In [30]:
# Run the ROUGE/BLEU evaluation; the metric dictionaries are printed below
evaluate_nlp_metrics()

ROUGE: {'rouge1': np.float64(0.4089562372622777), 'rouge2': np.float64(0.1266339231744525), 'rougeL': np.float64(0.23011940110539927), 'rougeLsum': np.float64(0.24400745090586576)}

BLEU: {'bleu': 0.10136881548267056, 'precisions': [0.4516276937184778, 0.14750692520775624, 0.06183170618317062, 0.03347378277153558], 'brevity_penalty': 0.9354640925264084, 'length_ratio': 0.9374597034171502, 'translation_length': 4362, 'reference_length': 4653}


## **Perplexity Evaluation**
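* Perplexity is a common metric for evaluating how well a language model predicts a sequence of words. It essentially measures the model's "surprise" when it sees the actual next word in a sequence.
* A low perplexity score means the model is less surprised, i.e. it predicts the text well.
* A high perplexity score means the model is more surprised, i.e. it predicts the text poorly.
* Here, perplexity is computed with a pretrained GPT-2 model over the chatbot's generated answers, so it indicates how fluent and predictable those answers look to GPT-2, not whether they are factually correct.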

In [31]:
# Load GPT-2 model and tokenizer.
# These module-level objects are used by compute_perplexity() below.
model_name = "gpt2"
tokenizer = GPT2TokenizerFast.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()  # Set model to evaluation mode (disables dropout)

# Function to compute perplexity of a given text
def compute_perplexity(text, max_length=512):
    """Return the perplexity of ``text`` under the loaded GPT-2 model.

    Args:
        text (str): Text to score. It is truncated to ``max_length`` tokens.
        max_length (int): Maximum number of tokens passed to the model.

    Returns:
        float: ``exp`` of the language-modelling loss returned by the model.

    Raises:
        ValueError: If ``text`` tokenizes to fewer than two tokens.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_length)
    # The loss scores each token given the tokens before it, so at least two
    # tokens are needed. Shorter input would give an error or a NaN loss.
    if inputs["input_ids"].shape[-1] < 2:
        raise ValueError("Text must contain at least two tokens to compute perplexity.")
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
        loss = outputs.loss
    return math.exp(loss.item())

# Evaluate perplexity for multiple chatbot responses
def evaluate_chatbot_perplexity(eval_set_path="chatbot_eval_output.json"):
    """Print the perplexity of every generated answer and their average.

    Args:
        eval_set_path (str): JSON file holding a list of items with
            "question" and "generated_answer" keys.

    Answers for which ``compute_perplexity`` raises are reported and left out
    of the average.
    """
    with open(eval_set_path, "r", encoding="utf-8") as handle:
        records = json.load(handle)

    scores = []

    for record in records:
        asked = record["question"]
        answer_text = record["generated_answer"]

        try:
            ppl = compute_perplexity(answer_text)
            scores.append(ppl)
            print(f"Q: {asked}\nA: {answer_text[:100]}...\nPerplexity: {ppl:.2f}\n")
        except Exception as err:
            # Best effort: one failing answer must not abort the whole run.
            print(f"Error with input: {answer_text[:50]}... | {str(err)}")

    if not scores:
        print("\nNo responses could be evaluated.")
        return

    mean_ppl = sum(scores) / len(scores)
    print(f"\nAverage Perplexity Score: {mean_ppl:.2f}")

In [32]:
# Run the perplexity evaluation on the default "chatbot_eval_output.json"
evaluate_chatbot_perplexity()

Q: what is psychology
A:  Psychology is a knowledge discipline that focuses on understanding human behavior, thoughts, and fe...
Perplexity: 15.26

Q: what is stress
A:  Stress refers to the pattern of responses an organism makes to events that disturb its equilibrium ...
Perplexity: 23.66

Q: what is mental health
A:  Mental health refers to an individual's beliefs, thoughts, feelings, and behaviors regarding their ...
Perplexity: 10.88

Q: define behaviorism in psychology
A:  In psychology, Behaviorism is a theoretical perspective developed by John Watson around 1910. It fo...
Perplexity: 32.95

Q: explain cognitive psychology
A:  Cognitive Psychology is a branch of psychology that focuses on understanding the mental processes i...
Perplexity: 13.30

Q: what is motivation
A:  Motivation is a fundamental factor that drives behavior and is both a mental and physiological stat...
Perplexity: 29.96

Q: what are emotions
A:  Emotions are a complex pattern of arousal, subjective feeling, a