In [1]:
import nltk
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity      
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import streamlit as st
# import spacy
lemmatizer = nltk.stem.WordNetLemmatizer()
# Download required NLTK data
# nltk.download('stopwords')
# nltk.download('punkt')
# nltk.download('wordnet')


In [8]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
df = pd.read_csv("Mental_Health_FAQ.csv", na_filter=False)
df = df[['Questions', 'Answers']]
df

Unnamed: 0,Questions,Answers
0,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...
1,Who does mental illness affect?,It is estimated that mental illness affects 1 ...
2,What causes mental illness?,It is estimated that mental illness affects 1 ...
3,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...
4,Can people with mental illness recover?,"When healing from mental illness, early identi..."
...,...,...
93,How do I know if I'm drinking too much?,Sorting out if you are drinking too much can b...
94,"If cannabis is dangerous, why are we legalizin...","Cannabis smoke, for example, contains cancer-c..."
95,How can I convince my kids not to use drugs?,You can't. But you can influence their capacit...
96,What is the legal status (and evidence) of CBD...,Cannabidiol or CBD is a naturally occurring co...


Here, the Next step is tokenize our text dataset.<br>
There are two types of tokenization:
    <ol><li>Word Tokenization: This is  the process of breaking down a text or document into individual words or tokens.</li>
    <li>Sent Tokenization: This is to break down the text data into individual sentences so that each sentence can be processed separately.</li><br></ol>
Lemmatization: The goal of lemmatization is to reduce a word to its canonical form so that variations of the same word can be treated as the same token<br>
For example, the word "jumped" may be lemmatized to "jump", and the word "walking" may be lemmatized to "walk".<br>
By reducing words to their base forms, lemmatization can help to simplify text data and reduce the number of unique tokens that need to be analyzed or processed.


In [4]:

# Define a function for text preprocessing (including lemmatization)
def preprocess_text(text):
    
    # Identifies all sentences in the data
    sentences = nltk.sent_tokenize(text)
    
    # Tokenize and lemmatize each word in each sentence
    preprocessed_sentences = []
    for sentence in sentences:
        tokens = [lemmatizer.lemmatize(word.lower()) for word in nltk.word_tokenize(sentence) if word.isalnum()]
        # Turns to basic root - each word in the tokenized word found in the tokenized sentence - if they are all alphanumeric 
        # The code above does the following:
        # Identifies every word in the sentence 
        # Turns it to a lower case 
        # Lemmatizes it if the word is alphanumeric

        preprocessed_sentence = ' '.join(tokens)
        preprocessed_sentences.append(preprocessed_sentence)
    
    return ' '.join(preprocessed_sentences)


df['tokenized Questions'] = df['Questions'].apply(preprocess_text)
df.head()

Unnamed: 0,Questions,Answers,tokenized Questions
0,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...,what doe it mean to have a mental illness
1,Who does mental illness affect?,It is estimated that mental illness affects 1 ...,who doe mental illness affect
2,What causes mental illness?,It is estimated that mental illness affects 1 ...,what cause mental illness
3,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...,what are some of the warning sign of mental il...
4,Can people with mental illness recover?,"When healing from mental illness, early identi...",can people with mental illness recover


In [5]:
# Create a corpus by flattening the preprocessed questions
corpus = df['tokenized Questions'].tolist()

In [6]:
# Vectorize corpus
tfidf_vectorizer = TfidfVectorizer()
X = tfidf_vectorizer.fit_transform(corpus)
# TDIDF is a numerical statistic used to evaluate how important a word is to a document in a collection or corpus. 
# The TfidfVectorizer calculates the Tfidf values for each word in the corpus and uses them to create a matrix where each row represents a document and each column represents a word. 
# The cell values in the matrix correspond to the importance of each word in each document.

In [7]:
def get_response(user_input):
    global most_similar_index
    
    user_input_processed = preprocess_text(user_input) # ....................... Preprocess the user's input using the preprocess_text function

    user_input_vector = tfidf_vectorizer.transform([user_input_processed])# .... Vectorize the preprocessed user input using the TF-IDF vectorizer

    similarity_scores = cosine_similarity(user_input_vector, X) # .. Calculate the score of similarity between the user input vector and the corpus (df) vector

    most_similar_index = similarity_scores.argmax() # ..... Find the index of the most similar question in the corpus (df) based on cosine similarity

    return df['Answers'].iloc[most_similar_index] # ... Retrieve the corresponding answer from the df DataFrame and return it as the chatbot's response

# create greeting list 
greetings = ["Hey There.... I am a creation of Ehiz Danny Agba Coder.... How can I help",
            "Hi Human.... How can I help",
            'Twale baba nla, wetin dey happen nah',
            'How far Alaye, wetin happen'
            "Good Day .... How can I help", 
            "Hello There... How can I be useful to you today",
            "Hi GomyCode Student.... How can I be of use"]

exits = ['thanks bye', 'bye', 'quit', 'exit', 'bye bye', 'close']
farewell = ['Thanks....see you soon', 'Babye, See you soon', 'Bye... See you later', 'Bye... come back soon']

random_farewell = random.choice(farewell) # ---------------- Randomly select a farewell message from the list
random_greetings = random.choice(greetings) # -------- Randomly select greeting message from the list

# Test your chatbot
while True:
    user_input = input("You: ")
    if user_input.lower() in exits:
        print(f"\nChatbot: {random_farewell}!")
        break
    if user_input.lower() in ['hi', 'hello', 'hey', 'hi there']:
        print(f"\nChatbot: {random_greetings}!")
    else:   
        response = get_response(user_input)
        print(f"\nChatbot: {response}")


Chatbot: Twale baba nla, wetin dey happen nah!

Chatbot: Twale baba nla, wetin dey happen nah!

Chatbot: Mental illnesses are health conditions that disrupt a personâ€™s thoughts, emotions, relationships, and daily functioning. They are associated with distress and diminished capacity to engage in the ordinary activities of daily life.
Mental illnesses fall along a continuum of severity: some are fairly mild and only interfere with some aspects of life, such as certain phobias. On the other end of the spectrum lie serious mental illnesses, which result in major functional impairment and interference with daily life. These include such disorders as major depression, schizophrenia, and bipolar disorder, and may require that the person receives care in a hospital.
It is important to know that mental illnesses are medical conditions that have nothing to do with a personâ€™s character, intelligence, or willpower. Just as diabetes is a disorder of the pancreas, mental illness is a medical c

KeyboardInterrupt: Interrupted by user

<h3><b>Model Technique</b></h3> <hr>

In [None]:
# tfidf_vectorizer = TfidfVectorizer()
# xtrain = tfidf_vectorizer.fit_transform(df['tokenized Questions'])
# # Xtrain is the preprocessed questions 

In [None]:
# from sklearn.preprocessing import LabelEncoder

# le = LabelEncoder()

# # Transform the Y 
# df['Answers_ID'] = le.fit_transform(df['Answers'])
# df.head()

# ytrain = df['Answers_ID'].values
# # ytrain is the transformed Answers 

In [None]:
df.head()

Unnamed: 0,Questions,Answers,tokenized Questions,Answers_ID
0,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...,what doe it mean to have a mental illness,48
1,Who does mental illness affect?,It is estimated that mental illness affects 1 ...,who doe mental illness affect,35
2,What causes mental illness?,It is estimated that mental illness affects 1 ...,what cause mental illness,36
3,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...,what are some of the warning sign of mental il...,72
4,Can people with mental illness recover?,"When healing from mental illness, early identi...",can people with mental illness recover,91


In [None]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier

mnb = MultinomialNB()
mnb.fit(xtrain, ytrain)

rf = RandomForestClassifier()
rf.fit(xtrain, ytrain)

train_predict = mnb.predict(xtrain)
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(train_predict, ytrain))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1
           2       1.00      1.00      1.00         1
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         1
           5       1.00      1.00      1.00         1
           6       1.00      1.00      1.00         1
           7       1.00      1.00      1.00         1
           8       1.00      1.00      1.00         1
           9       1.00      1.00      1.00         1
          10       1.00      1.00      1.00         1
          11       1.00      1.00      1.00         1
          12       1.00      1.00      1.00         1
          13       0.00      0.00      0.00         0
          14       1.00      1.00      1.00         1
          15       1.00      1.00      1.00         1
          16       1.00      1.00      1.00         1
          17       1.00    

In [None]:
def get_response(user_input):
    global results
    user_input_processed = preprocess_text(user_input) # ....................... Preprocess the user's input using the preprocess_text function

    user_input_vector = tfidf_vectorizer.transform([user_input_processed])# .... Vectorize the preprocessed user input using the TF-IDF vectorizer

    results = mnb.predict(user_input_vector)

    for elem in results:
        row_df = df.loc[df.isin([elem]).any(axis=1)]
        print(row_df['Answers'].values)

# create greeting list 
greetings = ["Hey There.... I am a creation of Ehiz Danny Agba Coder.... How can I help",
            "Hi Human.... How can I help",
            'Twale baba nla, wetin dey happen nah',
            'How far Alaye, wetin happen'
            "Good Day .... How can I help", 
            "Hello There... How can I be useful to you today",
            "Hi GomyCode Student.... How can I be of use"]

exits = ['thanks bye', 'bye', 'quit', 'exit', 'bye bye', 'close']
farewell = ['Thanks....see you soon', 'Babye, See you soon', 'Bye... See you later', 'Bye... come back soon']

random_farewell = random.choice(farewell) # ---------------- Randomly select a farewell message from the list
random_greetings = random.choice(greetings) # -------- Randomly select greeting message from the list

# Test your chatbot
while True:
    user_input = input("You: ")
    if user_input.lower() in exits:
        print(f"\nChatbot: {random_farewell}!")
        break
    if user_input.lower() in ['hi', 'hello', 'hey', 'hi there']:
        print(f"\nChatbot: {random_greetings}!")
    else:   
        response = get_response(user_input)
        print(f"\nChatbot: {response}")


Chatbot: How far Alaye, wetin happenGood Day .... How can I help!
['If your beliefs , thoughts , feelings or behaviours have a significant impact on your ability to function in what might be considered a normal or ordinary way, it would be important to seek help.']

Chatbot: None
['Mental illnesses are health conditions that disrupt a personâ€™s thoughts, emotions, relationships, and daily functioning. They are associated with distress and diminished capacity to engage in the ordinary activities of daily life.\nMental illnesses fall along a continuum of severity: some are fairly mild and only interfere with some aspects of life, such as certain phobias. On the other end of the spectrum lie serious mental illnesses, which result in major functional impairment and interference with daily life. These include such disorders as major depression, schizophrenia, and bipolar disorder, and may require that the person receives care in a hospital.\nIt is important to know that mental illnesses a