In [15]:
import nltk
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity      
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
# import spacy
# WordNet-based lemmatizer shared by the preprocessing function.
lemmatizer = nltk.stem.WordNetLemmatizer()
# Download required NLTK data.
# 'punkt_tab' is the tokenizer resource that recent NLTK releases load for
# sent_tokenize / word_tokenize; without it those calls raise LookupError.
# NOTE(review): on older NLTK releases the 'punkt_tab' request is expected to
# just report an unknown package and return False - confirm for the pinned version.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/titokarika/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/titokarika/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/titokarika/nltk_data...


True

In [16]:
# Load the FAQ table and discard the identifier column in one chained expression,
# so the frame bound to `data` never exists in a half-cleaned state.
data = pd.read_csv('Mental_Health_FAQ.csv').drop(columns='Question_ID')
data

Unnamed: 0,Questions,Answers
0,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...
1,Who does mental illness affect?,It is estimated that mental illness affects 1 ...
2,What causes mental illness?,It is estimated that mental illness affects 1 ...
3,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...
4,Can people with mental illness recover?,"When healing from mental illness, early identi..."
...,...,...
93,How do I know if I'm drinking too much?,Sorting out if you are drinking too much can b...
94,"If cannabis is dangerous, why are we legalizin...","Cannabis smoke, for example, contains cancer-c..."
95,How can I convince my kids not to use drugs?,You can't. But you can influence their capacit...
96,What is the legal status (and evidence) of CBD...,Cannabidiol or CBD is a naturally occurring co...


In [18]:
# Define a function for text preprocessing (including lemmatization)
def preprocess_text(text):
    """Normalize free text into a lowercase, lemmatized, space-separated string.

    The text is split into sentences and each sentence into word tokens.
    Tokens that are not purely alphanumeric (punctuation, fragments such as
    "'m") are dropped; the remaining tokens are lower-cased and lemmatized
    with the module-level ``lemmatizer``.

    Parameters
    ----------
    text : str
        Raw text to normalize. Non-string values (e.g. a missing cell that
        pandas represents as NaN) are treated as empty text.

    Returns
    -------
    str
        The kept tokens of all sentences joined by single spaces; an empty
        string when nothing survives the filtering.
    """
    # Missing cells arrive as float NaN; the NLTK tokenizers only accept strings.
    if not isinstance(text, str):
        return ''

    processed_sentences = []
    for sentence in nltk.sent_tokenize(text):
        # lemmatize() is called without a POS tag, so every word is treated
        # as a noun (which is why "does" comes out as "doe").
        tokens = [
            lemmatizer.lemmatize(word.lower())
            for word in nltk.word_tokenize(sentence)
            if word.isalnum()
        ]
        processed_sentences.append(' '.join(tokens))

    return ' '.join(processed_sentences)


# Add a column holding the normalized form of every question, then preview it.
data['tokenized Questions'] = data['Questions'].map(preprocess_text)
data.head(20)

Unnamed: 0,Questions,Answers,tokenized Questions
0,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...,what doe it mean to have a mental illness
1,Who does mental illness affect?,It is estimated that mental illness affects 1 ...,who doe mental illness affect
2,What causes mental illness?,It is estimated that mental illness affects 1 ...,what cause mental illness
3,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...,what are some of the warning sign of mental il...
4,Can people with mental illness recover?,"When healing from mental illness, early identi...",can people with mental illness recover
5,What should I do if I know someone who appears...,Although this website cannot substitute for pr...,what should i do if i know someone who appears...
6,How can I find a mental health professional fo...,Feeling comfortable with the professional you ...,how can i find a mental health professional fo...
7,What treatment options are available?,Just as there are different types of medicatio...,what treatment option are available
8,"If I become involved in treatment, what do I n...",Since beginning treatment is a big step for in...,if i become involved in treatment what do i ne...
9,What is the difference between mental health p...,There are many types of mental health professi...,what is the difference between mental health p...


In [19]:
# Plain Python list of the normalized questions, in row order, for the vectorizer.
corpus = list(data['tokenized Questions'])
corpus

['what doe it mean to have a mental illness',
 'who doe mental illness affect',
 'what cause mental illness',
 'can people with mental illness recover',
 'what should i do if i know someone who appears to have the symptom of a mental disorder',
 'how can i find a mental health professional for myself or my child',
 'what treatment option are available',
 'if i become involved in treatment what do i need to know',
 'what is the difference between mental health professional',
 'how can i find a mental health professional right for my child or myself',
 'if i become involved in treatment what do i need to know',
 'where else can i get help',
 'what should i know before starting a new medication',
 'if i feel better after taking medication doe this mean i am cured and can stop taking it',
 'how can i get help paying for my medication',
 'where can i go to find therapy',
 'where can i learn about type of mental health treatment',
 'what are the different type of mental health professional',

In [20]:
# Learn the vocabulary and IDF weights from the preprocessed questions and
# encode each question as one row of a sparse TF-IDF matrix.
tfidf_vector = TfidfVectorizer()
v_corpus = tfidf_vector.fit_transform(corpus)
# Printing the sparse matrix lists its non-zero entries as (row, column) -> weight.
print(v_corpus)

  (0, 101)	0.36441743462193266
  (0, 128)	0.2595599997967565
  (0, 93)	0.4131356340148716
  (0, 216)	0.3064104920291124
  (0, 126)	0.4362655661041626
  (0, 110)	0.3782586235113424
  (0, 70)	0.4131356340148716
  (0, 228)	0.1765554179540623
  (1, 6)	0.5523407361335997
  (1, 231)	0.5066540480622876
  (1, 101)	0.39613691788888045
  (1, 128)	0.2821525222397721
  (1, 70)	0.44909562820041293
  (2, 36)	0.6958986765070116
  (2, 101)	0.5441013605412818
  (2, 128)	0.3875416915165623
  (2, 228)	0.2636098989594234
  (3, 189)	0.3888505175982014
  (3, 226)	0.3888505175982014
  (3, 208)	0.20619723379229024
  (3, 142)	0.5390782138680418
  (3, 192)	0.3888505175982014
  (3, 20)	0.27888228313399066
  (3, 101)	0.27888228313399066
  (3, 128)	0.19863672392259624
  :	:
  (95, 141)	0.42484474614924805
  (95, 112)	0.42484474614924805
  (95, 45)	0.42484474614924805
  (95, 74)	0.38970384828260646
  (95, 223)	0.3162701372700484
  (95, 134)	0.27709743719530117
  (95, 97)	0.20594459776752813
  (95, 33)	0.14762176669

In [21]:
# Sample user question used to walk through the retrieval steps one at a time.
input_text = "what are treatments available for mental illness"

In [22]:
# Preprocess the user's input with the same preprocess_text function that was
# applied to the FAQ questions, so both sides share one normalized vocabulary.
user_input_processed = preprocess_text(input_text)
user_input_processed

'what are treatment available for mental illness'

In [23]:
# Encode the query with the vectorizer already fitted on the corpus
# (transform only, no refit), wrapped in a list because it expects a collection of documents.
v_input = tfidf_vector.transform([user_input_processed])
print(v_input)

  (0, 228)	0.18747607328039523
  (0, 219)	0.43868971734114104
  (0, 128)	0.27561481888489653
  (0, 101)	0.3869581034075717
  (0, 82)	0.3253632005361528
  (0, 22)	0.539542552176249
  (0, 20)	0.3869581034075717


In [24]:
# Cosine similarity between the query vector and every FAQ question vector:
# a single row (the query) with one score per corpus entry.
most_similar = cosine_similarity(v_input, v_corpus)
most_similar

array([[0.24565278, 0.23105381, 0.36677721, 0.29590953, 0.22366239,
        0.07449132, 0.15889103, 0.68238416, 0.18091143, 0.12567681,
        0.14583617, 0.18091143, 0.        , 0.02949201, 0.        ,
        0.10215391, 0.        , 0.24453844, 0.22880497, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.20818226,
        0.03242574, 0.0782142 , 0.31363764, 0.142731  , 0.10518517,
        0.        , 0.02933652, 0.02649492, 0.        , 0.        ,
        0.        , 0.02732962, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.07038295, 0.15684222, 0.12337691,
        0.12156161, 0.        , 0.08798003, 0.        , 0.09775343,
        0.19834517, 0.09946692, 0.05479921, 0.05479921, 0.03293875,
        0.03597838, 0.03513851, 0.04597598, 0.02606697, 0.02010757,
        0.0232432 , 0.        , 0.        , 0.        , 0.        ,
        0.18499455, 0.01911426, 0.09217361, 0.34

In [25]:
# Pick the FAQ row with the highest similarity score and show its answer text.
data.iloc[most_similar.argmax()]['Answers']

'Just as there are different types of medications for physical illness, different treatment options are available for individuals with mental illness. Treatment works differently for different people. It is important to find what works best for you or your child.'

In [26]:
def bot_response(
    user_input,
    min_similarity=0.0,
    fallback="Sorry, I could not find an answer to that. Please try rephrasing your question.",
):
    """Return the FAQ answer whose question is most similar to ``user_input``.

    The input is normalized with ``preprocess_text``, encoded with the fitted
    ``tfidf_vector`` and compared against every row of ``v_corpus`` by cosine
    similarity.

    Parameters
    ----------
    user_input : str
        The user's question as typed.
    min_similarity : float, optional
        The best score must be strictly greater than this value for an answer
        to be returned. The default of 0.0 only rejects queries that share no
        vocabulary term with any FAQ question.
    fallback : str, optional
        Text returned when no FAQ question clears ``min_similarity``.

    Returns
    -------
    str
        The matching entry of ``data['Answers']``, or ``fallback``.
    """
    user_input_processed = preprocess_text(user_input)
    v_input = tfidf_vector.transform([user_input_processed])
    similarities = cosine_similarity(v_input, v_corpus)
    best_index = similarities.argmax()

    # When every score is 0, argmax returns index 0, which would present the
    # first FAQ answer as if it were a match. Report "no match" instead.
    if similarities.max() <= min_similarity:
        return fallback

    return data['Answers'].iloc[best_index]

In [27]:
# Canned replies; one is chosen at random whenever the user greets the bot.
chatbot_greeting = [
    "Hello there, welcome to Orpheus Bot. pls enjoy your usage",
    "Hi user, This bot is created by Orpheus, enjoy your usage",
    "Hi hi, How you dey my friend",
    "Alaye mi, Abeg enjoy your usage",
    "Hey Hey, pls enjoy your usage"
]

# Both lists are lowercase; user input is normalized before it is compared to them.
user_greeting = ["hi", "hello there", "hey", "hi there"]
exit_word = ['bye', 'thanks bye', 'exit', 'goodbye']


print('\t\t\t\t\tWelcome To Titos ChatBot\n\n')
while True:
    user_q = input('Pls ask your mental illness related question: ')
    # Compare case- and whitespace-insensitively so "Hi" or "bye " are recognized.
    normalized_q = user_q.strip().lower()
    if not normalized_q:
        # Nothing was typed; ask again rather than running retrieval on empty text.
        continue
    if normalized_q in user_greeting:
        print(random.choice(chatbot_greeting))
    elif normalized_q in exit_word:
        print('Thank you for your usage. Bye')
        break
    else:
        responses = bot_response(user_q)
        print(f'ChatBot:  {responses}')

					Welcome To Titos ChatBot


ChatBot:  There are many types of mental health professionals. Finding the right one for you may require some research.
Thank you for your usage. Bye
