In [1]:
import nltk
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity      
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
# import spacy
# WordNet-based lemmatizer shared by the preprocessing function below.
lemmatizer = nltk.stem.WordNetLemmatizer()

# Download required NLTK data
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases load the 'punkt_tab' resource (not the pickled 'punkt'
# models) for sent_tokenize / word_tokenize, so fetch it as well; without it
# tokenization raises LookupError on a fresh environment.
nltk.download('punkt_tab')
nltk.download('wordnet')


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\flora\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\flora\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\flora\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [7]:
# Load the FAQ table (comma-separated, which is read_csv's default delimiter)
# and display it.
data = pd.read_csv('Mental_Health_FAQ.csv')
data

Unnamed: 0,Question_ID,Questions,Answers
0,1590140,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...
1,2110618,Who does mental illness affect?,It is estimated that mental illness affects 1 ...
2,6361820,What causes mental illness?,It is estimated that mental illness affects 1 ...
3,9434130,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...
4,7657263,Can people with mental illness recover?,"When healing from mental illness, early identi..."
...,...,...,...
93,4373204,How do I know if I'm drinking too much?,Sorting out if you are drinking too much can b...
94,7807643,"If cannabis is dangerous, why are we legalizin...","Cannabis smoke, for example, contains cancer-c..."
95,4352464,How can I convince my kids not to use drugs?,You can't. But you can influence their capacit...
96,6521784,What is the legal status (and evidence) of CBD...,Cannabidiol or CBD is a naturally occurring co...


In [9]:
# Remove the identifier column, which is not used for matching.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after the column is already gone is a no-op
# rather than a KeyError.
data = data.drop(columns='Question_ID', errors='ignore')

In [11]:
# Text normalisation used for both the FAQ questions and the user's query
def preprocess_text(text):
    """Return ``text`` as one lower-cased, lemmatized, space-separated string.

    The text is split into sentences and each sentence into word tokens.
    Tokens that are not purely alphanumeric (e.g. punctuation) are dropped;
    the remaining tokens are lower-cased and lemmatized. Tokens are joined
    with single spaces within a sentence, and the cleaned sentences are then
    joined with single spaces as well.
    """
    def _clean_sentence(sentence):
        # Keep only alphanumeric tokens, normalised to their lower-cased lemma.
        kept = []
        for token in nltk.word_tokenize(sentence):
            if token.isalnum():
                kept.append(lemmatizer.lemmatize(token.lower()))
        return ' '.join(kept)

    return ' '.join(_clean_sentence(sentence) for sentence in nltk.sent_tokenize(text))


# Store the normalised form of every question alongside the original text.
data['tokenized Questions'] = data['Questions'].map(preprocess_text)
data

Unnamed: 0,Questions,Answers,tokenized Questions
0,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...,what doe it mean to have a mental illness
1,Who does mental illness affect?,It is estimated that mental illness affects 1 ...,who doe mental illness affect
2,What causes mental illness?,It is estimated that mental illness affects 1 ...,what cause mental illness
3,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...,what are some of the warning sign of mental il...
4,Can people with mental illness recover?,"When healing from mental illness, early identi...",can people with mental illness recover
...,...,...,...
93,How do I know if I'm drinking too much?,Sorting out if you are drinking too much can b...,how do i know if i drinking too much
94,"If cannabis is dangerous, why are we legalizin...","Cannabis smoke, for example, contains cancer-c...",if cannabis is dangerous why are we legalizing it
95,How can I convince my kids not to use drugs?,You can't. But you can influence their capacit...,how can i convince my kid not to use drug
96,What is the legal status (and evidence) of CBD...,Cannabidiol or CBD is a naturally occurring co...,what is the legal status and evidence of cbd oil


In [12]:
# Plain Python list of the normalised questions, used to fit the vectorizer.
xtrain = data['tokenized Questions'].tolist()
xtrain

['what doe it mean to have a mental illness',
 'who doe mental illness affect',
 'what cause mental illness',
 'can people with mental illness recover',
 'what should i do if i know someone who appears to have the symptom of a mental disorder',
 'how can i find a mental health professional for myself or my child',
 'what treatment option are available',
 'if i become involved in treatment what do i need to know',
 'what is the difference between mental health professional',
 'how can i find a mental health professional right for my child or myself',
 'if i become involved in treatment what do i need to know',
 'where else can i get help',
 'what should i know before starting a new medication',
 'if i feel better after taking medication doe this mean i am cured and can stop taking it',
 'how can i get help paying for my medication',
 'where can i go to find therapy',
 'where can i learn about type of mental health treatment',
 'what are the different type of mental health professional',

In [13]:
# Vectorize corpus
# Fit a TF-IDF vocabulary on the preprocessed questions and encode them in one
# step; `corpus` is the resulting sparse matrix with one row per question.
tfidf_vectorizer = TfidfVectorizer()
corpus = tfidf_vectorizer.fit_transform(xtrain)

# Printing a sparse matrix lists its non-zero entries as (row, column) weight.
print(corpus)

  (0, 101)	0.36441743462193266
  (0, 128)	0.2595599997967565
  (0, 93)	0.4131356340148716
  (0, 216)	0.3064104920291124
  (0, 126)	0.4362655661041626
  (0, 110)	0.3782586235113424
  (0, 70)	0.4131356340148716
  (0, 228)	0.1765554179540623
  (1, 6)	0.5523407361335997
  (1, 231)	0.5066540480622876
  (1, 101)	0.39613691788888045
  (1, 128)	0.2821525222397721
  (1, 70)	0.44909562820041293
  (2, 36)	0.6958986765070116
  (2, 101)	0.5441013605412818
  (2, 128)	0.3875416915165623
  (2, 228)	0.2636098989594234
  (3, 189)	0.3888505175982014
  (3, 226)	0.3888505175982014
  (3, 208)	0.20619723379229024
  (3, 142)	0.5390782138680418
  (3, 192)	0.3888505175982014
  (3, 20)	0.27888228313399066
  (3, 101)	0.27888228313399066
  (3, 128)	0.19863672392259624
  :	:
  (95, 141)	0.42484474614924805
  (95, 112)	0.42484474614924805
  (95, 45)	0.42484474614924805
  (95, 74)	0.38970384828260646
  (95, 223)	0.3162701372700484
  (95, 134)	0.27709743719530117
  (95, 97)	0.20594459776752813
  (95, 33)	0.14762176669

In [14]:
# Read a free-text question from the keyboard and echo it back.
user = input('Pls ask your question')
print(user)

what doe it mean to have a mental illness


In [15]:
# Preview the normalised form of the question; the result is only displayed,
# it is not assigned to any variable.
preprocess_text(user)


'what doe it mean to have a mental illness'

In [16]:
# Vectorize user input.
# The vectorizer was fitted on questions that went through preprocess_text,
# so the query has to go through the same normalisation; passing the raw text
# would leave inflected words (e.g. "causes") unmatched against the
# lemmatized vocabulary (e.g. "cause").
user_transformed = tfidf_vectorizer.transform([preprocess_text(user)])
print(user_transformed)

  (0, 228)	0.1765554179540623
  (0, 216)	0.3064104920291124
  (0, 128)	0.2595599997967565
  (0, 126)	0.4362655661041626
  (0, 110)	0.3782586235113424
  (0, 101)	0.36441743462193266
  (0, 93)	0.4131356340148716
  (0, 70)	0.4131356340148716


In [17]:
# find similarity
# Cosine similarity between the query vector and every row of `corpus`; the
# result is a 2-D array with a single row (the query) and one column per
# question.
similarity_scores = cosine_similarity(user_transformed, corpus)
similarity_scores

array([[1.        , 0.40313212, 0.3454121 , 0.17704296, 0.21063385,
        0.25849633, 0.06251536, 0.03379047, 0.10555576, 0.11835602,
        0.05737895, 0.10555576, 0.        , 0.02777408, 0.26372549,
        0.        , 0.11397   , 0.06517563, 0.09178474, 0.10489678,
        0.10280407, 0.08898973, 0.        , 0.        , 0.19605545,
        0.03053691, 0.07365815, 0.06767132, 0.13441679, 0.09905803,
        0.        , 0.02762764, 0.100104  , 0.09176701, 0.        ,
        0.        , 0.02573765, 0.        , 0.07349987, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.06628308, 0.06170926, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.07803851, 0.        , 0.05160711, 0.05160711, 0.03102004,
        0.03388261, 0.03309166, 0.04329784, 0.13722713, 0.10585443,
        0.02188926, 0.        , 0.        , 0.        , 0.        ,
        0.17421844, 0.11656428, 0.16276909, 0.32

In [19]:
# argmax over the single-row similarity array gives the position of the most
# similar question; .iloc fetches the answer stored at that position.
data['Answers'].iloc[similarity_scores.argmax()]

'Mental illnesses are health conditions that disrupt a personâ€™s thoughts, emotions, relationships, and daily functioning. They are associated with distress and diminished capacity to engage in the ordinary activities of daily life.\nMental illnesses fall along a continuum of severity: some are fairly mild and only interfere with some aspects of life, such as certain phobias. On the other end of the spectrum lie serious mental illnesses, which result in major functional impairment and interference with daily life. These include such disorders as major depression, schizophrenia, and bipolar disorder, and may require that the person receives care in a hospital.\nIt is important to know that mental illnesses are medical conditions that have nothing to do with a personâ€™s character, intelligence, or willpower. Just as diabetes is a disorder of the pancreas, mental illness is a medical condition due to the brainâ€™s biology.\nSimilarly to how one would treat diabetes with medication and i

In [23]:
def collector():
    """Prompt for a question and return the answer of the closest FAQ entry.

    The typed question is normalised with ``preprocess_text``, encoded with
    the fitted ``tfidf_vectorizer`` and compared against ``corpus`` by cosine
    similarity; the ``Answers`` value at the best-scoring position is returned.
    """
    question = input('Pls ask your question: ')
    query_vector = tfidf_vectorizer.transform([preprocess_text(question)])
    best_position = cosine_similarity(query_vector, corpus).argmax()

    return data['Answers'].iloc[best_position]

In [24]:
# Ask one question interactively; the returned answer is shown as the cell output.
collector()

'Mental illnesses are health conditions that disrupt a personâ€™s thoughts, emotions, relationships, and daily functioning. They are associated with distress and diminished capacity to engage in the ordinary activities of daily life.\nMental illnesses fall along a continuum of severity: some are fairly mild and only interfere with some aspects of life, such as certain phobias. On the other end of the spectrum lie serious mental illnesses, which result in major functional impairment and interference with daily life. These include such disorders as major depression, schizophrenia, and bipolar disorder, and may require that the person receives care in a hospital.\nIt is important to know that mental illnesses are medical conditions that have nothing to do with a personâ€™s character, intelligence, or willpower. Just as diabetes is a disorder of the pancreas, mental illness is a medical condition due to the brainâ€™s biology.\nSimilarly to how one would treat diabetes with medication and i

In [25]:
def responder(text):
    """Return the stored answer whose question is most similar to ``text``.

    Parameters
    ----------
    text : str
        The question as typed by the user.

    Returns
    -------
    The ``Answers`` entry of ``data`` at the position with the highest cosine
    similarity between the normalised ``text`` and the rows of ``corpus``.
    """
    # Work on the argument itself rather than a notebook-level variable, so
    # the function answers the question it was actually given and can be
    # called from anywhere, not only from inside the chat loop.
    user_input_processed = preprocess_text(text)
    vectorized_user_input = tfidf_vectorizer.transform([user_input_processed])
    similarity_scores = cosine_similarity(vectorized_user_input, corpus)
    argument_maximum = similarity_scores.argmax()
    return data['Answers'].iloc[argument_maximum]

# Canned phrases the bot can say when greeting the user or saying goodbye.
bot_greetings = ['Hello user, You are chatting with Flora....How may i help you',
             'Hi Dear, i am here if you need me',
             'Hey, what do need me to do',
             'Hello i am here for you',
             'How can i be of help' ]

bot_farewell = ['Thanks for your usage....bye',
            'Alright dear...Hope to see you soon',
            'Hope i was of help to know...Bye',
            'Do you have anymore question you to ask....Bye',
            'Thanks for reaching out...Hope i answer all your question...Bye']

# Phrases typed by the user that are treated as a greeting / a request to leave.
human_greeting = ['Hi', 'Hello There', 'Hiyya', 'Hey', 'hello', 'Wassup']

# 'quit' is the intended exit word; the earlier spelling 'quite' is kept so
# anything that already worked keeps working.
human_exits = ['Thanks bye', 'bye', 'quit', 'quite', 'exit', 'bye bye', 'close']

# The typed text is lower-cased before matching, so the trigger phrases must
# be lower-cased too; otherwise entries such as 'Hi' or 'Thanks bye' could
# never match.
greeting_triggers = {phrase.lower() for phrase in human_greeting}
exit_triggers = {phrase.lower() for phrase in human_exits}

# One greeting and one farewell are picked per session.
random_greeting = random.choice(bot_greetings)
random_farewell = random.choice(bot_farewell)

while True:
    user_input = input('You: ')
    normalized_input = user_input.strip().lower()

    if not normalized_input:
        # Nothing was typed; ask again instead of matching an empty query.
        continue

    if normalized_input in greeting_triggers:
        print(random_greeting)
    elif normalized_input in exit_triggers:
        print(random_farewell)
        break
    else:
        # A bare expression inside a loop is not displayed by the notebook,
        # so the answer has to be printed explicitly.
        print(responder(user_input))


Hi Dear, i am here if you need me
Do you have anymore question you to ask....Bye
