# Installing required packages

In [1]:
!pip install tensorflow

Collecting numpy~=1.19.2
  Using cached numpy-1.19.5-cp38-cp38-win_amd64.whl (13.3 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.21.1
    Uninstalling numpy-1.21.1:


You should consider upgrading via the 'F:\mentalhealthbotCanada\venv\Scripts\python.exe -m pip install --upgrade pip' command.


      Successfully uninstalled numpy-1.21.1
Successfully installed numpy-1.19.5


In [2]:
!pip install nltk



You should consider upgrading via the 'F:\mentalhealthbotCanada\venv\Scripts\python.exe -m pip install --upgrade pip' command.





# Importing required libraries

In [3]:
import json
import pickle
import random

import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers.schedules import InverseTimeDecay

nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# Making lists to store words, classes and documents

In [4]:
# Three independent, initially empty accumulators:
# raw tokens, intent tags, and (tokens, tag) documents.
vocabulary_list, categories_list, document_list = [], [], []

# List of characters to ignore

In [5]:
# Punctuation tokens that are dropped when the vocabulary is built
characters = list('?!')

# Opening and Loading the Dataset 

In [6]:
# Read the intents file through a context manager so the handle is closed
# deterministically. JSON is UTF-8 by specification, so the encoding is given
# explicitly instead of relying on the platform default.
with open('mentalhealthCanada.json', encoding='utf-8') as dataset_file:
    dataset = dataset_file.read()

# Parse the raw JSON text into the intents dictionary
intents = json.loads(dataset)

# Updating the vocabulary, category and documents lists

In [7]:
for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # Split the pattern into tokens and add them to the raw vocabulary
        tokens = nltk.word_tokenize(pattern)
        vocabulary_list.extend(tokens)

        # Record the tokenized pattern together with the tag it belongs to
        document_list.append((tokens, tag))

        # Register the tag the first time one of its patterns is seen
        if tag not in categories_list:
            categories_list.append(tag)

# Text pre-processing (lower-case the tokens, drop the ignored characters '?' and '!', and lemmatize)

In [8]:
lemma = WordNetLemmatizer()

# Lower-case and lemmatize every token that is not one of the ignored characters
lemmatized_tokens = (
    lemma.lemmatize(token.lower())
    for token in vocabulary_list
    if token not in characters
)

# De-duplicate the tokens and order them alphabetically
vocabulary = sorted(set(lemmatized_tokens))

# Sorting the categories list

In [9]:
# Unique intent tags in alphabetical order
categories = sorted(set(categories_list))

# The documents list pairs each tokenized pattern with its intent tag

In [10]:
# Report how many (pattern, tag) documents were collected
print(f"{len(document_list)} documents")

177 documents


# Categories list is the intents

In [11]:
# Report the number of intent classes followed by the classes themselves
print(f"{len(categories)} classes {categories}")

57 classes ['alberta_support', 'anxiety_causes', 'anxiety_disorders', 'anxiety_symptoms', 'anxiety_treatment', 'bc_support', 'bipolar_causes', 'bipolar_disorder', 'bipolar_symptoms', 'bipolar_treatment', 'canada_helpline', 'canada_support', 'depression', 'depression_causes', 'depression_symptoms', 'depression_treatment', 'eating_causes', 'eating_disorder', 'eating_symptoms', 'eating_treatment', 'generalized_disorder', 'goodbye', 'greeting', 'helpline_select', 'helpline_select_app', 'helpline_select_group', 'manitoba_support', 'mental_health', 'mental_health_illness', 'mental_illness', 'mental_illness_causes', 'mental_illness_symptoms', 'mental_illness_types', 'mental_treatment', 'nb_support', 'noanswer', 'ns_support', 'obsessive_disorder', 'ontario_support', 'options', 'panic_disorder', 'personality_causes', 'personality_disorders', 'personality_symptoms', 'personality_treatment', 'phobias', 'post_disorder', 'post_symptoms', 'quebec_support', 'saskatchewan_support', 'schizophrenia', 's

# Vocabulary list is all the unique words

In [12]:
# Report the vocabulary size followed by the vocabulary itself
print(f"{len(vocabulary)} Unique lemmatized words {vocabulary}")

125 Unique lemmatized words ["'s", ',', '1', '2', '3', '4', 'a', 'about', 'alberta', 'and', 'anxiety', 'any', 'anyone', 'apps', 'are', 'available', 'awesome', 'better', 'bipolar', 'both', 'british', 'brunswick', 'bye', 'call', 'can', 'canada', 'care', 'cause', 'chat', 'chatting', 'columbia', 'concern', 'contact', 'could', 'crisis', 'day', 'depression', 'detail', 'different', 'disorder', 'do', 'eating', 'factor', 'for', 'generalized', 'get', 'give', 'good', 'goodbye', 'group', 'have', 'health', 'hello', 'help', 'helpful', 'helping', 'helpline', 'hey', 'hi', 'how', 'i', 'illness', 'in', 'information', 'is', 'issue', 'later', 'manitoba', 'me', 'mental', 'more', 'my', 'new', 'next', 'nice', 'nova', 'number', 'obsessive-compulsive', 'of', 'on', 'online', 'ontario', 'panic', 'peer', 'personality', 'phobia', 'phone', 'post-traumatic', 'provide', 'province', 'province-wise', 'quebec', 'regarding', 'resource', 'same', 'saskatchewan', 'schizophrenia', 'scotia', 'see', 'service', 'should', 'socia

# Serializing the vocabulary and category lists and storing them in their respective pickle files

In [13]:
# Serialize the vocabulary and the categories to disk. The context managers
# make sure each file is flushed and closed as soon as it has been written,
# instead of leaving the handles open until garbage collection.
with open('vocabulary.pkl', 'wb') as vocabulary_file:
    pickle.dump(vocabulary, vocabulary_file)

with open('categories.pkl', 'wb') as categories_file:
    pickle.dump(categories, categories_file)

# Creating a list for the training data

In [14]:
# Accumulator for the [bag_of_words, one_hot_output] training rows
traininglist = list()

# Creating an all-zero list to use as the template for each output row

In [15]:
# One zero per category; copied for every training row before the current tag is set
output_empty = [0 for _ in categories]

# Training set will consist of bag of words for each text sentence

In [16]:
for doc in document_list:
    # Each document is a (tokenized pattern, intent tag) pair
    pattern_tokens, tag = doc

    # Lemmatize the lower-cased tokens; a set gives constant-time membership
    # tests for the vocabulary scan below
    pattern_words = {lemma.lemmatize(word.lower()) for word in pattern_tokens}

    # Bag of words: 1 where the vocabulary word occurs in this pattern, else 0
    bagofwords = [1 if w in pattern_words else 0 for w in vocabulary]

    # One-hot output: copy the all-zero template and flag the current tag
    output_row = list(output_empty)
    output_row[categories.index(tag)] = 1

    traininglist.append([bagofwords, output_row])

# Shuffling the training data and transforming the list into a numpy array

In [17]:
# Randomize the order of the training rows in place
random.shuffle(traininglist)

# Each row is [bag_of_words, one_hot_output], and the two inner lists generally
# have different lengths. NumPy only builds an array from such ragged input
# when dtype=object is explicit: older releases emit a deprecation warning and
# NumPy >= 1.24 raises ValueError otherwise.
training = np.array(traininglist, dtype=object)

  training = np.array(traininglist)


# Splitting the training data into inputs and targets. X is the bag-of-words patterns, Y is the one-hot encoded intents

In [18]:
# Column 0 holds the bag-of-words inputs, column 1 the one-hot outputs
X_train, Y_train = (list(training[:, column]) for column in (0, 1))
print("Training data created")

Training data created


# Creating the Chatbot model - 3 layers. First layer contains 128 neurons, second layer contains 64 neurons and third output layer contains number of neurons that are equal to number of intents for predicting the output intent with softmax

In [19]:
# Sequential model code adapted from 'Keras' documentation
# https://keras.io/guides/sequential_model/

# Input width is the bag-of-words length; output width is the number of intents
input_width = len(X_train[0])
output_width = len(Y_train[0])

# Two ReLU hidden layers, each followed by dropout, then a softmax output layer
botmodel = Sequential([
    Dense(128, input_shape=(input_width,), activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(output_width, activation='softmax'),
])

# Compiling the Chatbot model.

In [20]:
# The legacy `decay` keyword is no longer accepted by the optimizers shipped
# with newer TensorFlow/Keras releases. InverseTimeDecay with decay_steps=1
# expresses the same schedule: lr = 0.01 / (1 + 1e-6 * step).
learning_rate_schedule = InverseTimeDecay(
    initial_learning_rate=0.01,
    decay_steps=1,
    decay_rate=1e-6,
)

# SGD with Nesterov momentum on the decaying learning rate
sgd = SGD(learning_rate=learning_rate_schedule, momentum=0.9, nesterov=True)

# Multi-class classification over the intents, tracked by accuracy
botmodel.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Fitting and saving the Chatbot model for future use

In [21]:
# Train on the full training set; `createmodel` keeps the returned History object
createmodel = botmodel.fit(np.array(X_train), np.array(Y_train), epochs=1000, batch_size=8, verbose=1)

# Model.save()'s second positional parameter is `overwrite`, so the History
# object must not be passed there; the default already overwrites the file.
botmodel.save('mentalhealthbot_model.h5')
print("Model created")

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

# Summary of the model

In [22]:
# Print each layer with its output shape and parameter count, plus the totals
botmodel.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               16128     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 57)                3705      
Total params: 28,089
Trainable params: 28,089
Non-trainable params: 0
_________________________________________________________________
