In [None]:
# Install the tflearn package into the Google Colab runtime (-q keeps pip output quiet)
!pip install -q tflearn

In [None]:
# Install the nltk package into the Google Colab runtime (-q keeps pip output quiet)
!pip install -q nltk

In [None]:
# Install tensorflow, which is imported below together with tflearn (-q keeps pip output quiet)
!pip install -q tensorflow

In [None]:
# Numerical arrays, TensorFlow and the tflearn wrapper used to build the model;
# `random` is used to shuffle the training rows and to pick a reply.
import numpy as np
import tensorflow as tf
import tflearn
import random

# NLTK supplies the tokenizer and the Snowball stemmer used for text preprocessing.
import nltk
from nltk.stem.snowball import SnowballStemmer
# Single shared English stemmer, reused for both training data and user input.
stemmer = SnowballStemmer(language='english')


import json # for reading the intents file
import pickle # for serializing the training structures
import warnings
# NOTE(review): this silences ALL Python warnings for the rest of the session,
# including deprecation warnings that may signal real breakage.
warnings.filterwarnings("ignore") # to ignore the warnings

In [None]:
print("Processing the Intents.....")
# Load the intents definition from Google Colab storage. The parsed object is
# later indexed as intents['intents'], a list of entries carrying 'tag',
# 'patterns' and 'responses' keys.
with open('/content/assignmentNLP.json') as json_data: 
    intents = json.load(json_data)

Processing the Intents.....


In [None]:
# Tokenizer models required by nltk.word_tokenize.
# NLTK 3.9+ reads the 'punkt_tab' resource instead of the pickled 'punkt'
# models. Because nltk is installed unpinned, both are requested so the
# notebook works on old and new NLTK releases. With default arguments
# nltk.download reports an unknown resource name instead of raising.
nltk.download('punkt_tab')
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
# Accumulators filled while walking the intents file:
#   words     - every token seen in any pattern (duplicates kept for now)
#   classes   - distinct intent tags, in first-seen order
#   documents - (token list, tag) pairs, one per pattern
words = []
classes = []
documents = []
# punctuation tokens that must not become vocabulary entries
ignore_words = ['?','!','.']
print("Looping through the Intents to Convert them to words, classes, documents and ignore_words.......")
for intent in intents['intents']:
    for pattern in intent['patterns']:
        tag = intent['tag']
        # split the pattern sentence into tokens
        tokens = nltk.word_tokenize(pattern)
        # grow the raw vocabulary
        words += tokens
        # remember which tag this tokenized pattern belongs to
        documents.append((tokens, tag))
        # register the tag the first time it is seen
        if tag not in classes:
            classes.append(tag)

Looping through the Intents to Convert them to words, classes, documents and ignore_words.......


In [None]:
print("Stemming, Lowering and Removing Duplicates.......")
# Lower-case and stem every token that is not ignored punctuation; collecting
# into a set drops duplicates before the vocabulary is sorted.
unique_stems = {stemmer.stem(token.lower()) for token in words if token not in ignore_words}
words = sorted(unique_stems)

# Sorted, duplicate-free list of intent tags
classes = sorted(set(classes))

print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique stemmed words", words)

Stemming, Lowering and Removing Duplicates.......
216 documents
101 classes ['Age', 'Alopecia areata', 'BestWay', 'Biotin', 'COVIDTE', 'CSbaldness', 'Cancer', 'Cause', 'Covid', 'Depression', 'Diabetes', 'Dieting', 'ExcercideTE', 'ExcessVit', 'FTE', 'Fever', 'Gphase', 'Growingback', 'HCT', 'HTUC', 'HWAMHCPD', 'HairColoring', 'HairCutQ', 'HairFallingDown1', 'HairLife', 'Iron', 'LowBiotin', 'Lupus', 'MHairGrow', 'Mbalness', 'PBaldnessS', 'PSummerHL', 'Psoriasis', 'Rphase', 'STD', 'STE', 'Shampoo', 'StopHL', 'Stress', 'SummerHL', 'Supplement', 'TCSbaldness', 'TFFB', 'TFMB', 'Telogeneffluvium', 'Thyroid Issues', 'Vitamin A', 'VitaminC', 'VitaminD', 'WDTSGGO', 'WSD', 'WTE', 'WashHairQ', 'Wbalness', 'Zinc', 'avoid', 'baldness', 'deal', 'disease', 'doctor', 'doctorType', 'emotional trauma', 'factors', 'food', 'goodbye', 'greeting', 'growBack', 'hair styling', 'hairLossVit', 'hairline', 'healthCondition', 'hours', 'infections', 'issueTE', 'itchy', 'lessIron', 'medication', 'medicationsCause', '

In [None]:
print("Creating the Data for our Model.....")
training = []
output = []
print("Creating an List (Empty) for Output.....")
# template one-hot row: one slot per class, all zeros
output_empty = [0] * len(classes)

print("Creating Traning Set, Bag of Words for our Model....")
for tokens, tag in documents:
    # normalise the pattern's tokens the same way the vocabulary was built
    stems = [stemmer.stem(token.lower()) for token in tokens]
    # 1 where the vocabulary word occurs in this pattern, 0 elsewhere
    bag = [1 if vocab_word in stems else 0 for vocab_word in words]

    # one-hot label: copy the zero template and flag the pattern's tag
    output_row = list(output_empty)
    output_row[classes.index(tag)] = 1

    training.append([bag, output_row])

Creating the Data for our Model.....
Creating an List (Empty) for Output.....
Creating Traning Set, Bag of Words for our Model....


In [None]:
# Shuffle the [bag, one_hot] rows in place; they were appended intent by
# intent, so without shuffling the batches would be ordered by tag.
random.shuffle(training)

# Split features and labels straight from the Python rows so the result is
# always a list of lists.
train_x = [row[0] for row in training]
train_y = [row[1] for row in training]

# Each row pairs a bag (one slot per vocabulary word) with a one-hot label
# (one slot per class). Those lengths generally differ, so the rows are ragged.
# NumPy >= 1.24 raises ValueError for ragged input unless dtype=object is
# given. Older versions only emitted a warning, which this notebook silences.
training = np.array(training, dtype=object)

# Reset TensorFlow's default graph before the network is (re)built
from tensorflow.python.framework import ops
ops.reset_default_graph()


In [None]:
# Network layout: bag-of-words input -> two hidden layers of 16 units ->
# softmax over the intent classes, trained with Adam.
n_features = len(train_x[0])   # one input per vocabulary word
n_classes = len(train_y[0])    # one output per intent tag

net = tflearn.input_data(shape=[None, n_features])
for _ in range(2):
    # the number of neurons in each hidden layer is 16
    net = tflearn.fully_connected(net, 16)
net = tflearn.fully_connected(net, n_classes, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.01)
print("Training....")

Training....


In [None]:
# Wrap the network definition in a tflearn DNN model object; `model` is what
# the later cells call fit/save/load/predict on.
# NOTE(review): tensorboard_verbose=3 presumably enables the most detailed
# TensorBoard logging — confirm against the tflearn docs.
model = tflearn.DNN(net, tensorboard_verbose=3)
print("DOne")

DOne


In [None]:
print("Training the Model.......")
# Train for 500 epochs in mini-batches of 16; show_metric=True adds the
# accuracy figure to the per-step training log.
model.fit(train_x, train_y, n_epoch=500, batch_size=16, show_metric=True)
print("Saving the Model.......")
# Save the trained weights under the 'model.tflearn' prefix in the working directory
model.save('model.tflearn')


Training Step: 6999  | total loss: [1m[32m0.35882[0m[0m | time: 0.134s
| Adam | epoch: 500 | loss: 0.35882 - acc: 0.9552 -- iter: 208/216
Training Step: 7000  | total loss: [1m[32m0.39733[0m[0m | time: 0.147s
| Adam | epoch: 500 | loss: 0.39733 - acc: 0.9534 -- iter: 216/216
--
Saving the Model.......
INFO:tensorflow:/content/model.tflearn is not in all_model_checkpoint_paths. Manually adding it.


In [None]:
# Persist the vocabulary, class labels and training matrices so the inference
# cells can rebuild bag-of-words vectors without re-running preprocessing.
# The `with` block guarantees the file is flushed and closed.
with open("training_data", "wb") as training_file:
    pickle.dump(
        {'words': words, 'classes': classes, 'train_x': train_x, 'train_y': train_y},
        training_file,
    )
# Report success only after the dump has actually completed
print("Pickle is also Saved..........")

Pickle is also Saved..........


In [None]:
print("Loading Pickle.....")
# NOTE: pickle.load can execute arbitrary code; only load files that this
# notebook wrote itself. The `with` block closes the file after reading.
with open("training_data", "rb") as training_file:
    data = pickle.load(training_file)
words = data['words']
classes = data['classes']
train_x = data['train_x']
train_y = data['train_y']

# Re-read the intents so that responses can be looked up by tag
with open('/content/assignmentNLP.json') as json_data:
    intents = json.load(json_data)

print("Loading the Model......")
# Restore the trained weights into the existing `model` object
model.load('./model.tflearn')

Loading Pickle.....
Loading the Model......
INFO:tensorflow:Restoring parameters from /content/model.tflearn


In [None]:
def clean_up_sentence(sentence):
    """Tokenize `sentence` with NLTK and return its lower-cased, stemmed tokens as a list."""
    # Tokenizing splits the sentence into its parts; stemming reduces each
    # lower-cased token to its root form.
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

def bow(sentence, words, show_details=False):
    """Encode `sentence` as a bag-of-words vector over the vocabulary `words`.

    Args:
        sentence: raw text; it is tokenized and stemmed by clean_up_sentence.
        words: vocabulary list; position i of the result corresponds to words[i].
        show_details: when true, print every vocabulary word that matched.

    Returns:
        A numpy array with one entry per vocabulary word: 1 where the word
        occurs in the sentence, 0 elsewhere.
    """
    tokens = clean_up_sentence(sentence)
    flags = [0] * len(words)
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            flags[position] = 1
            if show_details:
                print("found in bag: %s" % vocab_word)
    return np.array(flags)

# classify() only keeps predictions whose probability is strictly above this cut-off
ERROR_THRESHOLD = 0.50
# Derive the message from the constant so the two cannot drift apart
print("ERROR_THRESHOLD = %.2f" % ERROR_THRESHOLD)

def classify(sentence):
    """Predict the intents of `sentence`.

    Returns a list of (tag, probability) tuples, most probable first, keeping
    only predictions whose probability is above ERROR_THRESHOLD. The filtered
    [index, probability] pairs are also printed.
    """
    probabilities = model.predict([bow(sentence, words)])[0]
    candidates = sorted(
        ([index, score] for index, score in enumerate(probabilities) if score > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )
    print(candidates)
    # Tuple -> (intent tag, probability)
    return [(classes[index], score) for index, score in candidates]

def response(sentence, userID='123', show_details=False):
    """Print a reply for `sentence`.

    The sentence is classified. For the most probable predicted tag that has
    an entry in intents['intents'], one of that intent's responses is chosen
    at random and printed. If nothing is predicted, or no predicted tag has a
    matching intent, the fallback apology is printed instead. Previously the
    second case printed nothing at all.

    Args:
        sentence: raw user text.
        userID: unused; kept so existing callers keep working.
        show_details: unused; kept so existing callers keep working.

    Returns:
        None. The reply is printed, not returned.
    """
    # classify() already orders predictions from most to least probable
    for tag, _probability in classify(sentence):
        for intent in intents['intents']:
            if intent['tag'] == tag:
                print(random.choice(intent['responses']))
                return

    print("Sorry I'm not understand your question please write it more details ")

# NOTE(review): repeats the ERROR_THRESHOLD assignment made earlier in this cell
# with the same value; it has no additional effect and looks safe to remove.
ERROR_THRESHOLD = 0.50


In [None]:
# Interactive chat loop: read a line, print the bot's reply, and stop once the
# user says goodbye.
while True:
    input_data = input("You- ")
    # response() prints the reply itself and returns None
    response(input_data)
    # The original test `input_data == ("goodbye" or "bye" or "Goodbye")`
    # collapsed to `input_data == "goodbye"`, so "bye" and "Goodbye" never
    # ended the loop. Compare case-insensitively against every farewell.
    if input_data.strip().lower() in ("goodbye", "bye"):
        break


You- what is baldness
[[56, 0.9967301]]
Baldness usually refers to excessive hair loss on the scalp. Hereditary hair loss with age is the most common cause of baldness.
You- sfdsufhsdfsdf
[]
Sorry I'm not understand your question please write it more details 
You- goodbye
[[64, 0.99942845]]
See you later, thanks for visiting
