#### Libraries

In [17]:
import json
import numpy as np
import pandas as pd
import nltk
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader



#### NLP methods

In [2]:
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# we use tokenize to split our sentence into words
def tokenize(sentence):
    return nltk.word_tokenize(sentence)
    
# we use stemmer method to take "the root" of the word, for example reject "s" in plural words. 
# Stemmers remove morphological affixes from words, leaving only the word stem.
def stem(word):
    return stemmer.stem(word.lower())

# this method is similar to stemmer, but return word with knowledge of the context
def lemmatize(word):
    return lemmatizer.lemmatize(word.lower())

# we use bag of words method to convert sentence into vector with value 1 at the position where words appear
def bag_of_words(token_sentence,all_words):
    token_sentence = [lemmatize(word) for word in token_sentence]
    
    bag = np.zeros(len(all_words),dtype=np.float32)
    for index,word in enumerate(all_words):
        if word in token_sentence:
            bag[index]=1.0
            
    return bag
    

In [3]:
# example of tokenize
print("Tokenized sentence:",tokenize("My dog isn't good" ))

Tokenized sentence: ['My', 'dog', 'is', "n't", 'good']


In [4]:
# example of stem
words=["observe","observations","observative"]

stemmed_w=[stem(word) for word in words]
lemm_w=[lemmatize(word) for word in words]

print(f'Stemmed: {stemmed_w} and Lemmatizer: {lemm_w}')

Stemmed: ['observ', 'observ', 'observ'] and Lemmatizer: ['observe', 'observation', 'observative']


#### Load data

In [5]:
with open('intents.json','r') as f:
    intents = json.load(f)

#Let's see our dataset 
intents

{'intents': [{'tag': 'greeting',
   'patterns': ['Hi there',
    'Hello',
    'Hola',
    'Hey',
    'Hi',
    'Hello',
    'Good morning',
    'Good Evening'],
   'responses': ['Hello, Welcome to Our IT Chatbot',
    'Good to see you, Welcome to our IT Chatbot',
    'Hi there, how can I help?'],
   'context': ['']},
  {'tag': 'goodbye',
   'patterns': ['Bye',
    'See you later',
    'Goodbye',
    'Nice chatting to you, bye',
    'Till next time',
    'Bye Chatbot'],
   'responses': ['See you!',
    'Have a nice day',
    'Bye! Come back again soon.',
    'Happy to help'],
   'context': ['']},
  {'tag': 'thanks',
   'patterns': ['Thanks',
    'Thank you',
    "That's helpful",
    'Awesome, thanks',
    'Thanks for helping me'],
   'responses': ['Happy to help! Any other issues?',
    'Any time! Any other issues I can help with?',
    'My pleasure! Any other Issues I can help with?'],
   'context': ['']},
  {'tag': 'noanswer',
   'patterns': ['q', 'random'],
   'responses': ["Sorry, 

In [6]:
all_words=[]
tags=[]
words_labeled=[] #word with corresponding tag

for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        word = tokenize(pattern)  
        all_words.extend(word)    # extend instead of append, beacuse we don't want list os lists
        words_labeled.append((word,tag))     # word with meaning 

all_words

['Hi',
 'there',
 'Hello',
 'Hola',
 'Hey',
 'Hi',
 'Hello',
 'Good',
 'morning',
 'Good',
 'Evening',
 'Bye',
 'See',
 'you',
 'later',
 'Goodbye',
 'Nice',
 'chatting',
 'to',
 'you',
 ',',
 'bye',
 'Till',
 'next',
 'time',
 'Bye',
 'Chatbot',
 'Thanks',
 'Thank',
 'you',
 'That',
 "'s",
 'helpful',
 'Awesome',
 ',',
 'thanks',
 'Thanks',
 'for',
 'helping',
 'me',
 'q',
 'random',
 'How',
 'you',
 'could',
 'help',
 'me',
 '?',
 'What',
 'you',
 'can',
 'do',
 '?',
 'What',
 'help',
 'you',
 'provide',
 '?',
 'How',
 'you',
 'can',
 'be',
 'helpful',
 '?',
 'What',
 'support',
 'is',
 'offered',
 'What',
 'services',
 'do',
 'you',
 'provide',
 '?',
 'What',
 'can',
 'you',
 'help',
 'me',
 'with',
 'How',
 'to',
 'reset',
 'my',
 'password',
 '?',
 'Iâ€™m',
 'unable',
 'to',
 'log',
 'in',
 '!',
 'My',
 'Password',
 'is',
 'Lost',
 ',',
 'need',
 'to',
 'Reset',
 '.',
 'Open',
 'password',
 'reset',
 'module',
 'Reset',
 'my',
 'password',
 'Forgot',
 'password',
 'how',
 'do',
 '

As we can observe, above we have all words but also punctuation signs. Good idea is to remove them from this list. We should also convert our words to row version (without endings) and remove duplicates.

In [7]:
signs=['?','!','.',',']

all_words=[lemmatize(word) for word in all_words if word not in signs]
all_words = sorted(set(all_words)) # aplly as a set for get unique values
all_words

["'s",
 '@',
 'a',
 'am',
 'anything',
 'awesome',
 'be',
 'blue',
 'bluetooth',
 'by',
 'bye',
 'can',
 'canâ€™t',
 'chatbot',
 'chatting',
 'closed',
 'computer',
 'connect',
 'could',
 'dead',
 'death',
 'deleted',
 'departmentâ€™s',
 'detected',
 'device',
 'do',
 'document',
 'doe',
 'down',
 'dreaded',
 'dropping',
 'evening',
 'fast',
 'file',
 'find',
 'folder',
 'for',
 'forgot',
 'gmail.com',
 'good',
 'goodbye',
 'got',
 'ha',
 'have',
 'hello',
 'help',
 'helpful',
 'helping',
 'hey',
 'hi',
 'hola',
 'hotmail.in',
 'how',
 'i',
 'id',
 'important',
 'in',
 'internet',
 'is',
 'isnâ€™t',
 'issue',
 'iâ€™m',
 'iâ€™ve',
 'just',
 'keep',
 'laptop',
 'later',
 'log',
 'login',
 'lost',
 'me',
 'mistake',
 'module',
 'morning',
 'mouse',
 'my',
 'neccesary',
 'need',
 'next',
 'nice',
 'not',
 'nothing',
 'of',
 'offered',
 'okay',
 'open',
 'outlook.com',
 'password',
 'pc',
 'pendrive',
 'print',
 'printer',
 'printing',
 'problem',
 'provide',
 'q',
 'random',
 'really',
 'r

Now we apply bag of words to our dataset.

Here is a short example of using bag of words method
![Bag of words](bag_of.png)

In [8]:
X_train=[]
y_train=[]

for (word,tag) in words_labeled:
    bag = bag_of_words(word,all_words)
    X_train.append(bag)
    
    label=tags.index(tag)
    y_train.append(label)
    
X_train = np.array(X_train)
y_train = np.array(y_train)

print(f'X_train: {X_train}')
print(f'y_train: {y_train}')

X_train: [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
y_train: [ 0  0  0  0  0  0  0  0  1  1  1  1  1  1  2  2  2  2  2  3  3  4  4  4
  4  4  4  4  5  5  5  5  5  5  5  5  5  6  6  6  6  7  7  8  8  8  8  8
  8  9  9  9 10 10 10 11 11 11 11 11 12 12 12 12 13 13 13 13 14 14 14 14
 15 15 15 15 15 15 16 16 16 16 16 17 17 17 17 17]


In [9]:
# it must inherit dataset from pytorch

class Chatbotdata(Dataset):
    def __init__(self):
        self.n_samples = len(X_train) #number of samples is equal to amount of X_train elements, so amount of sentences
        self.x_data = X_train
        self.y_data = y_train
        
    #dataset(index)
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.n_samples
    

We implemented it as a pytorch dataset, because we can automatically iterate over this and get batch training for further computations

In [10]:
#Dataset
batch_size=8
dataset = Chatbotdata()
train_loader = DataLoader(dataset = dataset, batch_size = batch_size, shuffle=True, num_workers=0)

#### Neural network model


![Neural Network](nn.png)

In [11]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, num_classes)
        
        #activation function
        self.relu = nn.ReLU()
        
    def forward(self, x):
        
        # input layer 
        output = self.layer1(x)
        output = self.relu(output)
        
        # hidden layer 
        output = self.layer2(output)
        output = self.relu(output)
        
        # output layer 
        output = self.layer3(output)
        # no softmax, because we apply cross-entropy loss later
        return output
    
    

If we have gpu support, then we can use it instead of cpu. It boost calculations in our neural network model.

In [12]:
#Hyperparameters:
batch_size = 8
hidden_size = 8
output_size = len(tags) # number of labels
input_size = len(X_train[0]) # all of the bog have the same size, we can just take first
learning_rate = 0.005
num_epochs=2000


print(f'input_size: {input_size}, output_size: {output_size}')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NeuralNet(input_size, hidden_size, output_size).to(device)

input_size: 141, output_size: 18


In [13]:
# loss and optimizer 

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

#### Training

In [14]:
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device, dtype=torch.int64)
        
        # forward learning
        outputs = model(words)
        loss = criterion(outputs, labels)
        
        # backward learning and optimizer step
        optimizer.zero_grad() #apply this, because we don't want to sum gradient after epoch
        
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print(f'epoch={epoch+1}/{num_epochs}, loss={loss.item():.2f}')
print(f'final loss: {loss.item():.2f}') 

epoch=100/2000, loss=2.92
epoch=200/2000, loss=2.57
epoch=300/2000, loss=2.38
epoch=400/2000, loss=2.37
epoch=500/2000, loss=2.49
epoch=600/2000, loss=2.39
epoch=700/2000, loss=1.51
epoch=800/2000, loss=1.96
epoch=900/2000, loss=1.39
epoch=1000/2000, loss=1.55
epoch=1100/2000, loss=1.27
epoch=1200/2000, loss=0.64
epoch=1300/2000, loss=0.59
epoch=1400/2000, loss=0.21
epoch=1500/2000, loss=0.33
epoch=1600/2000, loss=0.27
epoch=1700/2000, loss=0.25
epoch=1800/2000, loss=0.07
epoch=1900/2000, loss=0.03
epoch=2000/2000, loss=0.14
final loss: 0.14


In [17]:
data = {
    "model_state":model.state_dict(),
    "input_size":input_size,
    "output_size":output_size,
    "hidden_size":hidden_size,
    "all_words":all_words,
    "tags":tags
}

In [18]:
FILE ='data.pth'
torch.save(data, FILE)
print(f'training complete, file saved to file {FILE}')

training complete, file saved to file data.pth


#### Chat


In [18]:
bot_name = "Bot"

print("Let's chat, end with writing quit")

while True:
    sentence = input("You: ")
    if sentence == "quit":
        break

    sentence = tokenize(sentence)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X)
    X = X.to(device)
    
    output = model(X)
    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]

    # we implement softmax in goal to get vector consists only 0 and 1 from probabilities values on the output of model
    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]

    # if the proba is large enough
    if prob.item() > 0.75:
        for intent in intents["intents"]:
            if tag == intent["tag"]:
                print(f'{bot_name}:{random.choice(intent["responses"])}')
    else:
        print(f'{bot_name}: I dont understand...')

Let's chat, end with writing quit
You: Hello
Bot:Good to see you, Welcome to our IT Chatbot
You: I have problem with my mouse
Bot:a) If you encounter a mouse problem, you should first try these options:
1) If it is a first-time issue, restarting your PC can resolve the issue instantly.
2) Confirm that the mouse or the wireless adaptor is firmly connected to the PC.
3) You may also try to unplug the mouse cable or the wireless adaptor and reconnect using a different port.
4)Check the mouse and the ports for damages and even try the mouse on a different computer.
4 If none of these solves the problem, you can now proceed to other solutions.
b) Troubleshoot Hardware and Devices
c) Updating Incompatible Mouse Drivers
d) Roll Back or Reinstall Mouse Drivers
e) Deactivate Enhanced Pointer Precision
f) Adjusting the Mouse Sensitivity
g) Configure Touchpad Delay
h) Disable Touchpad
You: I have also virus
Bot:Hey, I cant seems that you have not listed you Issue here.
 I can help you out with th