In [1]:
import nltk
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [2]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [3]:
from nltk.stem.porter import *
stemmer = PorterStemmer()

In [4]:
def tokenize(sentence):
  return nltk.word_tokenize(sentence)

In [5]:
def stem(word):
  return stemmer.stem(word.lower())

In [6]:
words = ['connection', 'connecting', 'connected']

stemmed_words = [stem(w) for w in words]

print(stemmed_words)

['connect', 'connect', 'connect']


In [7]:
def bag_of_words(tokenized_sentence, all_words):

  tokenized_sentence = [stem(w) for w in tokenized_sentence]
  bag = np.zeros(len(all_words), dtype = np.float32)

  for idx, w in enumerate(all_words):
    if w in tokenized_sentence:
      bag[idx] = 1.0
  
  return bag

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
import json

In [10]:
with open('/content/drive/MyDrive/Colab Notebooks/Chat Bot Data/intents.json', 'r') as f:
  intents = json.load(f)

In [11]:
all_words = []
tags = []
xy = []

for intent in intents['intents']:
  tag = intent['tag']
  tags.append(tag)

  for pattern in intent['patterns']:
    w = tokenize(pattern)
    all_words.extend(w)
    xy.append((w,tag))

ignore_words = ['?', '.', ',', '!']

all_words = [stem(w) for w in all_words if w not in ignore_words]

all_words = sorted(set(all_words))
tags = sorted(set(tags))

In [12]:
X_train = []
y_train = []

for (pattern_sentence,tag) in xy:
  bag = bag_of_words(pattern_sentence, all_words)
  X_train.append(bag)
  
  labels = tags.index(tag)
  y_train.append(labels)         #Since we have to use cross-entropy loss function we are not using one-hot encoded vector


In [13]:

 
X_train = np.array(X_train)
y_train = np.array(y_train)

In [14]:
class ChatDataset(Dataset):

  def __init__(self):
    self.n_samples = len(X_train)
    self.x_data = X_train
    self.y_data = y_train

  def __getitem__(self, idx):
    return self.x_data[idx], self.y_data[idx]

  def __len__(self):
    return self.n_samples

batch_size = 8

dataset = ChatDataset()
train_loader = DataLoader(dataset = dataset, batch_size = batch_size, shuffle = True)

In [15]:
class NeuralNetwork(nn.Module):

  def __init__(self, input_size, hidden_size, num_classes):
    super(NeuralNetwork, self).__init__()
    self.l1 = nn.Linear(input_size, hidden_size)
    self.l2 = nn.Linear(hidden_size, hidden_size)
    self.l3 = nn.Linear(hidden_size,num_classes)
    self.relu = nn.ReLU()
      
  def forward(self, x):
    out = self.l1(x)
    out = self.relu(out)
    out = self.l2(out)
    out = self.relu(out)
    out = self.l3(out)
        # no activation and no softmax at the end
    return out  





In [16]:
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)
print(input_size, output_size)
print(output_size, tags)

54 7
7 ['delivery', 'funny', 'goodbye', 'greeting', 'items', 'payments', 'thanks']


In [17]:
device = torch.device('cuda')
model = NeuralNetwork(input_size, hidden_size, output_size).to(device)


In [18]:
import tensorflow as tf
tf.test.gpu_device_name()

'/device:GPU:0'

In [19]:
learning_rate = 0.001
epochs = 1000

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)



In [20]:
for epoch in range(epochs):
  for(words,labels) in train_loader:
    words = words.to(device)
    labels = labels.to(device)

    outputs = model(words)
    loss = loss_fn(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')


print(f'final loss: {loss.item():.4f}')

Epoch [100/1000], Loss: 1.2135
Epoch [100/1000], Loss: 0.8889
Epoch [100/1000], Loss: 0.8092
Epoch [100/1000], Loss: 1.1286
Epoch [200/1000], Loss: 0.1526
Epoch [200/1000], Loss: 0.4909
Epoch [200/1000], Loss: 0.2768
Epoch [200/1000], Loss: 0.5505
Epoch [300/1000], Loss: 0.0572
Epoch [300/1000], Loss: 0.0723
Epoch [300/1000], Loss: 0.0529
Epoch [300/1000], Loss: 0.0155
Epoch [400/1000], Loss: 0.0143
Epoch [400/1000], Loss: 0.0125
Epoch [400/1000], Loss: 0.0235
Epoch [400/1000], Loss: 0.0146
Epoch [500/1000], Loss: 0.0079
Epoch [500/1000], Loss: 0.0086
Epoch [500/1000], Loss: 0.0065
Epoch [500/1000], Loss: 0.0074
Epoch [600/1000], Loss: 0.0035
Epoch [600/1000], Loss: 0.0044
Epoch [600/1000], Loss: 0.0049
Epoch [600/1000], Loss: 0.0043
Epoch [700/1000], Loss: 0.0013
Epoch [700/1000], Loss: 0.0037
Epoch [700/1000], Loss: 0.0031
Epoch [700/1000], Loss: 0.0021
Epoch [800/1000], Loss: 0.0012
Epoch [800/1000], Loss: 0.0020
Epoch [800/1000], Loss: 0.0017
Epoch [800/1000], Loss: 0.0034
Epoch [9

In [21]:
data = {
"model_state": model.state_dict(),
"input_size": input_size,
"hidden_size": hidden_size,
"output_size": output_size,
"all_words": all_words,
"tags": tags
}

In [22]:
FILE = "data.pth"
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')


training complete. file saved to data.pth


Testing the model

In [23]:
import random

In [24]:
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

In [25]:

model.load_state_dict(model_state)
model.eval()


NeuralNetwork(
  (l1): Linear(in_features=54, out_features=8, bias=True)
  (l2): Linear(in_features=8, out_features=8, bias=True)
  (l3): Linear(in_features=8, out_features=7, bias=True)
  (relu): ReLU()
)

In [None]:
bot_name = "Sam"
print("Let's chat! (type 'quit' to exit)")
while True:
    # sentence = "do you use credit cards?"
    sentence = input("You: ")
    if sentence == "quit":
        break

    sentence = tokenize(sentence)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).to(device)

    output = model(X)
    _, predicted = torch.max(output, dim=1)

    tag = tags[predicted.item()]

    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]
    if prob.item() > 0.75:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                print(f"{bot_name}: {random.choice(intent['responses'])}")
    else:
        print(f"{bot_name}: I do not understand...")

Let's chat! (type 'quit' to exit)
You: Hi
Sam: Hey :-)
You: What do you sell
Sam: We sell coffee and tea
You: Are you cash only?
Sam: We accept VISA, Mastercard and Paypal
You: How long does delivery take?
Sam: Shipping takes 2-4 days
You: Thank you
Sam: Any time!
