In [2]:
import os
import json
import numpy as np
import pandas as pd

import nltk
from nltk.stem.porter import PorterStemmer
from collections import Counter

from sklearn.preprocessing import LabelEncoder

In [3]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cpu'

## Data Loading and Exploration

In [33]:
# Read the intents definition. The encoding is passed explicitly so the file
# is decoded the same way on every platform instead of using the locale
# default (e.g. cp1252 on Windows), which can mangle non-ASCII text.
with open('intents.json', 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [34]:
# Inspect the structure of the first intent only (nothing is printed when
# the intents list is empty).
for intent in data['intents'][:1]:
    print(intent.keys())
    print(
        'Tag : ', intent['tag'], '',
        'Patterns : ', intent['patterns'], '',
        'Responses : ', intent['responses'],
        sep='\n',
    )

dict_keys(['tag', 'patterns', 'responses'])
Tag : 
greeting

Patterns : 
['Hi', 'Hey', 'How are you', 'Is anyone there?', 'Hello', 'Good day']

Responses : 
['Hey :-)', 'Hello, thanks for visiting', 'Hi there, what can I do for you?', 'Hi there, how can I help?']


In [35]:
vocab = []
X_train, y_train = [], []
##-- parsing data
# Flatten the intents into two parallel lists: one entry per example
# pattern, paired with the tag of the intent it belongs to.
for intent in data['intents']:
    patterns = intent['patterns']
    X_train.extend(patterns)
    y_train.extend(intent['tag'] for _ in patterns)

In [36]:
# Shuffle the rows once. The seed is fixed so that Restart & Run All yields
# the same row order every time; the token order derived from this frame
# (and therefore the vocabulary indices) depends on it.
df = pd.DataFrame(data={'Patterns':X_train, 'label':y_train}).sample(frac=1.0, random_state=42)
df.head()

Unnamed: 0,Patterns,label
15,What do you sell?,items
22,When do I get my delivery?,delivery
3,Is anyone there?,greeting
8,Goodbye,goodbye
25,Do you know a joke?,funny


## Tokenization

In [8]:
# NOTE: nltk.word_tokenize relies on NLTK's "punkt" tokenizer data. If it
# raises LookupError, fetch the data once with nltk.download('punkt')
# (the error message names the exact resource the installed NLTK expects).
stemmer = PorterStemmer()

def tokenize(sentence):
    """Split a sentence into a list of word and punctuation tokens."""
    pieces = nltk.word_tokenize(sentence)
    return pieces

def stem(word):
    """Lower-case ``word`` and return its Porter stem."""
    lowered = word.lower()
    return stemmer.stem(lowered)

In [9]:
#--- tokens
# Flatten every pattern into a single token list, in row order. A plain
# comprehension replaces the previous Series.apply call, which was used only
# for its side effect and left a throwaway Series of None values behind.
tokens = [tok for sentence in df['Patterns'] for tok in tokenize(sentence)]
print('Show : ', tokens[:5], ' , Size : ',len(tokens))

Show :  ['Do', 'you', 'take', 'credit', 'cards']  , Size :  103


In [10]:
#--- stemming & filtering
# Punctuation tokens that carry no signal; they are dropped before stemming.
ignore_words = ['?','!','.',',']

filtered_tokens = []
for tok in tokens:
    if tok in ignore_words:
        continue
    filtered_tokens.append(stem(tok))
print('Show : ', filtered_tokens[:5])

Show :  ['do', 'you', 'take', 'credit', 'card']


In [11]:
# Rank the stemmed tokens by frequency, most common first.
frequent_tokens = Counter(filtered_tokens).most_common()

# Ids are 1-based: the most frequent token gets 1 and id 0 is never assigned.
word_index = {
    token: rank
    for rank, (token, _) in enumerate(frequent_tokens, start=1)
}
# Reverse lookup: id -> token.
index_word = {rank: token for token, rank in word_index.items()}

In [12]:
# Display the vocabulary tokens in insertion order (most frequent first).
word_index.keys()

dict_keys(['you', 'do', 'take', 'how', 'a', 'are', 'thank', 'long', 'doe', 'what', 'joke', 'i', 'deliveri', 'item', 'there', "'s", 'tell', 'me', 'credit', 'card', 'ship', 'sell', 'hi', 'hello', 'goodby', 'know', 'when', 'get', 'my', 'see', 'later', 'bye', 'kind', 'of', 'which', 'have', 'can', 'pay', 'with', 'paypal', 'lot', 'hey', 'cash', 'onli', 'good', 'day', 'that', 'help', 'accept', 'mastercard', 'someth', 'funni', 'is', 'anyon'])

In [15]:
def bag_of_words_sentence(word_index, sentence):
    """Encode a sentence as a binary bag-of-words vector.

    Parameters
    ----------
    word_index : dict
        Maps each known stemmed token to its position in the vector. The
        vector has ``len(word_index) + 1`` entries, so with the 1-based ids
        built in this notebook position 0 is never set.
    sentence : str
        Raw text. It is split with ``tokenize``, tokens listed in
        ``ignore_words`` are dropped, and the rest go through ``stem``.

    Returns
    -------
    numpy.ndarray
        1-D ``int64`` array holding 1 at the position of every known token
        that occurs in the sentence and 0 elsewhere. Tokens missing from
        ``word_index`` are ignored.
    """
    # np.int64 instead of np.long: the np.long alias is absent from
    # NumPy 1.24-1.26 and, where it exists, its width depends on the platform.
    bag_of_words = np.zeros((len(word_index) + 1,), dtype=np.int64)

    for tok in tokenize(sentence):
        if tok in ignore_words:
            continue
        # Single dict lookup; None marks an out-of-vocabulary token.
        idx = word_index.get(stem(tok))
        if idx is not None:
            bag_of_words[idx] = 1
    return bag_of_words

In [16]:
# Sanity check: encode one short sentence and display the resulting vector.
bag_of_words_sentence(word_index,'how are you?')

array([0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [17]:
def bag_documents(word_index, documents):
    """Encode every sentence in ``documents`` as a bag-of-words row.

    Each sentence is passed to ``bag_of_words_sentence`` with the given
    ``word_index``; the per-sentence vectors are collected, in order, into a
    single ``float32`` NumPy array.
    """
    rows = [bag_of_words_sentence(word_index, text) for text in documents]
    return np.array(rows, np.float32)

In [18]:
#--- Get X_train by bag of words
# One encoded row per pattern, in the DataFrame's (shuffled) row order.
X_train = bag_documents(word_index=word_index, documents=df['Patterns'].tolist())

In [19]:
#--- Get y_train by encode the labels
# Map each intent tag to an integer class id and keep the fitted encoder
# around so predictions can be mapped back to tags.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(df['label'])
df['target'] = encoded_labels
y_train = df['target'].to_numpy()

In [20]:
# Show the shapes of the feature matrix and the label vector.
X_train.shape, y_train.shape

((26, 55), (26,))

## Dataset, Dataloader

In [21]:
class ChatbotDataset(Dataset):
    """Map-style dataset pairing feature rows with their labels."""

    def __init__(self, X_train,y_train):
        """Store the feature matrix and label vector as given."""
        super().__init__()
        self.X_train, self.y_train = X_train, y_train
        # The dataset size is taken from the number of labels.
        self.length = len(self.y_train)

    def __len__(self):
        """Return the number of samples."""
        return self.length

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        features = self.X_train[index, :]
        label = self.y_train[index]
        return features, label

In [22]:
# Mini-batches of two samples, reshuffled on every pass over the data.
BATCH_SIZE = 2
train_dataset = ChatbotDataset(X_train=X_train, y_train=y_train)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [24]:
##-- check train loader
# Pull a single mini-batch and print it (nothing is printed for an empty loader).
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    x, y = first_batch
    print(x,y)

tensor([[0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.]]) tensor([3, 2], dtype=torch.int32)


## Model

In [25]:
class ChatbotModel(nn.Module):
    """Small fully connected classifier: two ReLU hidden layers and a linear head.

    ``forward`` returns the output of the last linear layer without any
    softmax or sigmoid applied.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Fully connected stack: input -> hidden -> hidden -> output.
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, output_size)
        # Activation modules; forward() only applies the ReLU.
        self.softmax = nn.Softmax(dim=1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through both hidden layers and the output layer."""
        hidden = self.relu(self.l1(x))
        hidden = self.relu(self.l2(hidden))
        return self.l3(hidden)

In [26]:
# Layer sizes: one input per bag-of-words column, one output per intent class.
input_size = X_train.shape[1]
hidden_size = 128
n_classes = len(label_encoder.classes_)

model = ChatbotModel(input_size, hidden_size, n_classes)
model = model.to(device)

## Training

In [28]:
# Loss and optimizer
# Cross-entropy over the model's raw outputs, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

In [29]:
num_epochs = 75
# Train the model
for epoch in range(num_epochs):
    for bag_words, labels in train_loader:
        # Move the mini-batch to the training device; labels are cast to int64.
        bag_words, labels = bag_words.to(device), labels.to(device, dtype=torch.long)

        # Forward pass and loss.
        loss = criterion(model(bag_words), labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Every 5th epoch, report the loss of that epoch's last mini-batch.
    epoch_number = epoch + 1
    if epoch_number % 5 == 0:
        print (f'Epoch [{epoch_number}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [5/75], Loss: 1.6685
Epoch [10/75], Loss: 0.3487
Epoch [15/75], Loss: 0.1166
Epoch [20/75], Loss: 0.0275
Epoch [25/75], Loss: 0.0107
Epoch [30/75], Loss: 0.0185
Epoch [35/75], Loss: 0.0067
Epoch [40/75], Loss: 0.0100
Epoch [45/75], Loss: 0.0024
Epoch [50/75], Loss: 0.0007
Epoch [55/75], Loss: 0.0010
Epoch [60/75], Loss: 0.0014
Epoch [65/75], Loss: 0.0017
Epoch [70/75], Loss: 0.0007
Epoch [75/75], Loss: 0.0006


---

## Saving Trained Model

In [32]:
# Bundle everything needed to rebuild and use the classifier in a fresh process.
data_to_save = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": n_classes,
    "all_words": word_index,
    # Class index -> intent tag, in the order of the model's output units.
    # Without it a reloaded model's predictions cannot be mapped to intents.
    # Stored as a plain list of str so the file holds no NumPy objects.
    "tags": label_encoder.classes_.tolist(),
}

FILE = "data.pth"
torch.save(data_to_save, FILE)

print(f'training complete. file saved to {FILE}')

training complete. file saved to data.pth


## Chat Module

In [37]:
def chat(model):
    """Run an interactive console chat backed by the trained intent classifier.

    Sentences are read with ``input`` until the user types ``quit`` or ``q``
    (case and surrounding whitespace are ignored) or the input stream ends.
    Each sentence is encoded with ``bag_of_words_sentence`` and ``word_index``,
    scored by ``model``, and converted to class probabilities with softmax.
    If the best probability is below 0.7 a fallback message is printed;
    otherwise a random response of the matching intent in ``data`` is printed
    together with the confidence.

    Parameters
    ----------
    model : torch.nn.Module
        Classifier that takes a ``(1, vocabulary_size)`` float tensor and
        returns one score per class of ``label_encoder``.
    """
    model.eval()
    # Feed inputs on whatever device the model's weights live on, so the
    # loop also works when the model was moved to the GPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    print('--- Lets Chat ---')
    while True:
        print('')
        print('-'*47)
        try:
            sentence = input('You: ')
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt ends the chat cleanly.
            break
        if sentence.strip().lower() in ('quit', 'q'):
            break

        x = bag_of_words_sentence(word_index, sentence)
        x = torch.tensor(x, dtype=torch.float32, device=model_device)
        with torch.no_grad():
            output = model(x.unsqueeze(0))

        output = torch.softmax(output.cpu(), dim=1)

        # Best class and its probability for the single sentence in the batch.
        value, p_class_idx = torch.max(output, dim=1)
        confidence = value.item()

        if confidence < 0.7:
            print('Robo :', "I'm Sorry Can you give me more details?")
            print('----!%','Confidence : ', confidence)
            continue

        # Index with a plain int so tag_class is one tag, not a 1-element array.
        tag_class = label_encoder.classes_[p_class_idx.item()]

        for intent in data['intents']:
            if intent['tag'] == tag_class:
                print('Robo:', np.random.choice(intent['responses']) ,f'\t [confidence:{confidence}]')
                break

chat(model)

--- Lets Chat ---

-----------------------------------------------
You: hello
Robo: Hi there, what can I do for you? 	 [confidence:0.9986945986747742]

-----------------------------------------------
You: what do you sell?
Robo: We have coffee and tea 	 [confidence:0.9990430474281311]

-----------------------------------------------
You: how long does it take to deliver?
Robo: Delivery takes 2-4 days 	 [confidence:0.9972363114356995]

-----------------------------------------------
You: $!^%@^!#
Robo : I'm Sorry Can you give me more details?
----!% Confidence :  0.6546845

-----------------------------------------------
You: XZXCZCWQE
Robo : I'm Sorry Can you give me more details?
----!% Confidence :  0.6546845

-----------------------------------------------
You: tell me a joke
Robo: What did the buffalo say when his son left for college? Bison. 	 [confidence:0.9998366832733154]

-----------------------------------------------
You: thank you bye
Robo: My pleasure 	 [confidence:0.91826

---

# What's Next (Improvements)
* We need more data and more patterns per intent, chosen for the target use cases; the dataset we trained on is very small and limited.
* We could use a sequence model: this is a very simple bag-of-words model, so it cannot take word order or context into account.

## Reference

* This was done following a tutorial by Python Engineer
https://www.youtube.com/watch?v=RpWeNzfSUHw