In [1]:
import json
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from nltk_utils import tokenize, stem, bag_of_words
# from model import NeuralNet
import pickle

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\WINDOWS\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [31]:
PATH_INTENTS = '../../data/intents.json'
PATH_MODEL = 'torch_model.h5'
PATH_MODEL_PKL = 'torch_model.pkl'
PATH_MODEL_PTH = 'torch_model.pth'
PATH_METADATA = 'torch_metadata.json'
PATH_DATA = 'torch_data.pth'

In [3]:
# Open file json
with open(PATH_INTENTS, 'r') as f:
    intents = json.load(f)

In [4]:
# Define list
all_words = []  # Setence
tags = []       # Tags of setence
xy = []         # X and Y of setence

In [5]:
# Getting setence, tags, xy
for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    
    for pattern in intent['patterns']:
        w = tokenize(pattern)
        all_words.extend(w)
        xy.append((w, tag))

In [6]:
# Stemming
ignore_words = ['?', '!', '.', ',']
all_words = [stem(w) for w in all_words if w not in ignore_words] 

In [7]:
# Remove duplicate word
all_words = sorted(set(all_words))      
tags = sorted(set(tags)) 
# print(tags)

In [8]:
# Define train data
x_train = []
y_train = []
for (pattern_setence, tag) in xy:
    # Getting the feature
    bag = bag_of_words(pattern_setence, all_words)
    x_train.append(bag)
    
    # Getting label from index of tag
    label = tags.index(tag)
    y_train.append(label)   # CrossEntropyLoss

In [9]:
# Convert to array
x_train = np.array(x_train)
y_train = np.array(y_train)

In [10]:
# Create custom class dataset
class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(x_train)
        self.x_data = x_train
        self.y_data = y_train
    
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.n_samples

In [11]:
# Define Hyperparameter
batch_size = 8
hidden_size = 8
input_size = len(x_train[0])
output_size = len(tags)
learning_rate = 0.001
num_epochs = 1000
# print(input_size, len(all_words))
# print(output_size, tags)

In [12]:
# Create dataset
dataset = ChatDataset()
# train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=2)
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=0)
# train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers = 2, persistent_workers=True)

In [13]:
for (words, labels) in train_loader:
    first_word = words
    first_label = labels
    print(f'words \t: {words}')
    print(f'labels \t: {labels}')
    if first_word != None and first_label != None:
        break

words 	: tensor([[0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
labels 	: tensor([47, 22, 68,  6, 60, 44, 70, 66], dtype=torch.int32)


In [14]:
all_words[0:5]

["'ll", "'m", "'re", "'s", "'ve"]

In [15]:
xy[0:3]

[(['Hi'], 'greeting'),
 (['Hey'], 'greeting'),
 (['Is', 'anyone', 'there', '?'], 'greeting')]

In [16]:
len(tags)

80

In [17]:
first_word.shape

torch.Size([8, 279])

In [18]:
first_label

tensor([47, 22, 68,  6, 60, 44, 70, 66], dtype=torch.int32)

In [19]:
x_train.shape

(232, 279)

In [20]:
y_train.shape

(232,)

In [21]:
# Checking cuda
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [22]:
torch.cuda.is_available()

False

In [23]:
# Create Feed Forward Neural Network Algorithm
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        # First linear layer
        out = self.l1(x)
        out = self.relu(out)
        
        # Second linear layer
        out = self.l2(out)
        out = self.relu(out)
        
        # Third linear layer
        out = self.l3(out)    # no activation and no softmax
        return out

In [24]:
# Create model
model = NeuralNet(input_size, hidden_size, output_size).to(device)

# Loss & optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [25]:
# Training model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device, dtype=torch.int64)
        
        # Forward
        outputs = model(words)
        loss = criterion(outputs, labels)
        
        # Backward and optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    # Print loss per 100 epoch
    if (epoch + 1) % 100 == 0:
        print(f'epoch : {epoch+1}/{num_epochs}, loss={loss.item():.4f}')

# Print final loss when training done
print(f'final loss, loss={loss.item():.4f}')

epoch : 100/1000, loss=0.2089
epoch : 200/1000, loss=0.0371
epoch : 300/1000, loss=0.0050
epoch : 400/1000, loss=0.0005
epoch : 500/1000, loss=0.0001
epoch : 600/1000, loss=0.0000
epoch : 700/1000, loss=0.0000
epoch : 800/1000, loss=0.0000
epoch : 900/1000, loss=0.0000
epoch : 1000/1000, loss=0.0000
final loss, loss=0.0000


In [26]:
# # Save data
# data = {
#     'model_state' : model.state_dict(),
#     'input_size' : input_size,
#     'output_size' : output_size,
#     'hidden_size' : hidden_size,
#     'all_words' : all_words,
#     'tags' : tags,
# }

# # Save file of data
# torch.save(data, PATH_DATA)
# print(f'Training complete. file saved to {PATH_DATA}')

Training complete. file saved to torch_data.pth


In [32]:
# Save model to pth
torch.save(model, PATH_MODEL_PTH)

In [29]:
# Save metadata to json
metadata = {
    # 'model_state' : model.state_dict(),
    'input_size' : input_size,
    'output_size' : output_size,
    'hidden_size' : hidden_size,
    'all_words' : all_words,
    'tags' : tags,
}

with open(PATH_METADATA, 'w') as f:
    json.dump(metadata, f)
print('Training complete. Metadata saved.')

Training complete. Metadata saved.


In [27]:
# Save the entire model to a .pkl file using pickle
with open(PATH_MODEL_PKL, 'wb') as f:
    pickle.dump(model, f)