### Importing Libraries

In [30]:
import numpy as np
import nltk
import random
import json
import import_ipynb
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from nlp_preprocess import bag_of_words, tokenize, stem
from neural_net import NeuralNet



In [31]:
#Reading the Intents file

with open('intents.json', 'r') as f:
    intents = json.load(f)

In [32]:
#Intents has different tags denoting to emotions, patterns: different inputs that it can expect out from the user
#responses: numerous outputs that it can generate

intents

{'intents': [{'tag': 'greeting',
   'patterns': ['Hi',
    'Hey',
    'How are you',
    'Is anyone there?',
    'Hello',
    'Good day'],
   'responses': ['Hey there, Welcome to Therapize :)',
    'Hello, Welcome to Therapize :)',
    'Hi there, what can I do for you? :)',
    'Hi there, how can I help? :)']},
  {'tag': 'goodbye',
   'patterns': ['Bye', 'See you later', 'Goodbye', 'Buh-Bye', 'Goodnight'],
   'responses': ['See you later, thanks for visiting Therapize',
    'Have a nice day :)',
    'Bye! Come back again soon :)',
    'Hope we were of any help, see you soon!']},
  {'tag': 'thanks',
   'patterns': ['Thanks',
    'Thank you',
    "That's helpful",
    "Thank's a lot!",
    'Cheers'],
   'responses': ['Happy to help :)',
    'Any time :)',
    'My pleasure :)',
    'Glad we could be of some help :)']},
  {'tag': 'about',
   'patterns': ['What is Therapize about?',
    'What does therapize provide?',
    'Who are you, what do you do?',
    'Who am I talking tp, who are you

### Preprocessing

In [33]:
#Three lists to store all words, the tags, pairs for words with tags
vocabulary = []
tags = []
w_t = []


#looping through each line in the intents:
for intent in intents['intents']:
    tag = intent['tag']
    #appending to tags list
    tags.append(tag)
    for pattern in intent['patterns']:
        #tokenize each word in the pattern sequences to store as a word
        w = tokenize(pattern)
        #extending all the tokenized wordds to vocab
        vocabulary.extend(w)
        #the word paired with it's associated tag
        w_t.append((w, tag))

#stemming and turning words into lower case
#words to ignore in all the expressions

ignore_words = ['?', '.', '!','%','#','@','^','*']
vocabulary = [stem(w) for w in vocabulary if w not in ignore_words]

#To remove duplicates and sort the words
vocabulary = sorted(set(vocabulary))
tags = sorted(set(tags))

print(w_t,":",len(w_t), "Patterns",'\n')
print(tags,":",len(tags),"Tags",'\n')
print(vocabulary,":", len(vocabulary), "Vocabulary made of stemmed tokenized words",'\n')


[(['Hi'], 'greeting'), (['Hey'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['Bye'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Goodbye'], 'goodbye'), (['Buh-Bye'], 'goodbye'), (['Goodnight'], 'goodbye'), (['Thanks'], 'thanks'), (['Thank', 'you'], 'thanks'), (['That', "'s", 'helpful'], 'thanks'), (['Thank', "'s", 'a', 'lot', '!'], 'thanks'), (['Cheers'], 'thanks'), (['What', 'is', 'Therapize', 'about', '?'], 'about'), (['What', 'does', 'therapize', 'provide', '?'], 'about'), (['Who', 'are', 'you', ',', 'what', 'do', 'you', 'do', '?'], 'about'), (['Who', 'am', 'I', 'talking', 'tp', ',', 'who', 'are', 'you'], 'about'), (['What', 'is', 'Anxiety'], 'Anxiety'), (['How', 'do', 'I', 'know', 'if', 'I', 'have', 'anxiety', 'disorder'], 'Anxiety'), (['How', 'would', 'I', 'know', 'when', 'to', 'visit', 'a', 'therapist', 'for', 'help', 'for', 'my', 'anxiety'], 'Anxiety'), (['Is', 

# Preparation of Training Data

In [34]:
#train tests for features and labels
X_train = []
y_train = []

for (pattern_sequence, tag) in w_t:
    #X_train[] for all b_o_g as features
    bag = bag_of_words(pattern_sequence, vocabulary)
    X_train.append(bag)
    #y_train[] storing all the indexes of tags denoting emotion
    label = tags.index(tag)
    y_train.append(label)
    
X_train = np.array(X_train)
y_train = np.array(y_train)

#Hyper-parameters for the network

num_epochs = 1000
batch_size = 8
learning_rate = 0.001
#the input-size can be the len of any b_o_g in the X train as they are of the same size
#output-size is the total labels/emotions possible 
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)
print(input_size, output_size) 


83 8


# Loaders

In [35]:
class loaderdata(Dataset):

    def __init__(self):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples

dataset = loaderdata()

train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#Simple ANN with one hidden layer
model = NeuralNet(input_size, hidden_size, output_size).to(device)

#Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

## Training the Model


In [36]:
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words =  words.to(device)
        labels = labels.to(dtype=torch.long).to(device)
        
        #Calling the model for forward-pass
        outputs = model(words)
        #Loss criteria
        loss = criterion(outputs, labels)
        
        #Optimizing through backward-pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


print(f'The Final Loss: {loss.item():.4f}')

#defining schema for data to be stored
data = {
"model_state": model.state_dict(),
"input_size": input_size,
"hidden_size": hidden_size,
"output_size": output_size,
"vocabulary": vocabulary,
"tags": tags
}

#model with trained weights stored as .pth file
FILE = "modeltrained.pth"
torch.save(data, FILE)

print(f'Training is complete. file saved to {FILE}')

Epoch [100/1000], Loss: 0.6226
Epoch [200/1000], Loss: 0.0801
Epoch [300/1000], Loss: 0.0055
Epoch [400/1000], Loss: 0.0046
Epoch [500/1000], Loss: 0.0036
Epoch [600/1000], Loss: 0.0015
Epoch [700/1000], Loss: 0.0009
Epoch [800/1000], Loss: 0.0012
Epoch [900/1000], Loss: 0.0003
Epoch [1000/1000], Loss: 0.0001
The Final Loss: 0.0001
Training is complete. file saved to modeltrained.pth
