In [1]:
import json as js
import re

with open('E:/Ai project/.ipynb_checkpoints/instents.json', 'r') as f:
    data = js.load(f)['intents']

In [2]:
from nltk.stem import WordNetLemmatizer
import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\mahan\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\mahan\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [3]:
def clean_words(word):
    
    word = re.sub("[^a-z0-9]*", "", word.lower())
    word = lemmatizer.lemmatize(word)
    
    return word

In [4]:
raw_dataset = []
token2idx = {'<PAD>' : 0, '<UNK>' : 1}
counter = 2

lemmatizer = WordNetLemmatizer()

for item in data:
    label = item['tag'].lower()
    
    for pattern in item['patterns']:
        sentence = ''
        
        for word in pattern.split():
            clean_word = clean_words(word)
            sentence += word + ''
            
            if clean_word not in token2idx.keys():
                token2idx[clean_word] = counter
                counter += 1
                
        raw_dataset.append(tuple([sentence.strip(), label]))

In [5]:
max_len = max([len(sent) for item in data for sent in item])

In [6]:
def tokenize_sent(sent, max_len, token2isx, PAD = 0, UNK = 1):
    
    token_list = []
    
    for token in sent.split():
        token = token2idx.get(clean_words(token), UNK)
        token_list.append(token)
        
    if len(token_list) < max_len:
        diff = max_len - len(token_list)
        token_list.extend([PAD] * diff)
    elif len(token_list) > max_len:
        token_list = token_list[:max_len]
        
    return token_list

In [7]:
tag2label = {item['tag'].lower(): idx for idx, item in enumerate(data)}

In [8]:
label2tag = {v : k for k, v in tag2label.items()}

In [9]:
dataset = []

for sentence, tag in raw_dataset:
    token_list = tokenize_sent(sentence, max_len, token2idx)
    label = tag2label[tag]
    dataset.append(tuple([token_list, label]))

In [10]:
import torch

dataloader = torch.utils.data.DataLoader(dataset, batch_size = 16, shuffle = 16)

In [11]:
class Classification(torch.nn.Module):
    def __init__(self):
        super(Classification, self).__init__()
        self.embedd = torch.nn.Embedding(len(token2idx.keys()), 16, padding_idx = 0)
        self.fc1 = torch.nn.Linear(16, 32)
        self.fc2 = torch.nn.Linear(32, len(tag2label.keys()))
        
    def forward(self, x):
        x = self.embedd(x)
        x = torch.sum(x, dim = 0)
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.log_softmax(self.fc2(x), dim = 1)
        
        return x

In [12]:
model = Classification()

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)

In [13]:
accuracies, losses = [], []

total_step = len(dataloader)

for epoch in range(100):
    correct, total = 0, 0
    
    for i ,(features, labels) in enumerate(dataloader):
        fetures = torch.stack(features, dim = 0)
        
        output = model(fetures)
        
        loss = criterion(output, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        correct += (torch.argmax(output, dim = 1) == labels).float().sum()
        total += labels.size(0)
        
    accuracy = (100 * correct) / total
    accuracies.append(accuracy)
    losses.append(loss)
    
    if (epoch +1) % 5 == 0:
        print(
            f'Epoch [{epoch + 1} / 100], Step[{i + 1} / {total_step}], Loss: {round(loss.item(), 4)}'
            f'Accuracy : {accuracy}'
        )
    print()





Epoch [5 / 100], Step[1 / 1], Loss: 0.4536Accuracy : 83.33333587646484





Epoch [10 / 100], Step[1 / 1], Loss: 0.3653Accuracy : 83.33333587646484





Epoch [15 / 100], Step[1 / 1], Loss: 0.3514Accuracy : 83.33333587646484





Epoch [20 / 100], Step[1 / 1], Loss: 0.3495Accuracy : 83.33333587646484





Epoch [25 / 100], Step[1 / 1], Loss: 0.3491Accuracy : 83.33333587646484





Epoch [30 / 100], Step[1 / 1], Loss: 0.3491Accuracy : 83.33333587646484





Epoch [35 / 100], Step[1 / 1], Loss: 0.3491Accuracy : 83.33333587646484





Epoch [40 / 100], Step[1 / 1], Loss: 0.349Accuracy : 83.33333587646484





Epoch [45 / 100], Step[1 / 1], Loss: 0.349Accuracy : 83.33333587646484





Epoch [50 / 100], Step[1 / 1], Loss: 0.349Accuracy : 83.33333587646484





Epoch [55 / 100], Step[1 / 1], Loss: 0.349Accuracy : 83.33333587646484





Epoch [60 / 100], Step[1 / 1], Loss: 0.349Accuracy : 83.33333587646484





Epoch [65 / 100], Step[1 / 1], Loss: 0.349Accuracy : 83.33333587646484





Ep

In [14]:
optimizer.state_dict()

{'state': {0: {'step': tensor(100.),
   'exp_avg': tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
             0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
             0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
             0.0000e+00],
           [ 3.9140e-08,  7.2076e-07,  2.8947e-06, -4.7084e-07, -1.6364e-06,
             1.2009e-06, -3.0252e-06, -1.2824e-06, -3.7163e-07, -1.4814e-06,
             4.7092e-06, -2.6926e-06,  5.6231e-07, -8.5042e-07, -1.1989e-06,
             2.6796e-06],
           [ 3.4043e-07, -5.1706e-07,  9.3601e-07,  6.9616e-07, -5.0328e-07,
            -6.0139e-07, -7.6896e-07, -7.8893e-07,  6.5484e-07,  6.8622e-07,
             4.8278e-07, -4.9727e-07,  3.9291e-08,  8.2765e-07, -8.1476e-07,
            -7.0617e-07],
           [ 3.6480e-07, -2.8316e-07,  7.1643e-07,  5.3605e-07, -7.7087e-07,
            -3.9984e-07, -1.7791e-07, -4.0678e-07,  5.6478e-07,  1.2246e-07,
             8.0193e-07, -1

In [15]:
import random

while True:
    input_sent = input('Enter Your sentence')
    token_input = tokenize_sent(input_sent, max_len, token2idx)
    input_tensor = torch.tensor(token_input).view(len(token_input), 1)
    
    output = model(input_tensor)
    
    label = torch.argmax(output)
    tag = label2tag[int(label)]
    
    for item in data:
        if item['tag'] == tag:
            break
            
    print(random.choice(item['responses']))

Enter Your sentencehi
hello
Enter Your sentencetell me a joke
see you later
Enter Your sentenceJoke
hi
Enter Your sentencegreeting
koskeshe kure khar savar


KeyboardInterrupt: Interrupted by user