In [1]:
import os
import json
import random

import nltk
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset


class ChatbotModel(nn.Module):
    """Three-layer fully connected classifier: input -> 128 -> 64 -> output.

    ReLU followed by dropout (p=0.5) is applied after each of the two hidden
    layers. The final layer returns raw, un-normalised scores (no softmax).
    """

    def __init__(self, input_size, output_size):
        """Create the layers.

        Args:
            input_size: Number of features in each input vector.
            output_size: Number of scores produced per input vector.
        """
        super().__init__()

        self.fc1 = nn.Linear(in_features=input_size, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Run ``x`` through both hidden layers and return the output scores."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        hidden = self.dropout(self.relu(self.fc2(hidden)))
        return self.fc3(hidden)

In [2]:
class ChatbotAssistant:
    """Intent-based chatbot driven by a bag-of-words classifier.

    Typical training flow: ``parse_intents()`` -> ``prepare_data()`` ->
    ``train_model(...)`` -> ``save_model(...)``.
    Typical inference flow: ``parse_intents()`` -> ``load_model(...)`` ->
    ``process_message(...)``.
    """

    def __init__(self, intents_path, function_mappings = None):
        """Store configuration; no file is read until ``parse_intents()``.

        Args:
            intents_path: Path to the JSON intents file.
            function_mappings: Optional dict mapping an intent tag to a
                zero-argument callable invoked when that intent is predicted.
        """
        self.model = None
        self.intents_path = intents_path

        self.documents = []          # list of (tokenized pattern, intent tag)
        self.vocabulary = []         # sorted unique tokens across all patterns
        self.intents = []            # intent tags; list position is the class index
        self.intents_responses = {}  # intent tag -> list of candidate responses

        self.function_mappings = function_mappings

        self.X = None  # bag-of-words matrix, set by prepare_data()
        self.y = None  # class-index vector, set by prepare_data()

    @staticmethod
    def tokenize_and_lemmatize(text):
        """Tokenize ``text`` with NLTK, lower-case and lemmatize every token.

        Returns:
            A list of lemmatized, lower-cased tokens.
        """
        lemmatizer = nltk.WordNetLemmatizer()

        words = nltk.word_tokenize(text)
        words = [lemmatizer.lemmatize(word.lower()) for word in words]

        return words

    def bag_of_words(self, words):
        """Encode ``words`` as a 0/1 vector aligned with ``self.vocabulary``.

        Args:
            words: Iterable of tokens (as produced by
                ``tokenize_and_lemmatize``).

        Returns:
            A list with one entry per vocabulary word: 1 if that word occurs
            in ``words``, otherwise 0.
        """
        # A set gives constant-time membership tests for each vocabulary word.
        present = set(words)
        return [1 if word in present else 0 for word in self.vocabulary]

    def parse_intents(self):
        """Load the intents file and rebuild vocabulary, documents and tags.

        The method is idempotent: every call rebuilds the parsed state from
        scratch, so calling it twice does not duplicate training documents.

        Raises:
            FileNotFoundError: If ``self.intents_path`` does not exist. A
                missing file used to be ignored silently, which only surfaced
                later as a shape error during training or inference.
        """
        with open(self.intents_path, 'r', encoding='utf-8') as f:
            intents_data = json.load(f)

        documents = []
        vocabulary = set()
        intents = []
        intents_responses = {}

        for intent in intents_data['intents']:
            tag = intent['tag']

            # The first occurrence of a tag defines its responses.
            if tag not in intents_responses:
                intents.append(tag)
                intents_responses[tag] = intent['responses']

            for pattern in intent['patterns']:
                pattern_words = self.tokenize_and_lemmatize(pattern)
                vocabulary.update(pattern_words)
                documents.append((pattern_words, tag))

        self.documents = documents
        # Sort once, after all patterns are collected, for a stable feature order.
        self.vocabulary = sorted(vocabulary)
        self.intents = intents
        self.intents_responses = intents_responses

    def prepare_data(self):
        """Turn the parsed documents into training arrays.

        Sets ``self.X`` to one bag-of-words row per document and ``self.y`` to
        the matching index into ``self.intents``.
        """
        bags = []
        indices = []

        for words, tag in self.documents:
            bags.append(self.bag_of_words(words))
            indices.append(self.intents.index(tag))

        self.X = np.array(bags)
        self.y = np.array(indices)

    def train_model(self, batch_size, lr, epochs):
        """Train a fresh ``ChatbotModel`` on ``self.X`` / ``self.y``.

        Args:
            batch_size: Mini-batch size for the data loader.
            lr: Learning rate for the Adam optimizer.
            epochs: Number of full passes over the training data.

        Prints the mean batch loss after every epoch.
        """
        X_tensor = torch.tensor(self.X, dtype=torch.float32)  # bag-of-words features
        y_tensor = torch.tensor(self.y, dtype=torch.long)     # target class indices

        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

        self.model = ChatbotModel(self.X.shape[1], len(self.intents))

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=lr)

        for epoch in range(epochs):
            running_loss = 0.0

            for batch_X, batch_y in loader:
                optimizer.zero_grad()
                outputs = self.model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
                # .item() extracts a plain float; adding the tensor itself
                # would keep every batch's autograd history alive.
                running_loss += loss.item()

            print(f"Epoch {epoch+1}: Loss: {running_loss / len(loader):.4f}")

    def save_model(self, model_path, dimensions_path):
        """Save the model weights and its input/output sizes.

        Args:
            model_path: Destination for the ``state_dict`` (``torch.save``).
            dimensions_path: Destination for a JSON file holding
                ``input_size`` and ``output_size``.
        """
        torch.save(self.model.state_dict(), model_path)

        with open(dimensions_path, 'w') as f:
            json.dump({ 'input_size': self.X.shape[1], 'output_size': len(self.intents) }, f)

    def load_model(self, model_path, dimensions_path):
        """Rebuild the model from files written by ``save_model``.

        Args:
            model_path: Path to the saved ``state_dict``.
            dimensions_path: Path to the JSON file with ``input_size`` and
                ``output_size``.
        """
        with open(dimensions_path, 'r') as f:
            dimensions = json.load(f)

        self.model = ChatbotModel(dimensions['input_size'], dimensions['output_size'])
        self.model.load_state_dict(torch.load(model_path, weights_only=True))

    def process_message(self, input_message):
        """Classify ``input_message`` and return a response for its intent.

        If the predicted intent has an entry in ``function_mappings``, that
        callable is invoked before the response is chosen.

        Returns:
            A randomly chosen response string for the predicted intent, or
            ``None`` when that intent has no responses.
        """
        words = self.tokenize_and_lemmatize(input_message)
        bag = self.bag_of_words(words)

        bag_tensor = torch.tensor([bag], dtype=torch.float32)

        # eval() switches dropout off so inference is deterministic.
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(bag_tensor)

        predicted_class_index = torch.argmax(predictions, dim=1).item()
        predicted_intent = self.intents[predicted_class_index]

        if self.function_mappings and predicted_intent in self.function_mappings:
            self.function_mappings[predicted_intent]()

        responses = self.intents_responses[predicted_intent]
        return random.choice(responses) if responses else None

In [3]:
# Smoke-test the tokenizer on inflected forms of "run". The recorded output
# shows 'runs' reduced to 'run' while 'running' and 'ran' are left unchanged.
# NOTE(review): presumably because lemmatize() is called without a POS tag - confirm.
chatbot = ChatbotAssistant(intents_path='intents.json')
print(chatbot.tokenize_and_lemmatize(text='run running runs ran'))

['run', 'running', 'run', 'ran']


In [14]:
if __name__ == '__main__':
    # Seed torch's global RNG so that weight initialisation, batch shuffling
    # and dropout (and therefore the printed loss curve and the saved weights)
    # are reproducible on a fresh Restart & Run All.
    torch.manual_seed(42)

    # Parse the intents, build the bag-of-words training arrays, train, then
    # persist the weights together with the layer dimensions.
    assistant = ChatbotAssistant('intents.json')
    assistant.parse_intents()
    assistant.prepare_data()
    assistant.train_model(batch_size=8, lr=0.001, epochs=100)
    assistant.save_model('chatbot_model.pth', 'dimensions.json')


Epoch 1: Loss: 2.9471
Epoch 2: Loss: 2.9351
Epoch 3: Loss: 2.9345
Epoch 4: Loss: 2.9332
Epoch 5: Loss: 2.9241
Epoch 6: Loss: 2.9121
Epoch 7: Loss: 2.9141
Epoch 8: Loss: 2.8946
Epoch 9: Loss: 2.8759
Epoch 10: Loss: 2.8691
Epoch 11: Loss: 2.8483
Epoch 12: Loss: 2.8258
Epoch 13: Loss: 2.8295
Epoch 14: Loss: 2.7746
Epoch 15: Loss: 2.7352
Epoch 16: Loss: 2.6902
Epoch 17: Loss: 2.6179
Epoch 18: Loss: 2.6182
Epoch 19: Loss: 2.5503
Epoch 20: Loss: 2.4270
Epoch 21: Loss: 2.3405
Epoch 22: Loss: 2.2375
Epoch 23: Loss: 2.1486
Epoch 24: Loss: 2.0945
Epoch 25: Loss: 1.9416
Epoch 26: Loss: 1.9665
Epoch 27: Loss: 1.7692
Epoch 28: Loss: 1.7422
Epoch 29: Loss: 1.6137
Epoch 30: Loss: 1.5687
Epoch 31: Loss: 1.4231
Epoch 32: Loss: 1.3351
Epoch 33: Loss: 1.3327
Epoch 34: Loss: 1.3138
Epoch 35: Loss: 1.1360
Epoch 36: Loss: 1.1706
Epoch 37: Loss: 1.0816
Epoch 38: Loss: 1.0563
Epoch 39: Loss: 1.0103
Epoch 40: Loss: 0.8962
Epoch 41: Loss: 0.8400
Epoch 42: Loss: 0.8498
Epoch 43: Loss: 0.8702
Epoch 44: Loss: 0.83

In [6]:
if __name__ == '__main__':

    # Rebuild the vocabulary and intent list from the intents file, then load
    # the trained weights saved by the training cell.
    assistant = ChatbotAssistant('intents.json')
    assistant.parse_intents()
    assistant.load_model('chatbot_model.pth', 'dimensions.json')

    while True:
        try:
            # Strip surrounding whitespace so " bye " still matches a quit command.
            message = input('You:').strip()
        except (EOFError, KeyboardInterrupt):
            # End of input or a kernel interrupt ends the chat cleanly
            # instead of leaving a traceback in the cell output.
            print("Chatbot: Goodbye!")
            break

        if message.lower() in ['/quit', 'exit', 'bye']:
            print("Chatbot: Goodbye!")
            break

        # A blank line carries no tokens; skip it rather than letting the
        # model classify an all-zero vector as some arbitrary intent.
        if not message:
            continue

        response = assistant.process_message(message)
        # process_message returns None for intents without responses;
        # do not print the literal string "None".
        if response is not None:
            print(response)

You: hi


Hey!


You: how are you


Hi there, what can I do for you?


You: tell me about sports


Our campus includes:
- Modern classrooms & labs
- Digital library
- Hostel & canteen facilities
- Sports grounds & gym
- Student clubs & cultural societies.


You: what about facilities


Our campus includes:
- Modern classrooms & labs
- Digital library
- Hostel & canteen facilities
- Sports grounds & gym
- Student clubs & cultural societies.


You: bye


Chatbot: Goodbye!
