## Import Packages

In [2]:
import numpy as np
import json
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.preprocessing import LabelEncoder


## Import Dataset

In [3]:
import json

# Read the intent definitions from disk
with open("/content/intents.json") as intents_file:
    data = json.load(intents_file)

# Parallel lists: training_sentences[k] is an example utterance and
# training_labels[k] is the tag of the intent it came from.
training_sentences, training_labels = [], []
# `labels` keeps each distinct tag once (first-seen order); `responses`
# keeps one list of candidate replies per intent.
labels, responses = [], []

for entry in data["intents"]:
    patterns = entry["patterns"]
    tag = entry["tag"]

    # Every pattern of this intent becomes one training example
    training_sentences.extend(patterns)
    training_labels.extend([tag] * len(patterns))

    # Keep this intent's reply candidates
    responses.append(entry["responses"])

    # Record the tag only the first time it is seen
    if tag not in labels:
        labels.append(tag)

# Number of distinct intent tags found in the file
num_classes = len(labels)

## Encoding Labels

In [4]:
# Learn the tag -> integer id mapping from the string tags collected so far
# (fit() returns the encoder itself, so construction and fitting chain).
lbl_encoder = LabelEncoder().fit(training_labels)

# Swap the string tags for their integer ids.
# NOTE: this rebinds `training_labels`, so running the cell a second time
# would fit the encoder on the already-encoded integers.
training_labels = lbl_encoder.transform(training_labels)



In [10]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
from sklearn.preprocessing import LabelEncoder

# Seed Python, NumPy and TensorFlow RNGs so weight initialisation and batch
# shuffling are repeatable across "Restart & Run All".
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Load the intents JSON file
with open("/content/intents.json") as file:
    data = json.load(file)

# Extract training data: one (sentence, tag) pair per pattern, plus the list
# of reply candidates for every intent and the distinct tags in file order.
training_sentences = []
training_labels = []
labels = []
responses = []

for intent in data["intents"]:
    for pattern in intent["patterns"]:
        training_sentences.append(pattern)
        training_labels.append(intent["tag"])
    responses.append(intent["responses"])

    if intent["tag"] not in labels:
        labels.append(intent["tag"])

# Encode the string tags as integer class ids
lbl_encoder = LabelEncoder()
lbl_encoder.fit(training_labels)
training_labels = lbl_encoder.transform(training_labels)

# Tokenize the sentences
vocab_size = 1000  # Define the size of the vocabulary
embedding_dim = 16  # Define the dimension of the dense embedding
max_len = 20  # Define the maximum length of input sequences
oov_token = "<OOV>"  # Token for out-of-vocabulary words

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Convert sentences to integer sequences of length max_len; over-long
# sequences are cut at the end (truncating='post').
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(sequences, truncating='post', maxlen=max_len)

# Build the model: embedding -> average over the sequence -> dense -> softmax
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_len))
model.add(GlobalAveragePooling1D())
model.add(Dense(16, activation="relu"))
# Size the output layer from the classes the encoder actually learned, so
# every output index can be mapped back with lbl_encoder.inverse_transform.
# (len(labels) would also count tags of intents that have no patterns.)
model.add(Dense(len(lbl_encoder.classes_), activation="softmax"))

# Compile the model; the sparse loss matches the integer-encoded labels
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

# Define number of epochs for training
epochs = 20

# Fit the model on the training data
history = model.fit(padded_sequences, np.array(training_labels), epochs=epochs)

# Save the model and the fitted preprocessing objects needed at inference time
model.save("chat")

with open("tokenizer.pickle", "wb") as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open("label_encoder.pickle", "wb") as enc_file:
    pickle.dump(lbl_encoder, enc_file, protocol=pickle.HIGHEST_PROTOCOL)


Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 20, 16)            16000     
                                                                 
 global_average_pooling1d (  (None, 16)                0         
 GlobalAveragePooling1D)                                         
                                                                 
 dense (Dense)               (None, 16)                272       
                                                                 
 dense_1 (Dense)             (None, 8)                 136       
                                                                 
Total params: 16408 (64.09 KB)
Trainable params: 16408 (64.09 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8

## Saving Trained Model

In [11]:
import tensorflow as tf
import pickle

# Persist the trained network under the name the chat cell loads it from
model.save("chat")

# Pickle the fitted tokenizer and label encoder next to the model so that
# inference can reuse exactly the same preprocessing.
for pickle_path, artifact in (
    ("tokenizer.pickle", tokenizer),
    ("label_encoder.pickle", lbl_encoder),
):
    with open(pickle_path, "wb") as out_file:
        pickle.dump(artifact, out_file, protocol=pickle.HIGHEST_PROTOCOL)



## ChatBot Conversation

In [12]:
!pip install colorama

import colorama

colorama.init()

from colorama import Fore, Style, Back

import random



In [None]:
import tensorflow.keras as keras
import pickle
from colorama import Fore, Style
import random

# Re-read the intent definitions; chat() looks replies up in data["intents"]
with open("/content/intents.json") as intents_file:
    data = json.load(intents_file)

def chat(model_path="chat", tokenizer_path="tokenizer.pickle",
         encoder_path="label_encoder.pickle", max_len=20):
    """Run an interactive console chat loop backed by the saved intent model.

    Loads the saved model, tokenizer and label encoder, then repeatedly reads
    a line from the user, predicts its intent tag and prints a random reply
    from the matching entry of the module-level ``data["intents"]``.
    The loop ends when the user types ``quit`` (case-insensitive, surrounding
    whitespace ignored) or when input ends / is interrupted.

    Args:
        model_path: Location passed to ``keras.models.load_model``.
        tokenizer_path: Pickle file holding the fitted tokenizer.
        encoder_path: Pickle file holding the fitted label encoder.
        max_len: Sequence length to pad/truncate input to; it should match
            the value used when the model was trained.

    Returns:
        None. All interaction happens through stdin/stdout.
    """
    # Load the trained model
    model = keras.models.load_model(model_path)

    # NOTE(security): pickle.load can execute arbitrary code. Only load
    # pickles produced by this notebook, never files from untrusted sources.
    with open(tokenizer_path, "rb") as handle:
        tokenizer = pickle.load(handle)

    with open(encoder_path, "rb") as enc:
        lbl_encoder = pickle.load(enc)

    while True:
        # Get user input
        print(Fore.LIGHTBLUE_EX + "User: " + Style.RESET_ALL, end="")
        try:
            inp = input()
        except (EOFError, KeyboardInterrupt):
            # Closed input stream or interrupted kernel: leave the loop
            # cleanly instead of surfacing a traceback.
            print()
            break

        # Tolerate stray whitespace around the exit command
        if inp.strip().lower() == "quit":
            break

        # Tokenize and pad the input the same way the training data was prepared
        sequences = tokenizer.texts_to_sequences([inp])
        padded_sequences = keras.preprocessing.sequence.pad_sequences(
            sequences, truncating="post", maxlen=max_len
        )

        # Predict the intent; verbose=0 keeps Keras' per-call progress bar
        # out of the conversation transcript.
        result = model.predict(padded_sequences, verbose=0)
        tag = lbl_encoder.inverse_transform([np.argmax(result)])[0]

        # Debugging: print the predicted tag
        print(f"Predicted tag: {tag}")

        # Find the first intent with the predicted tag and print one of its replies
        for i in data["intents"]:
            if i["tag"] == tag:
                response = random.choice(i["responses"])
                print(Fore.GREEN + "ChatBot: " + Style.RESET_ALL, response)
                break

        print(Fore.YELLOW + "welcome messages" + Style.RESET_ALL)

# Start the chat
chat()




User: hi
Predicted tag: help
ChatBot:  Tell me how can assist you
welcome messages
User: have a complaint
Predicted tag: help
ChatBot:  Yes Sure, How can I support you
welcome messages
User: 