In [10]:
# pip install gradio
import random
import gradio as gr
import json 
import numpy as np
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder
import pickle

# Open the intents data file.
# If you run the code in Colab, adjust the path to match your Google Drive.
# The encoding is given explicitly so the file is decoded the same way on
# every platform (assumes the file is stored as UTF-8, the JSON standard
# encoding).
with open('aurabot.json', encoding='utf-8') as file:
    file_data = file.read()

# The data file uses "question"/"answer" as keys, while the rest of the
# notebook looks up "patterns"/"responses".
_KEY_RENAMES = {'question': 'patterns', 'answer': 'responses'}


def _rename_intent_keys(obj):
    """Return one decoded JSON object with the legacy key names replaced.

    Passed as ``object_hook`` to ``json.loads``, so it is applied to every
    JSON object in the document. Only dictionary keys are renamed; string
    values that happen to read "question" or "answer" are left untouched
    (a plain text replacement on the raw file would rewrite those as well).
    """
    return {_KEY_RENAMES.get(key, key): value for key, value in obj.items()}


# Decode the JSON text into a Python dictionary, renaming the keys on the way.
data = json.loads(file_data, object_hook=_rename_intent_keys)

# Load the trained model.
model = keras.models.load_model('aurabot_chat_model.keras')

# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling.
# Only load pickle files that you created yourself or otherwise fully trust.

# Load the tokenizer object.
with open('aurabot_chat_tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Load the label encoder object.
with open('aurabot_chat_label_encoder.pickle', 'rb') as enc:
    lbl_encoder = pickle.load(enc)


In [12]:
# pip install textblob
from textblob import TextBlob

# quick demo: let TextBlob repair a misspelled word
word = "maechine"

# correct() returns a TextBlob object, so convert it back to a plain string
corrected = str(TextBlob(word).correct())
print(f"Corrected: {corrected}")

Corrected: machine


In [13]:
import urllib.request, urllib.error
from urllib.parse import quote
import json
from textblob import TextBlob

# create function for downloading a word definition
def get_word_definition(word, timeout=10):
    """Fetch an English definition for ``word`` from https://dictionaryapi.dev/.

    The word is first passed through TextBlob's spelling correction; when the
    correction differs from the input, the corrected word is looked up instead
    and the reply mentions that a typo was fixed.

    Args:
        word: The word (or short phrase) to define.
        timeout: Seconds to wait for the dictionary API before giving up.

    Returns:
        A string starting with "Definition: " (optionally preceded by a
        "Small typo fixed: ..." line), or an apology message when the word is
        empty, the request fails, or the response has an unexpected shape.
    """
    failure_message = "Sorry, I couldn't find a definition for that word."

    # nothing to look up -> skip the spell check and the network call
    word = (word or "").strip()
    if not word:
        return failure_message

    # EXTRA: use TextBlob module to fix a typo if the user
    # didn't write the defined word correctly
    corrected = str(TextBlob(word).correct())
    prefix = ""

    # fix the typo if present and inform user it was fixed
    if corrected != word:
        word = corrected
        prefix = "Small typo fixed: " + word + "!\n"

    # Percent-encode the word so spaces and other special characters cannot
    # break the URL. safe="" also encodes "/", which quote() leaves alone by
    # default and which would otherwise change the URL path.
    url = "https://api.dictionaryapi.dev/api/v2/entries/en/" + quote(word, safe="")

    # many modern URLs / APIs require a valid header including the user agent
    # basically we mimic a web browser here
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    }

    # attempt downloading the raw response from the URL
    try:
        req = urllib.request.Request(url, headers=headers)
        # the with-block closes the connection; the timeout keeps the chat
        # from hanging forever when the API does not answer
        with urllib.request.urlopen(req, timeout=timeout) as response:
            raw_bytes = response.read()
    except OSError as e:
        # HTTPError and URLError are both OSError subclasses, as are the
        # socket timeouts that can be raised while reading the response
        print(e)
        return failure_message

    # extract the definition from the downloaded data
    # list -> dictionary[0] => meanings[0] => definitions[0] => definition
    try:
        entries = json.loads(raw_bytes.decode("UTF-8"))
        definition = entries[0]['meanings'][0]['definitions'][0]['definition']
    except (ValueError, LookupError, TypeError) as e:
        # invalid UTF-8 / JSON, or a payload that is not shaped as expected
        print(e)
        return failure_message

    return prefix + "Definition: " + definition
    

# If we search for "copmuter" (a typo of "computer"), the API finds nothing.
# However, there is a Python module called TextBlob that lets us detect and
# fix common typos before the word is sent to the API.

# you can test the function like this:
# get_word_definition("machine")

In [15]:
# Maximum sequence length used when padding the tokenized input.
# This must be the same value that was used in the training phase.
max_len = 30

from datetime import datetime

# the chat-function connected to UI
def chat_response(message, history):
    """Produce the chatbot's reply for the Gradio chat UI.

    Args:
        message: The text the user just sent.
        history: Earlier chat messages as a list of dicts with a "content"
            key (the format Gradio passes when ``type="messages"`` is used).

    Returns:
        The reply text. For model-based answers the confidence percentage is
        appended so the answer quality can be judged while testing.
    """
    # If the user wants a definition, use the helper function to fetch an
    # answer (it is able to fix small typos in the given word).
    # Only the *current* message is inspected: searching the joined history
    # would make one earlier "define:" request hijack the following turns and
    # would mix previous replies into the looked-up word.
    if "define:" in message:
        inp = message.split("define:")[1].strip()
        return get_word_definition(inp)

    # Build the context from the last 3 messages plus the current message.
    # Non-text history entries are skipped because they cannot be joined.
    recent = [msg["content"] for msg in history[-3:] if isinstance(msg["content"], str)]
    context = " ".join(recent) + " " + message

    # Tokenize the whole context, not only the current message.
    sequence = tokenizer.texts_to_sequences([context])
    padded = keras.preprocessing.sequence.pad_sequences(sequence, truncating='post', maxlen=max_len)

    # run the input through the model and process the result
    result = model.predict(padded)

    # the highest score in the prediction, expressed as a percentage
    confidence = round(float(np.amax(result)) * 100, 1)
    unknown_reply = "Sorry, I don't know the answer" + f" ({confidence} %)"

    # Cutoff for accepting the model's answer; predictions below it are
    # treated as "don't know". The confidence % is also added into the bot's
    # messages so we can test our chatbot's answer quality better.
    if confidence < 80:
        return unknown_reply

    # inverse_transform returns an array; take its single element so the
    # comparison below is a plain string comparison
    tag = lbl_encoder.inverse_transform([np.argmax(result)])[0]

    # find the correct set of responses, and return a random one
    for intent in data['intents']:
        if intent['tag'] == tag:
            current_response = np.random.choice(intent['responses'])

            # replace time placeholder in aurabot data with the actual timestamp
            # you can always prettify the timestamp into some better format
            # like: 26.3.2025 10:45:35
            if "{CURRENT_TIME}" in current_response:
                current_response = current_response.replace("{CURRENT_TIME}", str(datetime.now()))

            return current_response + f" ({confidence} %)"

    # the predicted tag is missing from the data file: still answer something
    # instead of returning None to the UI
    return unknown_reply


# build the chat UI around our chat function
demo = gr.ChatInterface(
    fn=chat_response,
    type="messages",
    autofocus=True,
)

# start the UI only when this code is executed directly
if __name__ == "__main__":
    demo.launch(share=True)


* Running on local URL:  http://127.0.0.1:7861

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
