# This notebook simulates a user chatting with the chatbot

In [1]:
import json
import redis
import time
import os
from uuid import uuid4
from generative_retirver import settings

from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import PreProcessor
from eda.src import config

### Creating the document store
First, start the services defined in docker-compose.yaml:

**> docker-compose up -d**

In [2]:
# Resolve the Elasticsearch host from the ELASTICSEARCH_HOST environment
# variable, falling back to localhost when the variable is not set.
host = os.getenv("ELASTICSEARCH_HOST", "localhost")

# Document store bound to the "document" index; username and password are
# passed as empty strings.
document_store = ElasticsearchDocumentStore(
    index="document",
    host=host,
    username="",
    password="",
)

In [3]:
# Read the processed dataset (json_dataset.json under
# config.DATA_PROCESSED_PATH) and parse it into `document_list`.
json_file_path = os.path.join(config.DATA_PROCESSED_PATH, "json_dataset.json")
with open(json_file_path, "r") as dataset_file:
    document_list = json.loads(dataset_file.read())

In [4]:
# Build the PreProcessor: documents are cleaned and then split by word into
# chunks of 200 words, with 10 words of overlap between consecutive chunks.
# Sentence boundaries are not respected when splitting.
preprocessor = PreProcessor(
    split_by="word",
    split_length=200,
    split_overlap=10,
    split_respect_sentence_boundary=False,
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
)

doc_processed = preprocessor.process(document_list)

# Empty the document store before uploading the freshly processed documents.
document_store.delete_documents()

# Upload the processed chunks to the document store.
document_store.write_documents(doc_processed)

Preprocessing:   0%|          | 0/1 [00:00<?, ?docs/s]

### Connect to the Redis message broker

In [5]:
# Open the Redis connection used as the message broker and bind it to `db`.
# Host, port and database id are read from the settings module.
db = redis.Redis(
    host=settings.REDIS_IP,
    port=settings.REDIS_PORT,
    db=settings.REDIS_DB_ID,
)
# Round-trip to the server to check the connection; the cell displays the result.
db.ping()

True

In [6]:

def model_predict(json_name, query_number, timeout=None):
    """
    Queue a query job in Redis and wait for the retriever-generative
    service to publish its answer.

    The job sent through Redis only carries a job id, the name of the user
    JSON file and the query number; the result is then polled from Redis
    under the job id.

    Parameters
    ----------
    json_name : str
        Name of the JSON file that holds the user queries.
    query_number : int
        Identifies the query within that JSON file.
    timeout : float or None, optional
        Maximum number of seconds to wait for the answer. ``None`` (the
        default) waits indefinitely, which is the historical behaviour.

    Returns
    -------
    answer, context : tuple
        The ``"answer"`` and ``"context"`` fields of the JSON payload the
        service stored under the job id. In the sample output of this
        notebook the answer is a list holding one string.

    Raises
    ------
    TimeoutError
        If ``timeout`` is given and no answer arrives within that time.
    """
    # A unique id lets every service track this particular job, and doubles
    # as the Redis key under which the result is expected.
    job_id = str(uuid4())

    job_data = json.dumps(
        {
            "id": job_id,
            "query_number": query_number,
            "json_name": json_name,
        }
    )

    # Hand the job to the model service through the Redis queue.
    db.lpush(settings.REDIS_QUEUE, job_data)

    # Monotonic clock: the deadline is immune to wall-clock adjustments.
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        output = db.get(job_id)

        if output is not None:
            result = json.loads(output.decode("utf-8"))
            answer = result["answer"]
            context = result["context"]

            # The result has been consumed; remove its key from Redis.
            db.delete(job_id)
            return answer, context

        # Without this check a stopped model service would block the
        # notebook forever.
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"No answer received for job {job_id} within {timeout} seconds"
            )

        # Wait a little before polling again.
        time.sleep(settings.SERVER_SLEEP)

### Simulating a user
First, start the retriever-generative model service from a terminal:

**> python .\generative_retirver\retriver_gen.py**

In [7]:
def save_json(my_file, json_path):
    """
    Serialize an object as JSON and write it to disk, replacing any
    existing file at that path.

    Parameters
    ----------
    my_file : object
        JSON-serializable data to store (despite the name, this is the data
        itself, not a file object).
    json_path : str
        Destination path of the JSON file.
    """
    with open(json_path, "w") as out_handle:
        json.dump(my_file, out_handle)

In [9]:
# Create the JSON file that records this session's interaction. Its name is a
# fresh UUID, and that name is sent to the model service with every job.
# NOTE: `json_file_path` is reassigned here; from this cell on it points at
# the session file, not at the dataset loaded earlier in the notebook.
json_user = []
job_id = str(uuid4())
user_json_name = job_id + '.json'
json_file_path = os.path.join(settings.UPLOAD_FOLDER, user_json_name)
save_json(json_user, json_file_path)

# Chat loop. An empty line or the word 'exit' ends the session; both are
# checked on every prompt, including the first one, so 'exit' is never sent
# to the model as a query.
cont = 0
while True:
    myquery = input("Please write your query: ")
    if myquery == 'exit':
        break
    if myquery == '':
        print('No query detected')
        break

    # Persist the question (with an empty answer) BEFORE queueing the job:
    # the job only carries the file name and the query number, so the service
    # presumably reads the query text from this file -- verify against
    # retriver_gen.py.
    _json = {'number': cont, 'query': myquery, 'answer': ''}
    json_user.append(_json)
    save_json(json_user, json_file_path)

    answer, context = model_predict(user_json_name, cont)

    # Replace the placeholder record with the answered one and save again.
    json_user[cont] = {'number': cont, 'query': myquery, 'answer': answer}
    cont += 1
    save_json(json_user, json_file_path)

    # `cont` was already incremented, so queries are reported 1-based.
    print("Answer query", cont, answer)

Answer query 1 ["Cash equivalents are assets that are readily convertible to cash and have a maturity period of three months or less. In the given context, the company's cash and cash equivalents decreased by $12,352,000 from the beginning of the year to the end of the year."]
Answer query 2 ["The Consolidated Statements of Cash Flows for GENCOR INDUSTRIES, INC. show the company's cash flows from operating, investing, and financing activities, as well as the net increase or decrease in cash and cash equivalents for the years ended September 30, 2021 and 2020. The statements also include non-cash investing and financing activities."]
Answer query 3 ["There is no information provided in the given context regarding the revenues and expenses of the company. The context only discusses the company's expectations and beliefs, risks and uncertainties, and its core products."]
Answer query 4 ['The context does not provide information on whether or not GENCOR INDUSTRIES has any debt.']
