# Logging of chats to ES

## Creating index

In [None]:
import sys
import os

from datetime import datetime

sys.path.insert(1, os.path.realpath(os.path.pardir))

# Session environment, set before `config` is imported further down
# (presumably `config` reads these values at import time - confirm).
# NOTE(review): the username/password are hardcoded in the notebook; consider
# supplying them from outside the notebook instead.
os.environ.update({
    'STAGE'       : 'dev',
    'ES_USERNAME' : 'elastic',
    'ES_PASSWORD' : 'changeme',
    'TF_CACHE_DIR': '/var/tmp/models',
})

## select the environment for ingestion (exactly one ES_HOST line stays active)
# os.environ['ES_HOST'] = 'http://localhost:9200/'
# os.environ['ES_HOST'] = 'https://dev.es.chat.ask.eduworks.com/'
os.environ['ES_HOST'] = 'https://qa.es.chat.ask.eduworks.com/'

import config

In [None]:
# Re-executes the already-imported `config` module in place. Presumably used so
# that edits to the module or to the environment variables take effect without
# restarting the kernel - confirm against `config`.
import importlib
importlib.reload(config)

In [None]:
# Identifier shared by the ingest pipeline definition and the index's
# `default_pipeline` setting.
pipeline_id = "transform_id"

# Ingest pipeline body: a single `set` processor that writes the document's
# `chat_id` into `_id`.
_pipeline_body = {
    "description": "Replace the _id with chat_id for the logs index",
    "processors": [
        {"set": {"field": "_id", "value": "{{chat_id}}"}},
    ],
}

# Index-level settings; every document goes through the pipeline above by default.
_index_settings = {
    "number_of_shards": 2,
    "number_of_replicas": 1,
    "default_pipeline": pipeline_id,
}

# Fields of a single entry in the nested `chat_history` list.
_chat_message_properties = {
    "agent": {"type": "keyword", "index": "false", "doc_values": "false", "ignore_above": 256},
    "timestamp": {"type": "date", "index": "false", "doc_values": "false"},
    "text": {"type": "match_only_text"},
}

# Top-level document fields; `dynamic` is "false" at both the document and
# the `chat_history` level.
_index_mappings = {
    "dynamic": "false",
    "_source": {"enabled": "true"},
    "properties": {
        "chat_id": {"type": "keyword", "index": "true", "doc_values": "false", "ignore_above": 256},
        "timestamp": {"type": "date", "index": "true", "doc_values": "true"},
        "chat_history": {
            "dynamic": "false",
            "type": "nested",
            "properties": _chat_message_properties,
        },
    },
}

# Everything needed to set up the logging index: the pipeline definition plus
# the `settings` and `mappings` used when the index is created.
mapping = {
    "pipeline": {"id": pipeline_id, "body": _pipeline_body},
    "settings": _index_settings,
    "mappings": _index_mappings,
}

In [None]:
from elasticsearch import Elasticsearch, RequestError
from elasticsearch.client import IngestClient


# Synchronous client used only for the one-off index setup in this cell.
# increase the timeout if necessary
es_client = Elasticsearch([config.es_host], http_auth=(config.es_username, config.es_password), timeout = 20)
es_ingest = IngestClient(es_client)

# create pipeline for replacing _id with chat_id
es_ingest.put_pipeline(
    id   = mapping['pipeline']['id'     ],
    body = mapping['pipeline']['body'   ])

# (re)create index
# WARNING: this drops the existing logging index, including its documents, on
# whichever cluster `config.es_host` points to. `ignore = 404` keeps the very
# first run from failing when the index does not exist yet.
es_client.indices.delete(
    index   = config.es_logging_index, 
    ignore  = 404)
es_client.indices.create(
    index       = config.es_logging_index   , 
    settings    = mapping['settings']       , 
    mappings    = mapping['mappings']       )

# Refresh only the index that was just created; calling refresh() without an
# index would refresh every index on the cluster.
es_client.indices.refresh(index = config.es_logging_index)

## Saving logs

In [None]:
# Example chat-log document used to exercise the logging index.
# Layout: a `chat_id`, a document-level `timestamp` (here equal to the first
# message's timestamp) and `chat_history`, a list of messages that each carry
# `agent` ("user" or "bot"), `text` and `timestamp`.
# Bot `text` values contain HTML markup (</br>, <strong>, <i>).
doc_sample = {
    "chat_id": "4aa6cccf-6666-4ad4-82e5-4831a50275b7",
    "timestamp": "2022-07-25T15:37:21.095352",
    "chat_history": [
        {
            "agent": "user",
            "text": "hello",
            "timestamp": "2022-07-25T15:37:21.095352"
        },
        {
            "agent": "bot",
            "text": "Bot Configuration:</br>Debug: True</br>Version: 17.06.22</br><strong>expert_url <i>https://ucanr.edu/About/Locations/</i></strong></br><strong>es_search_size <i>100</i></strong></br><strong>es_cut_off <i>0.4</i></strong></br><strong>es_top_n <i>10</i></strong></br><strong>es_ask_weight <i>0.6</i></strong></br><strong>es_slots_weight <i>0.1</i></strong></br></br>To change the configuration parameters, use following schema:</br>parameter <i>param_name value</i></br>(i.e. <strong>parameter es_cut_off <i>0.5</i></strong>)",
            "timestamp": "2022-07-25T15:37:21.122762"
        },
        {
            "agent": "bot",
            "text": "Hi, I'm Scout, the UC IPM Assistant!",
            "timestamp": "2022-07-25T15:37:21.122768"
        },
        {
            "agent": "bot",
            "text": "How can I help you?",
            "timestamp": "2022-07-25T15:37:21.122790"
        },
        {
            "agent": "bot",
            "text": "testing logging - sender_id = 4aa6cccf-6666-4ad4-82e5-4831a50275b7",
            "timestamp": "2022-07-25T15:37:21.137084"
        },
        {
            "agent": "user",
            "text": "Yellow pests",
            "timestamp": "2022-07-25T15:37:25.852446"
        },
        {
            "agent": "bot",
            "text": "Extracted slots</br>[Format: (<i>relation</i>) <strong>entity</strong> - <strong>value</strong>]:</br>Group 1:</br>(<i>pest</i>) <strong>type</strong> - <strong>pests</strong></br>(<i>plant</i>) <strong>descr</strong> - <strong>yellow</strong></br>(<i>plant</i>) <strong>type</strong> - <strong>pests</strong></br></br>Composed 1 additional queries:</br>1) <i>pests yellow pests</i></br>",
            "timestamp": "2022-07-25T15:37:27.298472"
        },
        {
            "agent": "bot",
            "text": "Final transformed query with synonym replacement that was used for retrieval:</br> <i>Yellow pests. pests yellow pests<i>",
            "timestamp": "2022-07-25T15:37:27.298490"
        },
        {
            "agent": "bot",
            "text": "Results with slots improvement... Top 10 results.",
            "timestamp": "2022-07-25T15:37:27.298497"
        },
        {
            "agent": "bot",
            "text": "Did that answer your question? If not, can you give me more information? For example, what do you need to know about the pests?",
            "timestamp": "2022-07-25T15:37:27.315641"
        },
        {
            "agent": "user",
            "text": "/intent_affirm",
            "timestamp": "2022-07-25T15:37:30.371686"
        },
        {
            "agent": "user",
            "text": "Yoooo, how are you",
            "timestamp": "2022-07-25T15:37:30.371686"
        },
        {
            "agent": "bot",
            "text": "Anything else I can help with?",
            "timestamp": "2022-07-25T15:37:30.404433"
        }
    ]
}

In [None]:
response = None
try:
    response = await config.es_client.index(
        index       = config.es_logging_index   ,
        document    = doc_sample                ,
        id          = doc_sample['chat_id']     ,
    )
except RequestError as e:
    print(f'Error at inserting logs with chat_id - {doc_sample["chat_id"]}')
    raise(e)

response

## Retrieving logs

In [None]:
# date format = `dd.mm.yyyy`
def _parse_date(aft_date = None, bfr_date = None):

    
    try:
        if aft_date is None:
            aft_date = datetime.min
        else:

            aft_date = datetime.strptime(aft_date, '%d.%m.%Y')
        
        if bfr_date is None:
            bfr_date = datetime.max
        else:
            bfr_date = datetime.strptime(bfr_date, '%d.%m.%Y')

        aft_date = aft_date.isoformat()
        bfr_date = bfr_date.isoformat()

    except (TypeError, ValueError) as e:
        print(f'Input(s) should be string in the format `dd.mm.yyyy`')
        raise(e)

    return aft_date, bfr_date


# aft_date = None
# bfr_date = None
aft_date = '05.05.2021'
bfr_date = None
aft_date, bfr_date = _parse_date(
    aft_date = aft_date, 
    bfr_date = bfr_date
)    
    

query = {
    "range": {"timestamp": {
        'gte': aft_date,
        'lte': bfr_date 
    }}
}

response = await config.es_client.search(
    index   = config.es_logging_index  ,
    query   = query                     
)

response