# Logging chats to Elasticsearch (ES)

## Creating index

In [None]:
import json
import sys
import os

from datetime import datetime

# Put the parent directory on the module search path (right after the first
# entry) - presumably that is where `config.py` lives; verify.
sys.path.insert(1, os.path.realpath(os.pardir))

# Environment for this session. Everything is set before `config` is imported
# below; presumably `config` reads these values at import time - verify.
os.environ.update({
    'STAGE'         : 'dev',
    'ES_USERNAME'   : 'elastic',
    'ES_PASSWORD'   : 'changeme',
    'TF_CACHE_DIR'  : '/var/tmp/models',
    ## select the environment for ingestion
    'ES_HOST'       : 'http://localhost:9200/',
    # 'ES_HOST'       : 'https://dev.es.chat.ask.eduworks.com/',
    # 'ES_HOST'       : 'https://qa.es.chat.ask.eduworks.com/',
})

import config

In [None]:
# Re-execute the already imported `config` module in place - presumably so
# that edits to config.py or to the environment variables are picked up
# without restarting the kernel (verify against config.py).
import importlib
importlib.reload(config)

In [None]:
# Id of the ingest pipeline; also referenced from the index settings below.
pipeline_id = "transform_id"

# Fields of one entry in `chat_history[].results`.
_result_properties = {
    "score" : {"type": "keyword", "index": "false", "doc_values": "false", "ignore_above": 256},
    "url"   : {"type": "keyword", "index": "false", "doc_values": "false", "ignore_above": 256},
}

# Fields of one entry in `chat_history` (a single user or bot message).
_message_properties = {
    "agent"     : {"type": "keyword", "index": "false", "doc_values": "false", "ignore_above": 256},
    "timestamp" : {"type": "date", "index": "false", "doc_values": "false"},
    "text"      : {"type": "match_only_text"},
    "intent"    : {"type": "keyword", "index": "false", "doc_values": "false", "ignore_above": 256},
    "results"   : {
        "dynamic"   : "false",
        "type"      : "nested",
        "properties": _result_properties,
    },
}

# Top-level fields of a logged chat document.
_document_properties = {
    "chat_id"       : {"type": "keyword", "index": "true", "doc_values": "false", "ignore_above": 256},
    "timestamp"     : {"type": "date", "index": "true", "doc_values": "true"},
    "chat_history"  : {
        "dynamic"   : "false",
        "type"      : "nested",
        "properties": _message_properties,
    },
}

# Everything needed to set up the logging index:
#   pipeline - ingest pipeline that overwrites `_id` with the document's `chat_id`
#   settings - index settings; the pipeline above is the index default
#   mappings - explicit field mappings with dynamic mapping switched off
mapping = {
    "pipeline": {
        "id"    : pipeline_id,
        "body"  : {
            "description"   : "Replace the _id with chat_id for the logs index",
            "processors"    : [
                {"set": {"field": "_id", "value": "{{chat_id}}"}},
            ],
        },
    },
    "settings": {
        "number_of_shards"  : 2,
        "number_of_replicas": 1,
        "default_pipeline"  : pipeline_id,
    },
    "mappings": {
        "dynamic"   : "false",
        "_source"   : {"enabled": "true"},
        "properties": _document_properties,
    },
}

In [None]:
from elasticsearch import Elasticsearch, RequestError
from elasticsearch.client import IngestClient


# Synchronous client used only for the one-off index setup in this cell.
# increase the timeout if necessary
es_client = Elasticsearch([config.es_host], http_auth=(config.es_username, config.es_password), timeout = 20)
es_ingest = IngestClient(es_client)

# create pipeline for replacing _id with chat_id
es_ingest.put_pipeline(
    id   = mapping['pipeline']['id'     ],
    body = mapping['pipeline']['body'   ])

# (re)create index
# WARNING: the existing index, and every log stored in it, is dropped first.
# A 404 (index does not exist yet) is ignored so the first run succeeds.
es_client.indices.delete(
    index   = config.es_logging_index,
    ignore  = 404)
es_client.indices.create(
    index       = config.es_logging_index   ,
    settings    = mapping['settings']       ,
    mappings    = mapping['mappings']       )

# Refresh only the index created above; without an explicit index the call
# would refresh every index on the cluster.
es_client.indices.refresh(index = config.es_logging_index)

## Reading the sample events JSON file and transforming the logs

In [None]:
# Load the sample tracker events. The path is relative to the current working
# directory. (The previous `os.path.dirname(__name__)` prefix used a module
# name as if it were a path and always evaluated to ''.)
doc_sample_raw = None
# JSON files are UTF-8; do not depend on the platform's default encoding.
with open(os.path.join('events_sample', 'events_log.json'), 'r', encoding='utf-8') as f:
    doc_sample_raw = json.load(f)
doc_sample_raw

In [None]:
def _parse_tracker_events(events):
    '''Parse chat history - filtering in only `bot` and `user` events.'''
    chat_history = []

    for event in events:
        if event['event'] == 'user':
            '''
            structure:
                agent       keyword
                timestamp   date
                text        match_only_text
                intent      keyword
            '''
            text        = event['text']
            intent      = event['parse_data']['intent']['name']
            timestamp   = datetime.fromtimestamp(event['timestamp']).isoformat()
            chat_history.append({
                'agent'     : 'user'    ,
                'timestamp' : timestamp ,
                'text'      : text      ,
                'intent'    : intent
            })
        elif event['event'] == 'bot':
            '''
            structure:
                agent       keyword
                timestamp   date
                text        match_only_text
                results     nested
                    url         keyword
                    score       keyword
            '''
            text        = event['text']
            timestamp   = datetime.fromtimestamp(event['timestamp']).isoformat()
            
            chat_history.append({
                'agent'     : 'bot'     ,
                'timestamp' : timestamp ,
                'text'      : text      ,
            })

            if event['data']['custom'] is not None:
                results = []
                custom = event['data']['custom']
                for result in custom['data']:
                    url     = result['url'  ]
                    score   = result['score']
                    results.append({
                        'url'   : url,
                        'score' : score
                    })
                chat_history[-1]['results'] = results
    
    return chat_history

# Build the document to index from the sample events. The document-level
# timestamp is taken from the first chat-history entry.
chat_history = _parse_tracker_events(doc_sample_raw)
doc_sample = dict(
    chat_id         = 'chat_id',                        # tracker.sender_id
    timestamp       = chat_history[0]['timestamp'],
    chat_history    = chat_history,
)
doc_sample

In [None]:
response = None
try:
    response = await config.es_client.index(
        index       = config.es_logging_index   ,
        document    = doc_sample                ,
        id          = doc_sample['chat_id']     ,
    )
except RequestError as e:
    print(f'Error at inserting logs with chat_id - {doc_sample["chat_id"]}')
    raise(e)

response

## Retrieving logs

In [None]:
# date format = `dd.mm.yyyy`
def _parse_date(aft_date = None, bfr_date = None):

    
    try:
        if aft_date is None:
            aft_date = datetime.min
        else:

            aft_date = datetime.strptime(aft_date, '%d.%m.%Y')
        
        if bfr_date is None:
            bfr_date = datetime.max
        else:
            bfr_date = datetime.strptime(bfr_date, '%d.%m.%Y')

        aft_date = aft_date.isoformat()
        bfr_date = bfr_date.isoformat()

    except (TypeError, ValueError) as e:
        print(f'Input(s) should be string in the format `dd.mm.yyyy`')
        raise(e)

    return aft_date, bfr_date


# aft_date = None
# bfr_date = None
aft_date = '05.05.2021'
bfr_date = None
aft_date, bfr_date = _parse_date(
    aft_date = aft_date, 
    bfr_date = bfr_date
)    
    

query = {
    "range": {"timestamp": {
        'gte': aft_date,
        'lte': bfr_date 
    }}
}

response = await config.es_client.search(
    index   = config.es_logging_index  ,
    query   = query                     
)

response