# Logging of chats to ES

## Creating index

In [None]:
import sys
import os

# Put the parent directory on the module search path so that the `config`
# module imported at the end of this cell can be resolved.
sys.path.insert(1, os.path.realpath(os.path.pardir))

# Environment for this session. It is set before `config` is imported below
# (presumably `config` reads these variables at import time -- confirm).
os.environ.update({
    'STAGE'         : 'dev',
    'ES_USERNAME'   : 'elastic',
    'ES_PASSWORD'   : 'changeme',
    'TF_CACHE_DIR'  : '/var/tmp/models',
    # Select the environment for ingestion: keep exactly one ES_HOST entry enabled.
    'ES_HOST'       : 'http://localhost:9200/',
    # 'ES_HOST'     : 'https://dev.es.chat.ask.eduworks.com/',
    # 'ES_HOST'     : 'https://qa.es.chat.ask.eduworks.com/',
})

import config

In [None]:
import importlib
# Re-execute the already imported `config` module so that changes made since
# the first import are picked up without restarting the kernel.
importlib.reload(config)

In [None]:
# Identifier of the ingest pipeline. It is used both as the pipeline's own id
# and as the index's `default_pipeline` setting.
pipeline_id = "transform_id"

# Pipeline body: a single `set` processor that writes the document's
# `chat_id` into `_id`.
_pipeline_body = {
    "description": "Replace the _id with chat_id for the logs index",
    "processors": [
        {"set": {"field": "_id", "value": "{{chat_id}}"}},
    ],
}

# Index-level settings; `default_pipeline` points at the pipeline above.
_index_settings = {
    "number_of_shards": 2,
    "number_of_replicas": 1,
    "default_pipeline": pipeline_id,
}

# Fields of each entry under `chat_history.results`.
# Both are keywords with indexing and doc values switched off.
_result_properties = {
    "score": {
        "type": "keyword",
        "index": "false",
        "doc_values": "false",
        "ignore_above": 256,
    },
    "url": {
        "type": "keyword",
        "index": "false",
        "doc_values": "false",
        "ignore_above": 256,
    },
}

# Fields of each entry under `chat_history`.
# Only `text` keeps its type defaults; every other leaf field has indexing
# and doc values switched off.
_chat_history_properties = {
    "agent": {
        "type": "keyword",
        "index": "false",
        "doc_values": "false",
        "ignore_above": 256,
    },
    "timestamp": {
        "type": "date",
        "index": "false",
        "doc_values": "false",
    },
    "text": {"type": "match_only_text"},
    "intent": {
        "type": "keyword",
        "index": "false",
        "doc_values": "false",
        "ignore_above": 256,
    },
    "results": {
        "dynamic": "false",
        "type": "nested",
        "properties": _result_properties,
    },
}

# Everything needed to set up the logs index, grouped by purpose:
#   "pipeline" -> id and body of the ingest pipeline
#   "settings" -> index settings
#   "mappings" -> field mappings (dynamic mapping disabled at every level)
mapping = {
    "pipeline": {
        "id": pipeline_id,
        "body": _pipeline_body,
    },
    "settings": _index_settings,
    "mappings": {
        "dynamic": "false",
        "_source": {"enabled": "true"},
        "properties": {
            "chat_id": {
                "type": "keyword",
                "index": "true",
                "doc_values": "false",
                "ignore_above": 256,
            },
            "timestamp": {
                "type": "date",
                "index": "true",
                "doc_values": "true",
            },
            "chat_history": {
                "dynamic": "false",
                "type": "nested",
                "properties": _chat_history_properties,
            },
        },
    },
}

In [None]:
from elasticsearch import Elasticsearch, RequestError
from elasticsearch.client import IngestClient


# Client for the cluster selected via `config`.
# increase the timeout (in seconds) if necessary
es_client = Elasticsearch(
    [config.es_host],
    http_auth = (config.es_username, config.es_password),
    timeout   = 20)
es_ingest = IngestClient(es_client)

# create pipeline for replacing _id with chat_id
# (registered before the index, whose `default_pipeline` setting refers to it)
es_ingest.put_pipeline(
    id   = mapping['pipeline']['id'     ],
    body = mapping['pipeline']['body'   ])

# (re)create index
# WARNING: destructive -- an existing logging index is dropped together with
# its documents. `ignore = 404` keeps the delete from failing when the index
# does not exist yet (first run).
es_client.indices.delete(
    index   = config.es_logging_index,
    ignore  = 404)
es_client.indices.create(
    index       = config.es_logging_index   ,
    settings    = mapping['settings']       ,
    mappings    = mapping['mappings']       )

# Refresh only the index that was just created. Calling refresh() without an
# index would refresh every index on the cluster.
es_client.indices.refresh(index = config.es_logging_index)