In [46]:
from elasticsearch import Elasticsearch
from pprint import pprint
from dotenv import load_dotenv
import os

# Run load_dotenv() before the lookups so anything it adds to the environment is visible.
load_dotenv()

# Elasticsearch connection settings; each one is None when its variable is unset.
es_url = os.environ.get("ES_URL")
es_username = os.environ.get("ES_USERNAME")
es_password = os.environ.get("ES_PASSWORD")

In [48]:
# Index that the helper functions in this notebook query.
index_name = "viral-texts"

# Shared client, authenticated with the username/password pair loaded above.
es = Elasticsearch(es_url, basic_auth=(es_username, es_password))

In [28]:
def get_document_by_id(doc_id):
    """Fetch a single document from the ``index_name`` index by its ID.

    Args:
        doc_id: ID of the document to look up.

    Returns:
        The ``_source`` entry of the response, or ``None`` if the lookup
        raised any exception (the error is printed, not re-raised).
    """
    try:
        # Both the request and the key access stay inside the try so that
        # every failure takes the same "print and return None" path.
        hit = es.get(index=index_name, id=doc_id)
        source = hit['_source']
    except Exception as err:
        print(f"Error: {err}")
        return None
    return source

In [11]:
def search_documents(field_name, keyword, size=10):
    """Run a ``match`` query for ``keyword`` on one field of the ``index_name`` index.

    Args:
        field_name: Name of the document field to match against.
        keyword: Text to search for in that field.
        size: Value sent as the request's ``size`` (defaults to 10).

    Returns:
        The ``hits.hits`` entry of the search response, or ``None`` if
        building or running the query raised any exception (the error is
        printed, not re-raised).
    """
    try:
        # Built inside the try so a bad field_name is reported like any
        # other failure instead of escaping the function.
        match_clause = {field_name: keyword}
        request_body = {"query": {"match": match_clause}, "size": size}
        result = es.search(index=index_name, body=request_body)
        hits = result['hits']['hits']
    except Exception as err:
        print(f"Error: {err}")
        return None
    return hits

In [49]:
# Example: fetch one document by its ID and pretty-print the result.
doc_id = "e0eDDZIBLCOFKfgZ6Tn7"  # replace with the ID of the document to fetch
document = get_document_by_id(doc_id)  # None if the lookup failed (error is printed)
print("Document by ID:")
pprint(document)

Document by ID:
{'batch': 'lu_blastoise_ver01',
 'begin': 2103,
 'boiler': False,
 'city': 'Opelousas',
 'cluster': 309240220972,
 'collation': '4',
 'corpus': 'ca',
 'country': 'United States of America',
 'coverage': 'http://dbpedia.org/resource/Opelousas%2C_Louisiana',
 'date': '1872-01-13',
 'dateRange': '1868/1878',
 'day': '1872-01-13',
 'daylag': 7,
 'ed': '1',
 'end': 2407,
 'frame': '0016',
 'group': '/lccn/sn86079077',
 'id': 'https://tile.loc.gov/storage-services/service/ndnp/lu/batch_lu_blastoise_ver01/data/sn86079077/0029587613A/1872011301/1872011301_1.xml#pageModsBib3',
 'issue': 'https://tile.loc.gov/storage-services/service/ndnp/lu/batch_lu_blastoise_ver01/data/sn86079077/0029587613A/1872011301/1872011301_1.xml',
 'lang': 'fr,en',
 'lat': '30.533527777777778',
 'lon': '-92.0815',
 'open': 'true',
 'p1dpi': 0,
 'p1h': 1653,
 'p1height': 25248,
 'p1id': 'https://tile.loc.gov/storage-services/service/ndnp/lu/batch_lu_blastoise_ver01/data/sn86079077/0029587613A/1872011301/0

In [50]:
# Example: "match" query on a single field, using search_documents' default size of 10.
field_name = "text"  # replace with the field you want to search
keyword = "talked of thieves"  # replace with your keyword
search_results = search_documents(field_name, keyword)  # hits, or None on error
print("Search Results:")
pprint(search_results)

Search Results:
[{'_id': '1rnKDJIBLCOFKfgZIP8q',
  '_index': 'viral-texts',
  '_score': 21.238295,
  '_source': {'batch': 'mohi_edwards_ver01',
              'begin': 46777,
              'boiler': False,
              'city': 'Kansas City',
              'cluster': 223339262878,
              'collation': '8',
              'corpus': 'ca',
              'country': 'United States of America',
              'coverage': 'http://dbpedia.org/resource/Kansas_City%2C_Missouri',
              'date': '1896-09-04',
              'dateRange': '1892/1897',
              'day': '1896-09-04',
              'daylag': 1,
              'ed': '1',
              'end': 46891,
              'frame': '0156',
              'group': '/lccn/sn84020062',
              'id': 'https://tile.loc.gov/storage-services/service/ndnp/mohi/batch_mohi_edwards_ver01/data/sn86063624/00211109385/1896090401/1896090401_1.xml#pageModsBib4',
              'issue': 'https://tile.loc.gov/storage-services/service/ndnp/mohi/batch