In [1]:
import ast
from models import Recipe
import time

# Third-party imports
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import pandas as pd
import numpy as np

# Elasticsearch mappings
def _embedding_field(similarity=None):
    """Return a fresh 768-dimension ``dense_vector`` field definition.

    The ``similarity`` key is only written when a value is supplied, so
    indices that never declared one keep an identical mapping body.
    """
    field = {"type": "dense_vector", "dims": 768}
    if similarity is not None:
        field["similarity"] = similarity
    return field


def _recipe_fields(embedding):
    """Return the field definitions shared by ``recipes`` and ``recipe_additions``.

    A new dict is built on every call so the two indices never alias each
    other's nested objects.
    """
    return {
        "id": {"type": "integer"},
        "title": {"type": "text"},
        # Full-text search on ingredients, plus an exact-match `.keyword` sub-field.
        "ingredients": {
            "type": "text",
            "fields": {"keyword": {"type": "keyword"}},
        },
        "instructions": {"type": "text"},
        "prep_time": {"type": "integer"},
        "cook_time": {"type": "integer"},
        "cuisine": {"type": "keyword"},
        "course": {"type": "keyword"},
        "diet": {"type": "keyword"},
        # Stored but not searchable.
        "image": {"type": "keyword", "index": False},
        "url": {"type": "keyword", "index": False},
        "embedding": embedding,
    }


def _index_body(properties):
    """Wrap a ``properties`` dict in the ``{"mappings": {"properties": ...}}`` envelope."""
    return {"mappings": {"properties": properties}}


# Index name -> index body. The keys are also the list of indices that
# display_all_indices_data() iterates over.
MAPPINGS = {
    "users": _index_body(
        {
            "email": {"type": "keyword"},
            "name": {"type": "text"},
            # NOTE(review): mapped as a plain keyword; confirm the value is
            # hashed before it is indexed.
            "password": {"type": "keyword"},
            "embedding": _embedding_field(),
        }
    ),
    "recipes": _index_body(_recipe_fields(_embedding_field("cosine"))),
    "feedback": _index_body(
        {
            "email": {"type": "keyword"},
            "input_description": {"type": "text"},
            "input_image": {"type": "text", "index": False},
            "recipe_ids": {"type": "integer"},
            "rating": {"type": "integer"},
            "comment": {"type": "text"},
            "created_at": {"type": "date"},
        }
    ),
    "user_reviews": _index_body(
        {
            "email": {"type": "keyword"},
            "reviews": {
                "type": "nested",
                "properties": {
                    "content": {"type": "text"},
                    "created_at": {"type": "date"},
                },
            },
        }
    ),
    # Same fields as `recipes` (embedding without an explicit similarity),
    # followed by an `accepted` flag.
    "recipe_additions": _index_body(
        {
            **_recipe_fields(_embedding_field()),
            "accepted": {"type": "boolean"},
        }
    ),
}


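# NOTE: disabled earlier variant of create_elasticsearch_client(), kept for
# reference. It targets the "elasticsearch" host and raises the cluster's disk
# watermark thresholds to 99%; the active definition below does not change
# cluster settings.
# def create_elasticsearch_client():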
#     """Create and configure Elasticsearch client"""
#     es = Elasticsearch(
#         "http://elasticsearch:9200",
#         basic_auth=("elastic", "pass"),
#     )

#     # Update disk watermark thresholds
#     es.cluster.put_settings(
#         body={
#             "persistent": {
#                 "cluster.routing.allocation.disk.watermark.low": "99%",
#                 "cluster.routing.allocation.disk.watermark.high": "99%",
#                 "cluster.routing.allocation.disk.watermark.flood_stage": "99%",
#             }
#         }
#     )

#     return es
# from elasticsearch import Elasticsearch


def create_elasticsearch_client():
    """Create and return an Elasticsearch client.

    Connection settings are read from the environment so that real
    credentials do not have to be written into the notebook. Each setting
    falls back to the value that used to be hard-coded here, so an
    unconfigured environment behaves exactly as before:

        ELASTICSEARCH_URL       default "http://localhost:9200"
        ELASTICSEARCH_USERNAME  default "elastic"
        ELASTICSEARCH_PASSWORD  default "pass"

    Returns:
        An ``Elasticsearch`` client configured with HTTP basic authentication.
    """
    # Imported locally so this definition does not depend on the notebook's
    # import cell being edited.
    import os

    url = os.environ.get("ELASTICSEARCH_URL", "http://localhost:9200")
    username = os.environ.get("ELASTICSEARCH_USERNAME", "elastic")
    password = os.environ.get("ELASTICSEARCH_PASSWORD", "pass")

    return Elasticsearch(
        url,
        basic_auth=(username, password),
    )

In [2]:
def display_elasticsearch_data(es_client, index_name, size=10000):
    """
    Retrieve up to `size` documents from an Elasticsearch index and display them.

    Runs a single `match_all` search, loads each hit's `_source` into a
    DataFrame, and prints the retrieved count plus `df.head()`. If the
    response's `hits.total` shows that the index holds more documents than
    were returned (or that the reported total is only a lower bound), an extra
    note is printed so a partial result is not mistaken for the whole index.

    Args:
        es_client: Elasticsearch client (anything exposing a compatible
            `search(index=..., body=...)` method)
        index_name: Name of the index to query
        size: Maximum number of documents to retrieve (default 10000)

    Returns:
        pandas DataFrame containing the retrieved documents, or None if the
        query or the conversion failed (the error is printed, not raised)
    """
    try:
        # Query documents from the index (a single page of at most `size` hits)
        response = es_client.search(
            index=index_name, body={"query": {"match_all": {}}, "size": size}
        )
        hits = response["hits"]

        # Extract the documents from the response
        documents = [hit["_source"] for hit in hits["hits"]]

        # Convert to DataFrame
        df = pd.DataFrame(documents)

        print(f"\nData from {index_name} index:")
        print(f"Total documents: {len(df)}")

        # `hits.total` is either {"value": n, "relation": "eq"|"gte"} or a bare
        # integer, depending on how the request/cluster is configured.
        total = hits.get("total")
        if isinstance(total, dict):
            total_value = total.get("value")
            is_lower_bound = total.get("relation") == "gte"
        else:
            total_value = total
            is_lower_bound = False

        if total_value is not None and (total_value > len(df) or is_lower_bound):
            qualifier = "at least " if is_lower_bound else ""
            print(
                f"Note: index holds {qualifier}{total_value} documents; "
                f"only {len(df)} were retrieved (size={size})."
            )

        print("\nSample of the data:")
        print(df.head())

        return df

    except Exception as e:
        # Deliberate best-effort: report and return None so that callers
        # looping over several indices can carry on with the rest.
        print(f"Error retrieving data from {index_name}: {e}")
        return None


def display_all_indices_data(es_client):
    """
    Fetch and display every index named in MAPPINGS.

    Each index is announced with a banner and then passed to
    display_elasticsearch_data(). Indices for which that call returns None
    are left out of the result.

    Args:
        es_client: Elasticsearch client forwarded to display_elasticsearch_data

    Returns:
        dict mapping index name to the value returned for that index
    """
    banner = "=" * 50
    frames_by_index = {}

    for index_name in MAPPINGS:
        print(f"\n{banner}")
        print(f"Retrieving data from {index_name}")
        print(banner)

        frame = display_elasticsearch_data(es_client, index_name)
        if frame is None:
            # Retrieval failed for this index; move on to the next one.
            continue
        frames_by_index[index_name] = frame

    return frames_by_index

In [3]:
# Connect to Elasticsearch, then load every index listed in MAPPINGS into a
# dict of DataFrames keyed by index name. Indices whose retrieval fails are
# omitted from the dict.
# NOTE(review): the printed samples include the `users` index (email and
# password columns); clear the cell output before sharing this notebook.
es = create_elasticsearch_client()
all_dfs = display_all_indices_data(es)
# Access individual DataFrames using:
# recipes_df = all_dfs["recipes"]
# users_df = all_dfs["users"]
# etc.


Retrieving data from users

Data from users index:
Total documents: 2

Sample of the data:
               email       name     password  \
0   test@example.com  Test User  testpass123   
1  testa@example.com  Test User  testpass123   

                                           embedding  
0  [-0.020968295633792877, 0.011384704150259495, ...  
1  [-0.020968295633792877, 0.011384704150259495, ...  

Retrieving data from recipes

Data from recipes index:
Total documents: 5044

Sample of the data:
     id                                              title  \
0  4529  lavand-e-murgh recipe - afghani chicken in yog...   
1  4640  afghani dhoog recipe - cucumber mint buttermil...   
2  5978   malida recipe (healthy whole wheat afghan sweet)   
3  7092     moroccan spiced millet and lentil salad recipe   
4  6684                      chickpea & date tagine recipe   

                                         ingredients  \
0  [fresh pomegranate fruit kernels few garnish, ...   
1  [cumin powd