[Sample dataset: recipes](https://docs.aiven.io/docs/products/opensearch/howto/sample-dataset)

In [2]:
# !pip install opensearch-py

In [3]:
import json
import os

import openai
from dotenv import find_dotenv, load_dotenv

# Load variables from the .env file located by find_dotenv() into the process
# environment before the API key is read.
load_dotenv(find_dotenv())

# Raises KeyError if OPENAI_API_KEY is not set in the environment.
openai.api_key = os.environ['OPENAI_API_KEY']

from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import HumanMessage
from opensearchpy import OpenSearch

In [4]:
# Connection settings for the OpenSearch service that holds the recipes index.
SERVICE_URI = 'localhost:9200'
INDEX_NAME = 'epicurious-recipes'

# NOTE(review): `ssl_enable` does not appear among opensearch-py's documented
# connection options (the documented TLS switch is `use_ssl`) -- confirm that
# this keyword actually has an effect before relying on it.
os_client = OpenSearch(
    hosts=SERVICE_URI,
    ssl_enable=True,
)

In [5]:
# Fetch the mapping of the recipes index; it is embedded in the LLM prompt so
# that generated queries can be restricted to fields that exist.
# INDEX_NAME is defined once, next to SERVICE_URI in the connection cell, and
# is deliberately not redefined here so the two values cannot drift apart.
mapping_data = os_client.indices.get_mapping(index=INDEX_NAME)
# The response is keyed by index name; keep only that index's 'mappings' part.
mappings = mapping_data[INDEX_NAME]['mappings']
# print(json.dumps(mappings,indent=2))

In [6]:
# Prompt template used to ask the chat model for a JSON query body.
# It is filled in with str.format(); placeholders:
#   {top_k}   - maximum number of results the generated query may request
#   {mapping} - the index mapping (as JSON text) listing the available fields
#   {input}   - the user's natural-language question
# A trailing backslash inside the string joins the next source line onto the
# current one, so each paragraph reaches the model as a single line.
# NOTE(review): "Unless told to do not query" and "a the few" look like typos;
# they are left untouched because this text is sent to the model verbatim.
DSL_TEMPLATE = """\
Given an input question, create a syntactically correct Elasticsearch query to run. \
Unless the user specifies in their question a specific number of examples they wish to obtain, \
always limit your query to at most {top_k} results. \
You can order the results by a relevant column to return the most interesting examples in the database.

Unless told to do not query for all the columns from a specific index, \
only ask for a the few relevant columns given the question.

Pay attention to use only the column names that you can see in the mapping description. \
Be careful to not query for columns that do not exist. Also, pay attention to which column is in which index. \
Return the query as valid json.

Use the following format:

Question: Question here
ESQuery: Elasticsearch Query formatted as json

Mapping: {mapping}

Question: {input}
ESQuery:
"""

In [7]:
def get_os_dsl(query, k):
    """Ask the chat model to turn a natural-language question into a JSON query body.

    The prompt is built from ``DSL_TEMPLATE`` and the module-level ``mappings``
    dict, then sent to ``ChatOpenAI`` as a single human message.

    Args:
        query: The user's question in natural language.
        k: Maximum number of results the generated query should request;
            substituted for ``{top_k}`` in the template.

    Returns:
        The generated query as a string. When the model's reply contains a
        JSON object, only the text from the first ``{`` to the last ``}`` is
        returned; otherwise the reply is returned unchanged.
    """
    prompt = DSL_TEMPLATE.format(
        top_k=k,
        mapping=json.dumps(mappings, indent=2),
        input=query,
    )
    chat = ChatOpenAI()
    response = chat([HumanMessage(content=prompt)])
    reply = response.content

    # The template ends with an "ESQuery:" label, and a chat model may echo
    # that label, wrap the JSON in a Markdown code fence, or append commentary.
    # Any of those would make the reply unusable as a search body, so keep only
    # the outermost JSON object when one is present.
    start = reply.find('{')
    end = reply.rfind('}')
    if start != -1 and end > start:
        return reply[start:end + 1]
    # No JSON object found: hand back the raw reply so the caller can inspect it.
    return reply

In [8]:
# Turn on LangChain's global debug flag so the prompts sent to the model and
# the raw responses are traced in the cell output.
import langchain

langchain.debug = True

In [9]:
# Upper bound on the number of hits the generated query may request.
top_k = 100
# Question (Japanese): "Of the recipes from 2010 onward, draw a scatter plot of
# the calories of the 100 recipes with the highest calories."
query = '2010年以降のレシピのうち、カロリーが高い順に100個のレシピのカロリーの散布図を書いてください。'

# Generate the query body with the LLM and show it for inspection.
dsl = get_os_dsl(query=query, k=top_k)
print(dsl)

[32;1m[1;3m[llm/start][0m [1m[1:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: Given an input question, create a syntactically correct Elasticsearch query to run. Unless the user specifies in their question a specific number of examples they wish to obtain, always limit your query to at most 100 results. You can order the results by a relevant column to return the most interesting examples in the database.\n\nUnless told to do not query for all the columns from a specific index, only ask for a the few relevant columns given the question.\n\nPay attention to use only the column names that you can see in the mapping description. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which index. Return the query as valid json.\n\nUse the following format:\n\nQuestion: Question here\nESQuery: Elasticsearch Query formatted as json\n\nMapping: {\n  \"properties\": {\n    \"calories\": {\n      \"type\": \"float\"\n

[36;1m[1;3m[llm/end][0m [1m[1:llm:ChatOpenAI] [3.81s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "{\n  \"size\": 100,\n  \"query\": {\n    \"range\": {\n      \"date\": {\n        \"gte\": \"2010-01-01\"\n      }\n    }\n  },\n  \"aggs\": {\n    \"calorie_distribution\": {\n      \"terms\": {\n        \"field\": \"calories\",\n        \"size\": 100,\n        \"order\": {\n          \"_key\": \"desc\"\n        }\n      }\n    }\n  }\n}",
        "generation_info": {
          "finish_reason": "stop"
        },
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "{\n  \"size\": 100,\n  \"query\": {\n    \"range\": {\n      \"date\": {\n        \"gte\": \"2010-01-01\"\n      }\n    }\n  },\n  \"aggs\": {\n    \"calorie_distribution\": {\n      \"terms\": {

In [10]:
# Run the LLM-generated query against the recipes index.
res = os_client.search(index=INDEX_NAME, body=dsl)
# ensure_ascii=False keeps non-ASCII characters in the response (for example
# the degree sign in "350°F") readable instead of printing them as \uXXXX
# escape sequences.
print(json.dumps(res, indent=2, ensure_ascii=False))

{
  "took": 39,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3948,
      "relation": "eq"
    },
    "max_score": 1.0,
    "hits": [
      {
        "_index": "epicurious-recipes",
        "_id": "sN0xdokBfefTV2V6Axdd",
        "_score": 1.0,
        "_source": {
          "directions": [
            "Preheat oven to 350\u00b0F. Coat cake pans with nonstick spray. Line bottom of pans with parchment; coat paper. Whisk flour, baking soda, and salt in a medium bowl. Using an electric mixer, beat sugar, butter, and brown sugar in a large bowl until light and fluffy, about 3 minutes. Add eggs one at a time, beating to blend between additions and occasionally scraping down sides and bottom of bowl. Beat in vanilla.",
            "Add dry ingredients; beat on low speed just to blend. Add bananas and sour cream; beat just to blend. Fold in mini chips. Divide batter evenly among pans; s