In [None]:
import json
from elasticsearch import Elasticsearch

In [None]:
# Read the connection settings from the JSON file kept one directory above
# the notebook, so no secret is written into a cell.
with open('../credentials.json') as credentials_file:
    credentials = json.load(credentials_file)

# Pull out the three values the client cell needs: cluster URL and key pair.
bonsai_url, access_key, access_secret = (
    credentials[name]
    for name in ('bonsai_url', 'access_key', 'access_secret')
)


In [None]:
# Build the client for the cluster URL loaded from the credentials file,
# authenticating with the access key/secret pair and sending a plain JSON
# Content-Type header on every request.
es = Elasticsearch(
    hosts=[bonsai_url],
    basic_auth=(access_key, access_secret),
    headers={'Content-Type': 'application/json'},
)

# Round-trip to the cluster once: this both checks the connection and tells
# us which server version the queries below will run against.
info = es.info()
version_number = info['version']['number']
print(f"Elasticsearch version: {version_number}")

In [None]:
# Job-skill ids to look for inside each document's job_skill_levels entries.
terms = [1, 2]

# One nested clause per requested id; each matches documents that have a
# job_skill_levels entry whose job_skill_id equals that id.
skill_clauses = [
    {
        "nested": {
            "path": "job_skill_levels",
            "query": {"term": {"job_skill_levels.job_skill_id": skill_id}},
        }
    }
    for skill_id in terms
]

# Painless source: divides the query score by the number of job_skill_levels
# values when there is at least one, otherwise returns the score unchanged.
# NOTE(review): job_skill_levels is addressed as a nested path in the clauses
# above; confirm that doc['job_skill_levels'] is readable in this script
# context for the index mapping in use.
score_script = """
                    double score = _score;
                    if (doc['job_skill_levels'].length > 0) {
                        score = score / doc['job_skill_levels'].length;
                    }
                    return score;
                    """

# Full request body: match any of the skill clauses, then rescore with the
# script above.
# NOTE(review): the cell that executes this query applies its own limit of
# 10 hits; confirm which of the two sizes is the intended one.
query = {
    "query": {
        "function_score": {
            "query": {
                "bool": {"should": skill_clauses, "minimum_should_match": 1}
            },
            "script_score": {"script": {"source": score_script}},
        }
    },
    "size": 100,  # Number of documents to retrieve
}

In [None]:
# Index targeted by the search and explain calls in this notebook.
# NOTE(review): the name suggests a production index — confirm that only
# read operations are meant to be run against it from here.
index_name = "learning_opportunities_production"

In [None]:
# Run the skill query, limited to 10 hits.
# The limit is merged into a copy of the request body instead of being passed
# as a separate `size=` keyword: `query` may already carry its own "size",
# and the 8.x client raises ValueError when the same field is supplied both
# in `body` and as a keyword argument. The value given here takes precedence.
response = es.search(index=index_name, body={**query, "size": 10})

# Split each hit into its stored document and its relevance score; both
# lists share the order in which the hits were returned.
hits = response["hits"]["hits"]
documents = [hit["_source"] for hit in hits]
scores = [hit["_score"] for hit in hits]

# Show only the scores; the documents themselves are inspected in a later cell.
for score in scores:
    print(f"Score {score}")

In [None]:
# Id of the document whose score Elasticsearch is asked to justify.
doc_id = 1

# One nested clause per job-skill id (1 and 2), matching documents that have
# a job_skill_levels entry with that job_skill_id.
explain_clauses = [
    {
        "nested": {
            "path": "job_skill_levels",
            "query": {"term": {"job_skill_levels.job_skill_id": skill_id}},
        }
    }
    for skill_id in [1, 2]
]

# Plain bool query (no function_score wrapper): at least one clause must match.
query = {
    "query": {
        "bool": {"should": explain_clauses, "minimum_should_match": 1}
    }
}

# Ask the explain API how this query scores the chosen document, then dump
# the raw response.
explanation = es.explain(index=index_name, id=doc_id, body=query)
print(explanation)

In [None]:
# Print the fields of interest for every retrieved document and collect the
# distinct level ids seen across them.
levels = set()
for document in documents:
    print(document['course'])
    print('level', document['level_id'])
    # Record the level; without this the set printed after the loop is
    # always empty. Assumes level_id is hashable (e.g. an int) — TODO confirm.
    levels.add(document['level_id'])
    print(len(document['job_skill_levels']), document['job_skill_levels'])
    print(document['learning_opportunity_skills'])
    print(document['skill_ids'])
print(levels)

In [None]:
# Unfiltered request: match every document and return up to 100 of them
# (adjust "size" to fetch more or fewer).
query = {"query": {"match_all": {}}, "size": 100}

# Send the request to the index.
response = es.search(index=index_name, body=query)

# Keep only the stored document of each hit.
documents = [entry["_source"] for entry in response["hits"]["hits"]]

# Take the first returned document as the sample to inspect. Despite the
# variable name, the choice is not random: it is always hit number 0.
random_doc = documents[0]

# The sample's fields are shown by the next cell.

In [None]:
# Bare expression so the notebook renders the sampled document as the cell output.
random_doc