In [4]:
import requests
import json
import string

In [7]:
def get_query_tokens(query):
    """Return the tokens the search backend reports for ``query``.

    Posts ``query`` to ``{url}/{collection}/search`` with ``rows=0`` and
    ``debugQuery=true`` and reads ``debug.parsedquery_toString`` from the
    JSON response. Each space-separated component of that string that
    contains ``fulltext_search:`` contributes the text following the prefix.

    Relies on the notebook-level globals ``url`` and ``collection``.

    Args:
        query: The raw query string to send.

    Returns:
        list[str]: The extracted tokens, in the order they appear in the
        parsed query. An empty list is returned (after printing a
        diagnostic) when the response carries an ``error`` entry or lacks
        the debug information, so callers can always iterate the result.
    """
    request = {
        "query": query,
        "params": {
            "rows": 0,
            "debugQuery": "true"
        }
    }

    results = requests.post(f"{url}/{collection}/search", json=request).json()

    if "error" in results:
        print(results['error']['msg'])
        return []

    if "debug" not in results:
        print("No debug section or no parsedQuery in debug section.")
        return []

    if "parsedquery_toString" not in results["debug"]:
        print("No parsedQuery in debug section.")
        return []

    parsed_query = results['debug']['parsedquery_toString']
    # Strip the grouping syntax so every component is a bare "field:token".
    query_comps = parsed_query.replace('+(', '').replace('(', '').replace(')', '').split(' ')

    query_tokens = []
    for comp in query_comps:
        # partition() yields an empty tail when the prefix is absent,
        # which filters out components belonging to other fields.
        _, _, after = comp.partition('fulltext_search:')
        if after:
            query_tokens.append(after)

    return query_tokens

In [8]:
def get_query_facets(query):
    """Request a relatedness-scored terms facet for ``query``.

    Posts to ``{url}/{collection}/search`` with ``rows=0`` and a single
    facet named ``body``: a ``terms`` facet over ``field`` (``mincount`` 2,
    ``limit`` 10) sorted descending by a nested ``relatedness($fore,$back)``
    function whose ``min_popularity`` is 0.0005. The ``fore`` parameter
    re-parses the query itself with ``edismax`` over ``qf``; the ``back``
    parameter is ``*:*``.

    Relies on the notebook-level globals ``url``, ``collection``, ``field``
    and ``qf``.

    Args:
        query: The raw query string to send.

    Returns:
        The decoded JSON response, unmodified.
    """
    relatedness_stat = {
        "type": "func",
        "func": "relatedness($fore,$back)",
        "min_popularity": 0.0005,
    }
    terms_facet = {
        "type": "terms",
        "field": field,
        "sort": {"relatedness": "desc"},
        "mincount": 2,
        "limit": 10,
        "facet": {"relatedness": relatedness_stat},
    }
    search_params = {
        "qf": qf,
        "fore": "{!type=$defType qf=$qf v=$q}",
        "back": "*:*",
        "defType": "edismax",
        "rows": 0,
        "echoParams": "none",
        "omitHeader": "true",
    }
    payload = {
        "params": search_params,
        "query": query,
        "facet": {"body": terms_facet},
    }

    response = requests.post(f"{url}/{collection}/search", json=payload)
    return response.json()

In [4]:
def create_expanded_query(query):
    """Build an expanded, boosted query string from ``query``.

    The result has the form::

        "<query>"^3 <token>^2 ... <facet term>^<relatedness> ...

    i.e. the original query as a quoted phrase boosted by 3, every token
    returned by ``get_query_tokens`` boosted by 2, and every bucket value of
    the ``body`` facet returned by ``get_query_facets`` boosted by that
    bucket's relatedness score.

    Args:
        query: The raw query string.

    Returns:
        str: The expanded query, or ``query`` unchanged when the facet
        response contains no ``facets.body`` section (for example because
        the backend answered with an error).
    """
    facet_result = get_query_facets(query)
    # The token lookup may yield None when the backend reports a problem;
    # treat that as "no tokens" instead of failing on iteration.
    query_tokens = get_query_tokens(query) or []

    facets = facet_result.get("facets") or {}
    if 'body' not in facets:
        return query

    # Original query terms get a fixed boost of 2.
    clauses = [f"{term}^2" for term in query_tokens]

    # Every facet term is boosted by its relatedness to the initial query.
    for bucket in facets["body"].get("buckets", []):
        boost = bucket["relatedness"]["relatedness"]
        clauses.append(f"{bucket['val']}^{boost}")

    # The exact phrase carries the highest boost and leads the expansion.
    return "\"" + query + "\"^3 " + " ".join(clauses)

In [23]:
def search(query, facet=None):
    """Run an expanded search for ``query``, optionally restricted by a filter.

    The query is first rewritten by ``create_expanded_query`` and then posted
    to ``{url}/{collection}/search``, requesting the fields ``id``,
    ``title``, ``release``, ``origin``, ``director``, ``cast`` and ``plot``.

    Relies on the notebook-level globals ``url`` and ``collection``.

    Args:
        query: The raw query string.
        facet: Optional filter expression sent as the single entry of the
            request's ``filter`` list. When ``None`` or empty, no filter is
            sent at all.

    Returns:
        The decoded JSON response, unmodified.
    """
    expanded_query = create_expanded_query(query)

    request = {
        "query": expanded_query,
        "fields": ["id", "title", "release", "origin", "director", "cast", "plot"],
    }

    # Only attach a filter when one was supplied; a None/empty entry in the
    # filter list carries no restriction and may be rejected by the backend.
    if facet:
        request["filter"] = [facet]

    request["params"] = {
        # NOTE(review): the quoted phrase and the raw query are concatenated
        # without a separating space -- confirm whether that is intended.
        "hl.q": "\"" + query + "\"" + query,
        "mm": "30%"
    }

    search_results = requests.post(f"{url}/{collection}/search", json=request).json()

    return search_results