In [1]:
%load_ext autoreload
%autoreload 2
import json
import mycode.metrics as metrics
import mycode.utils as u
import mycode.vap as vap

In [3]:
# Goal: show that Vespa performs a `.fill()` (a summary fetch) even when there is no need for it.

In [4]:
# Build a minimal Vespa application package (VAP) with a single `doc` schema.
from vespa.package import (ApplicationPackage, Field, Schema, Document, RankProfile, Function)

# The only document field: an integer attribute with fast-search enabled.
id_field = Field(
    name="id",
    type="int",
    indexing=["attribute"],
    attribute=["fast-search"],
)

# Rank profile `fields`: wraps `attribute(id)` in a function named `id`
# and lists that function under match-features.
fields_profile = RankProfile(
    name='fields',
    inherits='unranked',
    first_phase="0",
    functions=[Function(name='id', expression='attribute(id)')],
    match_features=['id'],
)

doc_schema = Schema(
    name="doc",
    document=Document(fields=[id_field]),
    rank_profiles=[fields_profile],
)

application_package = ApplicationPackage(name="test", schema=[doc_schema])

In [5]:
from vespa.deployment import VespaDocker

# When running colima on macOS, point the default Docker socket at colima first:
# !sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock

# Handle for a local Vespa container, pinned to a specific image version.
vespa_docker = VespaDocker(container_image="vespaengine/vespa:8.588.8")

In [6]:
# Start the Docker container, deploy the application package into it,
# and keep the returned client for feeding and querying.
client = vespa_docker.deploy(application_package=application_package)

Waiting for configuration server, 0/60 seconds...
Waiting for configuration server, 5/60 seconds...
Waiting for application to come up, 0/300 seconds.
Waiting for application to come up, 5/300 seconds.
Waiting for application to come up, 10/300 seconds.
Application is up!
Finished deployment.


In [7]:
# Create and feed a single dummy document.
# The document id string is derived from the same value that is stored in the
# `id` field, so the two cannot drift apart (previously: a constant f-string).
dummy_id = 1
docs = [
    {
        'id': str(dummy_id),
        'fields': {
            'id': dummy_id,
        },
    }
]

# `vap.feed_callback` is invoked by pyvespa for the feed operations.
client.feed_iterable(docs, schema="doc", namespace="doc", callback=vap.feed_callback)

In [19]:
# Baseline: query through the default search chain with tracing enabled,
# asking only for `matchfeatures`.
no_hacks_query = {
    'yql': 'select matchfeatures from sources * where true',
    'ranking': 'fields',
    'presentation.summary': 'default',
    'trace.level': 6,
}
no_hacks_resp = client.query(body=no_hacks_query).json
no_hacks_resp

{'trace': {'children': [{'message': "Using query profile 'default' of type 'root'"},
   {'message': 'Resolved properties:\n'},
   {'message': "Invoking chain 'vespa' [com.yahoo.prelude.statistics.StatisticsSearcher@native -> com.yahoo.prelude.querytransform.PhrasingSearcher@vespa -> ... -> federation@native]"},
   {'children': [{'timestamp': 0,
      'message': "Invoke searcher 'com.yahoo.prelude.statistics.StatisticsSearcher in native'"},
     {'timestamp': 0,
      'message': "Invoke searcher 'com.yahoo.prelude.querytransform.PhrasingSearcher in vespa'"},
     {'timestamp': 0,
      'message': "Invoke searcher 'com.yahoo.prelude.searcher.FieldCollapsingSearcher in vespa'"},
     {'timestamp': 0,
      'message': "Invoke searcher 'com.yahoo.search.yql.MinimalQueryInserter in vespa'"},
     {'message': 'YQL query parsed: [\nselect matchfeatures from sources * where true\nTRUE\n]'},
     {'timestamp': 2,
      'message': "Invoke searcher 'com.yahoo.search.yql.FieldFilter in vespa'"},
  

In [20]:
# Serialize the trace and print the parts that mention 'Ignoring'
# (the last argument is presumably the amount of surrounding context - see `u.find_matches_with_context`).
trace_json = json.dumps(no_hacks_resp['trace'], indent=2)
ignoring_matches = u.find_matches_with_context(trace_json, 'Ignoring', 6)
print(ignoring_matches)

              "message": "Sending 1 summary fetch requests with jrt/protobuf"
            },
            {
              "message": "Not resending query during document summary fetching"
            },
            {
              "message": "Ignoring fill(default): Hits already filled: result.hits().getFilled()=[[presentation], [f:documentid,matchfeatures,rankfeatures,summaryfeatures], [f:matchfeatures]]"
            }
          ]
        },
        {
          "message": "Got 1 hits from source:test_content"
        },


In [9]:
# ^ As expected: one summary fetch request is sent to the content node, and the subsequent `fill(default)` is ignored because the hits are already filled. This is problematic because we want to avoid the round trip to the content nodes altogether.

In [10]:
# Let's build a bundle that hacks the `.fill()` behaviour.
# The relevant Java code is the `SkipFillSearcher` class; print its source:
!cat ../examples/ignore-fill-bundle/src/main/java/lt/jocas/examples/SkipFillSearcher.java

package lt.jocas.examples;

import com.yahoo.prelude.fastsearch.PartialSummaryHandler;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.search.searchchain.Execution;

import java.util.Set;

import static com.yahoo.prelude.fastsearch.PartialSummaryHandler.PRESENTATION;

public class SkipFillSearcher extends Searcher {

    @Override
    public Result search(Query query, Execution execution) {
        return execution.search(query);
    }

    private static final Set<String> IGNORED_SUMMARY_FIELDS = Set.of("matchfeatures");

    private boolean isFillIgnorable(String summaryClass, Result result) {
        return PRESENTATION.equals(summaryClass) &&
                result.getQuery().getPresentation().getSummaryFields().equals(IGNORED_SUMMARY_FIELDS);
    }

    @Override
    public void fill(Result result, String summaryClass, Execution execution) {
        var adjustedSummaryClass = summaryClass

In [11]:
# Package the searcher into a bundle jar with Maven (run from the example project directory).
!(cd ../examples/ignore-fill-bundle && mvn -f pom.xml package)
# The build should produce `../examples/ignore-fill-bundle/target/ignore-fill-bundle-0.0.1-deploy.jar`;
# list it to confirm that it exists.
!ls -alh ../examples/ignore-fill-bundle/target/ignore-fill-bundle-0.0.1-deploy.jar

[[1;34mINFO[m] Scanning for projects...
[[1;34mINFO[m] 
[[1;34mINFO[m] [1m----------------< [0;36mlt.jocas.examples:ignore-fill-bundle[0;1m >----------------[m
[[1;34mINFO[m] [1mBuilding ignore-fill-bundle 0.0.1[m
[[1;34mINFO[m]   from pom.xml
[[1;34mINFO[m] [1m--------------------------[ container-plugin ]--------------------------[m
[90mDownloading from [0mcentral[90m: https://repo.maven.apache.org/maven2/[0morg/apache/maven/plugins/maven-resources-plugin/maven-metadata.xml
Downloaded[90m from [0mcentral[90m: https://repo.maven.apache.org/maven2/[0morg/apache/maven/plugins/maven-resources-plugin/maven-metadata.xml[90m (988 B at 2.3 kB/s)[0m
[[1;34mINFO[m] Latest version of plugin org.apache.maven.plugins:maven-resources-plugin failed compatibility check
[[1;34mINFO[m] Looking for compatible RELEASE version of plugin org.apache.maven.plugins:maven-resources-plugin
[[1;34mINFO[m] Selected plugin org.apache.maven.plugins:maven-resources-plug

In [12]:
# Prepare a second application package whose services definition adds a new search chain.
from vespa.package import ServicesConfiguration
from vespa.configuration.services import services, container, document_api, search, chain, searcher, document_processing, content, documents, document, redundancy

# Search chain `fields`: inherits the `vespa` chain and adds SkipFillSearcher from the bundle.
skip_fill_chain = chain(
    searcher(
        id="lt.jocas.examples.SkipFillSearcher",
        bundle="ignore-fill-bundle",
    ),
    id="fields",
    inherits='vespa',
)

# Container cluster: document API, document processing and search with the extra chain.
container_cluster = container(
    document_api(),
    document_processing(),
    search(skip_fill_chain),
    id="test_container",
    version="1.0",
)

# Content cluster: redundancy 1, holding the `doc` document type in index mode.
content_cluster = content(
    redundancy(1),
    documents(document(type="doc", mode="index")),
    id="test_content",
    version="1.0",
)

services_config = ServicesConfiguration(
    application_name="test",
    services_config=services(container_cluster, content_cluster),
)

application_package = ApplicationPackage(
    name='test',
    schema=[doc_schema],
    services_config=services_config,
)

# pyvespa currently offers no convenient way to put bundle jars into the VAP,
# so the package is written to disk and the jar is added with `vap.add_bundles`.
tmp_vap_folder = '_tmp'
bundle_jar = '../examples/ignore-fill-bundle/target/ignore-fill-bundle-0.0.1-deploy.jar'

application_package.to_files(tmp_vap_folder)
vap.add_bundles(application_root=tmp_vap_folder, bundles=[bundle_jar])

# Redeploy from the on-disk package into the already running container.
client = vap.redeploy_from_disk(docker=vespa_docker, application_root=tmp_vap_folder)

In [14]:
#| label: resp-no-summary
# Same query parameters as the baseline, plus 'searchChain': 'fields'
# so that the request goes through the chain containing SkipFillSearcher.
no_summary_query = {
    'yql': 'select matchfeatures from sources * where true',
    'ranking': 'fields',
    'presentation.summary': 'default',
    'trace.level': 6,
    'searchChain': 'fields',
}
no_summary_resp = client.query(body=no_summary_query).json
no_summary_resp

{'trace': {'children': [{'message': "Using query profile 'default' of type 'root'"},
   {'message': 'Resolved properties:\n'},
   {'message': "Invoking chain 'fields' [lt.jocas.examples.SkipFillSearcher@fields -> com.yahoo.prelude.statistics.StatisticsSearcher@native -> ... -> federation@native]"},
   {'children': [{'timestamp': 0,
      'message': "Invoke searcher 'lt.jocas.examples.SkipFillSearcher in fields'"},
     {'timestamp': 0,
      'message': "Invoke searcher 'com.yahoo.prelude.statistics.StatisticsSearcher in native'"},
     {'timestamp': 0,
      'message': "Invoke searcher 'com.yahoo.prelude.querytransform.PhrasingSearcher in vespa'"},
     {'timestamp': 0,
      'message': "Invoke searcher 'com.yahoo.prelude.searcher.FieldCollapsingSearcher in vespa'"},
     {'timestamp': 0,
      'message': "Invoke searcher 'com.yahoo.search.yql.MinimalQueryInserter in vespa'"},
     {'message': 'YQL query parsed: [\nselect matchfeatures from sources * where true\nTRUE\n]'},
     {'times

In [15]:
#| label: trace-no-summary
# Look for any mention of 'summary' in the trace of the `fields`-chain query.
trace_json = json.dumps(no_summary_resp['trace'], indent=2)
summary_matches = u.find_matches_with_context(trace_json, 'summary', 6)
print(summary_matches)




In [16]:
# ^ No summary is fetched: the trace contains no 'summary' messages.

In [17]:
# Control run: the same query sent explicitly through the default `vespa` chain,
# with timing information requested in the response.
with_summary_query = {
    'yql': 'select matchfeatures from sources * where true',
    'ranking': 'fields',
    'presentation.summary': 'default',
    'trace.level': 6,
    'searchChain': 'vespa',  # default search chain
    'presentation.timing': True,
}
with_summary_resp = client.query(body=with_summary_query).json
with_summary_resp

{'trace': {'children': [{'message': "Using query profile 'default' of type 'root'"},
   {'message': 'Resolved properties:\n'},
   {'message': "Invoking chain 'vespa' [com.yahoo.prelude.statistics.StatisticsSearcher@native -> com.yahoo.prelude.querytransform.PhrasingSearcher@vespa -> ... -> federation@native]"},
   {'children': [{'timestamp': 0,
      'message': "Invoke searcher 'com.yahoo.prelude.statistics.StatisticsSearcher in native'"},
     {'timestamp': 0,
      'message': "Invoke searcher 'com.yahoo.prelude.querytransform.PhrasingSearcher in vespa'"},
     {'timestamp': 0,
      'message': "Invoke searcher 'com.yahoo.prelude.searcher.FieldCollapsingSearcher in vespa'"},
     {'timestamp': 0,
      'message': "Invoke searcher 'com.yahoo.search.yql.MinimalQueryInserter in vespa'"},
     {'message': 'YQL query parsed: [\nselect matchfeatures from sources * where true\nTRUE\n]'},
     {'timestamp': 1,
      'message': "Invoke searcher 'com.yahoo.search.yql.FieldFilter in vespa'"},
  

In [18]:
#| label: trace-with-summary
# Print the parts of the control-run trace that mention 'Ignoring'.
trace_json = json.dumps(with_summary_resp['trace'], indent=2)
ignoring_matches = u.find_matches_with_context(trace_json, 'Ignoring', 6)
print(ignoring_matches)

              "message": "Sending 1 summary fetch requests with jrt/protobuf"
            },
            {
              "message": "Not resending query during document summary fetching"
            },
            {
              "message": "Ignoring fill(default): Hits already filled: result.hits().getFilled()=[[presentation], [f:documentid,matchfeatures,rankfeatures,summaryfeatures], [f:matchfeatures]]"
            }
          ]
        },
        {
          "message": "Got 1 hits from source:test_content"
        },


In [None]:
# ^ With `searchChain` switched back to `vespa`, we once again see that the summary is being fetched.

In [None]:
# Docsum-related metrics from the content node at this point in the notebook.
vespa_metrics = metrics.fetch(vespa_docker)
content_node_metrics = metrics.from_search_node(vespa_metrics)

# Keep only the metrics whose name matches the docsum pattern, then pretty-print them.
docsum_pattern = '.*docsum.*'
docsum_metrics = metrics.by_name(content_node_metrics, docsum_pattern)
metrics.pp(docsum_metrics)