In [298]:
%load_ext autoreload
%autoreload 2
import mycode.vap as vap

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [299]:
# The goal is to:
# - Put a document with fields of various types into Vespa
# - Fetch the document's fields as match features
# - Map the match features back to the original document fields
# - Render the hit as if the match-feature values came from the original document

In [300]:
# Create a sample VAP (Vespa application package)
from vespa.package import (ApplicationPackage, ServicesConfiguration, Field, Schema, Document, RankProfile, Function)
from vespa.configuration.services import services, container, document_api, search, chain, searcher, \
    document_processing, content, documents, document, redundancy

# (name, type) of every document field, in schema order.
# Each field is indexed as an attribute.
field_types = [
    ("id", "int"),
    ("byte_field", "byte"),
    ("str", "string"),
    ("my_tensor", "tensor<float>(x[1])"),
    ("int_array", "array<int>"),
    ("str_array", "array<string>"),
    ("bool_field", "bool"),
    ("double_field", "double"),
    ("weightedset_field", "weightedset<string>"),
]

# Rank-profile functions, each named after the document field whose attribute it reads.
feature_expressions = {
    "id": "tensorFromLabels(attribute(id))",
    "str": "tensorFromLabels(attribute(str))",
    "my_tensor": "attribute(my_tensor)",
    "int_array": "tensorFromLabels(attribute(int_array))",
    "str_array": "tensorFromLabels(attribute(str_array))",
    "bool_field": "attribute(bool_field)",
    "double_field": "attribute(double_field)",
    "byte_field": "tensorFromLabels(attribute(byte_field))",
    "weightedset_field": "tensorFromWeightedSet(attribute(weightedset_field))",
}

doc_schema = Schema(
    name="doc",
    document=Document(
        fields=[
            Field(name=field_name, type=field_type, indexing=["attribute"])
            for field_name, field_type in field_types
        ]
    ),
    rank_profiles=[
        RankProfile(
            name="fields",
            inherits="unranked",
            first_phase="0",
            functions=[
                Function(name=fn_name, expression=fn_expression)
                for fn_name, fn_expression in feature_expressions.items()
            ],
            # Every function above is returned with the hit as a match feature.
            match_features=[
                "id",
                "str",
                "int_array",
                "str_array",
                "my_tensor",
                "bool_field",
                "double_field",
                "byte_field",
                "weightedset_field",
            ],
        )
    ],
)

# Search chain that adds the bundle's searcher and inherits from the 'vespa' chain.
mapper_chain = chain(
    searcher(
        id="lt.jocas.examples.MatchFeaturesMapperSearcher",
        bundle="match-features-mapper-bundle",
    ),
    id="match_features_mapper",
    inherits='vespa',
)

# Container cluster: document API, document processing and search with the chain above.
container_cluster = container(
    document_api(),
    document_processing(),
    search(mapper_chain),
    id="container",
    version="1.0",
)

# Content cluster holding the "doc" document type in index mode, with redundancy 1.
content_cluster = content(
    redundancy(1),
    documents(document(type="doc", mode="index")),
    id="content",
    version="1.0",
)

services_config = ServicesConfiguration(
    application_name="test",
    services_config=services(container_cluster, content_cluster),
)

# Bundle the single schema and the services definition into one application package.
application_package = ApplicationPackage(name="test", schema=[doc_schema], services_config=services_config)

In [None]:
# Build the searcher bundle: run `mvn package` inside the bundle project directory
# (the subshell keeps the `cd` from affecting later commands).
!(cd ../examples/match-features-mapper-bundle && mvn -f pom.xml package)
# Sanity check: after a successful build the deploy jar should exist at
# `../examples/match-features-mapper-bundle/target/match-features-mapper-bundle-0.0.1-deploy.jar`;
# `ls` reports an error if it is missing.
!ls -alh ../examples/match-features-mapper-bundle/target/match-features-mapper-bundle-0.0.1-deploy.jar

In [1]:
# Print the Java source of MatchFeaturesMapperSearcher, the searcher that the
# `match_features_mapper` search chain is configured with.
!cat ../examples/match-features-mapper-bundle/src/main/java/lt/jocas/examples/MatchFeaturesMapperSearcher.java

package lt.jocas.examples;

import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.TextNode;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.search.result.FeatureData;
import com.yahoo.search.result.Hit;
import com.yahoo.search.schema.Field;
import com.yahoo.search.schema.Schema;
import com.yahoo.search.schema.SchemaInfo;
import com.yahoo.search.searchchain.Execution;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorAddress;

import javax.inject.Inject;
import java.util.List;
import java.util.Set;

public class MatchFeaturesMapperSearcher extends Searcher {

    private final static String MF = "matchfeatures";
    // From here https://docs.vespa.ai/en/exposing-schema-information.html
    private final SchemaInfo schemaInfo;
    private final JsonNodeFactory jsonNodeFactory = JsonNodeFactory.instance;

    @Inject
    public MatchFea

In [None]:
from pathlib import Path

from vespa.deployment import VespaDocker

# pyvespa currently offers no convenient way to put bundle jars into a VAP,
# so the package is exported to disk first and the jar is added by hand.
tmp_vap_folder = '_tmp'
application_package.to_files(Path(tmp_vap_folder))

bundle_jars = [
    '../examples/match-features-mapper-bundle/target/match-features-mapper-bundle-0.0.1-deploy.jar',
]
vap.add_bundles(application_root=tmp_vap_folder, bundles=bundle_jars)

# When Docker is provided by colima on macOS, run the following first:
# !sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock
vespa_docker = VespaDocker(container_image="vespaengine/vespa:8.588.8")

# Deploy the exported package (jar included); `client` is used by the later cells.
client = vespa_docker.deploy_from_disk(
    application_name='test',
    application_root=Path(tmp_vap_folder),
)

In [None]:
def compile_deploy():
    """Rebuild the searcher bundle with Maven and redeploy the application package.

    Steps, in order:
      1. run ``mvn -f pom.xml package`` in ``../examples/match-features-mapper-bundle``;
      2. export ``application_package`` to the ``_tmp`` folder;
      3. pass the freshly built deploy jar to ``vap.add_bundles``;
      4. call ``vap.redeploy_from_disk`` against the running ``vespa_docker``.

    Relies on the notebook globals ``application_package``, ``vap`` and
    ``vespa_docker`` being defined by earlier cells.

    Raises:
        RuntimeError: if the Maven build exits with a non-zero status. The
            redeploy is skipped, so a stale jar is never deployed silently.
        FileNotFoundError: if ``mvn`` is not on ``PATH`` or the bundle project
            directory does not exist.
    """
    # Local imports keep the function independent of which cells ran before it.
    import subprocess
    from pathlib import Path

    bundle_project = '../examples/match-features-mapper-bundle'
    build = subprocess.run(
        ['mvn', '-f', 'pom.xml', 'package'],
        cwd=bundle_project,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge streams so the log tail is in order
        text=True,
    )
    if build.returncode != 0:
        # Keep the notebook output short: show only the end of the Maven log.
        log_tail = '\n'.join(build.stdout.splitlines()[-20:])
        raise RuntimeError(
            f'Maven build failed with exit code {build.returncode}; not redeploying.\n{log_tail}'
        )

    tmp_vap_folder = '_tmp'
    application_package.to_files(Path(tmp_vap_folder))
    vap.add_bundles(
        application_root=tmp_vap_folder,
        bundles=[f'{bundle_project}/target/match-features-mapper-bundle-0.0.1-deploy.jar']
    )
    vap.redeploy_from_disk(
        docker=vespa_docker,
        application_root=tmp_vap_folder
    )

In [301]:
# Build the bundle and redeploy the application using the helper defined above.
compile_deploy()

Deploy status code: 200


In [302]:
# Feed one dummy document that sets a value for every field of the "doc" schema.
dummy_fields = dict(
    id=1,
    str='foo',
    int_array=[1, 2],
    str_array=['one', 'two'],
    my_tensor=[2],
    bool_field=True,
    double_field=1.23,
    byte_field=12,
    weightedset_field={'bar': 9, 'baz': 8},
)

# The feed id is the string form of the document's `id` field.
docs = [{'id': str(dummy_fields['id']), 'fields': dummy_fields}]

client.feed_iterable(docs, schema="doc", namespace="doc", callback=vap.feed_callback)

In [None]:
# Rebuild the bundle and redeploy again.
# NOTE(review): this repeats the earlier compile_deploy() call; presumably it is
# kept for re-running after editing the searcher source — confirm it is still needed.
compile_deploy()

In [305]:
# Query through the custom search chain, ranking with the 'fields' profile so
# that the match features are computed for the hit.
query_body = {
    'yql': 'select documentid, matchfeatures from sources doc where true',
    'searchChain': 'match_features_mapper',
    'model.restrict': 'doc',
    'ranking': 'fields',  # alternative kept for comparison: 'unranked'
    'presentation.summary': 'default',
    'presentation.format.tensors': 'short-value',
    # Optional while debugging: 'trace.level': 1
}
client.query(body=query_body).json

{'root': {'id': 'toplevel',
  'relevance': 1.0,
  'fields': {'totalCount': 1},
  'coverage': {'coverage': 100,
   'documents': 1,
   'full': True,
   'nodes': 1,
   'results': 1,
   'resultsFull': 1},
  'children': [{'id': 'id:doc:doc::1',
    'relevance': 0.0,
    'source': 'content',
    'fields': {'bool_field': True,
     'byte_field': 12,
     'double_field': 1.23,
     'id': 1,
     'int_array': ['1', '2'],
     'my_tensor': [2.0],
     'str': 'foo',
     'str_array': ['one', 'two'],
     'weightedset_field': {'bar': 9.0, 'baz': 8.0},
     'documentid': 'id:doc:doc::1'}}]}}