In [62]:
# The goal is to learn how to read either the first-phase or the second-phase relevance score in the global phase without recalculating the second phase unnecessarily.

In [63]:
# Start a Vespa container in the background (--detach); --rm makes Docker remove it once it is stopped.
# Published ports: 19071 is the deploy target used by `vespa deploy` below, 8080 is where the queries are sent.
# NOTE(review): 19050 is published as well but nothing in this notebook talks to it — confirm it is needed.
!docker run --detach --rm --name vespa-globalrank --hostname vespa-globalrank --publish 0.0.0.0:8080:8080 --publish 0.0.0.0:19050:19050 --publish 0.0.0.0:19071:19071 vespaengine/vespa:8.551.21

d40856f418cdb8a79ac49f6fe8b903e256efd339b733a4af66f6a0efa59ad4f2


9f09841922f316bfc6ad3311163407018b6bc00f144dabd5e8750716161813c1


In [82]:
# Set up a simple Vespa application package
from vespa.package import (ApplicationPackage, Field, Schema, Document, RankProfile, Function,
                           QueryProfile, QueryProfileType, FirstPhaseRanking, GlobalPhaseRanking, SecondPhaseRanking,)
from vespa.io import VespaResponse

def _first_phase_echo_profile(profile_name, first_phase_expression):
    """Build a rank profile whose global phase simply re-emits the first-phase score.

    Args:
        profile_name: Name of the rank profile.
        first_phase_expression: Ranking expression used for the first phase.

    Returns:
        A RankProfile with no second phase; the global phase evaluates
        `firstPhase` over the top 10 hits, and `firstPhase` / `firstPhaseRank`
        are returned as match features.
    """
    return RankProfile(
        name=profile_name,
        first_phase=FirstPhaseRanking(expression=first_phase_expression),
        global_phase=GlobalPhaseRanking(expression='firstPhase', rerank_count=10),
        match_features=['firstPhase', 'firstPhaseRank'],
        summary_features=['firstPhase'],
    )


# The only document field: an integer attribute, which `global_3` ranks on in its first phase.
id_field = Field(
    name="id",
    type="int",
    indexing=["attribute"],
    attribute=["fast-search"],
)

# Functions available to the `global_3` profile.
global_3_functions = [
    # Not referenced by any phase expression of this profile
    # (presumably left over from an earlier experiment).
    Function(
        name='sp',
        args=['fprank'],
        expression='if(fprank < 10000, fprank * 100000, firstPhase / 10000)',
    ),
    # Stand-in for something costly; pretend a neural net is being called here.
    Function(
        name='expensive_function',
        expression='firstPhaseRank * 10',
    ),
    # Second-phase expression: run the "expensive" function while firstPhaseRank
    # is below 10000, otherwise fall back to an adjusted first-phase score.
    Function(
        name='sp2',
        args=[],
        expression='if(firstPhaseRank < 10000, expensive_function, firstPhase - 50)',
    ),
]

# Three phases: the second phase reranks 3 hits, while the global phase asks for
# `secondPhase` on 5 hits, i.e. on more hits than the second phase covered.
global_3_profile = RankProfile(
    name="global_3",
    functions=global_3_functions,
    first_phase=FirstPhaseRanking(expression='attribute(id)'),
    second_phase=SecondPhaseRanking(expression='sp2', rerank_count=3),
    global_phase=GlobalPhaseRanking(expression='secondPhase', rerank_count=5),
    match_features=['firstPhase', 'firstPhaseRank', 'secondPhase'],
)

doc_schema = Schema(
    name="doc",
    document=Document(fields=[id_field]),
    rank_profiles=[
        # Constant first-phase scores (1 and 2) echoed back by the global phase.
        _first_phase_echo_profile("global_1", '1'),
        _first_phase_echo_profile("global_2", '2'),
        global_3_profile,
    ],
)

vap = ApplicationPackage(
    name="globalrelevance",
    query_profile_type=QueryProfileType(fields=[]),
    query_profile=QueryProfile(fields=[]),
    schema=[doc_schema],
)

In [83]:
# Write the application package to a zip file and deploy it with the Vespa CLI
# against the port published as 19071 by the `docker run` cell.
# NOTE(review): on a fresh "Run All" this cell executes right after `docker run`;
# the deploy endpoint may not be accepting requests yet — confirm, or wait for it first.
vap_file_name = "global-relevance.zip"
vap.to_zipfile(vap_file_name)
! vespa deploy {vap_file_name} -t http://localhost:19071

Uploading application package... done;1m⣟[0;22m
[32mSuccess:[0m Deployed [36m'global-relevance.zip'[0m with session ID [36m9[0m


In [25]:
from vespa.application import Vespa

# Client for the container's port 8080. Despite the `prod_` prefix, the host is localhost.
prod_vespa_host = "http://localhost"
app = Vespa(url=prod_vespa_host, port=8080)

In [66]:
# Create 10 docs (ids 0-9): the document id is the number as a string,
# and the `id` field carries the same number as an int.
docs = [
    {'id': str(doc_number), 'fields': {'id': doc_number}}
    for doc_number in range(10)
]


def callback(response: VespaResponse, document_id: str):
    """Print an error line for a failed feed operation; stay silent on success.

    Args:
        response: Result of the feed operation for one document.
        document_id: Id of the document the response belongs to.
    """
    if response.is_successful():
        return
    print(f"Error when feeding document {document_id}: {response.get_json()}")


# Feed every document into the `doc` schema; failures are reported through `callback`.
app.feed_iterable(
    docs,
    schema="doc",
    namespace="doc",
    callback=callback,
    max_connections=12,
)

In [81]:
# Match every document and rank with `global_3`; the match features of each hit
# expose firstPhase, firstPhaseRank and secondPhase next to the final relevance.
app.query(
    body={
        'yql': 'select * from sources * where true',
        'ranking.profile': 'global_3',
    }
).json

{'root': {'id': 'toplevel',
  'relevance': 1.0,
  'fields': {'totalCount': 10},
  'coverage': {'coverage': 100,
   'documents': 10,
   'full': True,
   'nodes': 1,
   'results': 1,
   'resultsFull': 1},
  'children': [{'id': 'id:doc:doc::7',
    'relevance': 30.0,
    'source': 'globalrelevance_content',
    'fields': {'matchfeatures': {'firstPhase': 7.0,
      'firstPhaseRank': 3.0,
      'secondPhase': 30.0},
     'sddocname': 'doc',
     'documentid': 'id:doc:doc::7'}},
   {'id': 'id:doc:doc::8',
    'relevance': 20.0,
    'source': 'globalrelevance_content',
    'fields': {'matchfeatures': {'firstPhase': 8.0,
      'firstPhaseRank': 2.0,
      'secondPhase': 20.0},
     'sddocname': 'doc',
     'documentid': 'id:doc:doc::8'}},
   {'id': 'id:doc:doc::9',
    'relevance': 10.0,
    'source': 'globalrelevance_content',
    'fields': {'matchfeatures': {'firstPhase': 9.0,
      'firstPhaseRank': 1.0,
      'secondPhase': 10.0},
     'sddocname': 'doc',
     'documentid': 'id:doc:doc::9'

In [25]:
# The results above look right: only the first 3 hits have a relevance score > 3. The trace doesn't indicate that the second phase was calculated.

In [5]:
# Run the `global_3` query with tracing enabled to see what the query chain does.
app.query(
    body={
        'yql': 'select * from sources * where true',
        'ranking.profile': 'global_3',
        # Disabled override of the global-phase rerank count
        # (NOTE(review): presumably toggled by hand while experimenting — confirm).
        # 'ranking.globalPhase.rerankCount': 0,
        'trace.level': 4,
    }
).json

{'trace': {'children': [{'message': "Using query profile 'default' of type 'root'"},
   {'message': 'Resolved properties:\n'},
   {'message': "Invoking chain 'vespa' [com.yahoo.prelude.statistics.StatisticsSearcher@native -> com.yahoo.prelude.querytransform.PhrasingSearcher@vespa -> ... -> federation@native]"},
   {'children': [{'message': "Invoke searcher 'com.yahoo.prelude.statistics.StatisticsSearcher in native'"},
     {'message': "Invoke searcher 'com.yahoo.prelude.querytransform.PhrasingSearcher in vespa'"},
     {'message': "Invoke searcher 'com.yahoo.prelude.searcher.FieldCollapsingSearcher in vespa'"},
     {'message': "Invoke searcher 'com.yahoo.search.yql.MinimalQueryInserter in vespa'"},
     {'message': 'YQL query parsed: [select * from sources * where true]'},
     {'message': "Invoke searcher 'com.yahoo.search.yql.FieldFilter in vespa'"},
     {'message': "Invoke searcher 'com.yahoo.prelude.searcher.JuniperSearcher in vespa'"},
     {'message': "Invoke searcher 'com.yaho

In [None]:
# Learnings:
# 1. `firstPhaseRank` is a hit's rank before the second phase runs.
# 2. If the global phase reranks more hits (N) than the second phase (M), the second-phase score is calculated for N hits, probably at different times (during the second phase and during the fill() phase).
# 3. There is no `currentRelevance`-like feature, i.e. nothing that exposes the relevance score as of the current phase.

In [56]:
# Stop the Vespa container started at the top of the notebook; it was run with --rm, so Docker removes it on stop.
# NOTE(review): the saved output of this cell prints `vespa-randrank`, not this container's name —
# it looks stale; re-run the cell to refresh it.
!docker stop vespa-globalrank

vespa-randrank
