Oct 2, 2025

Vespa CLI documentation: https://docs.vespa.ai/en/reference/vespa-cli/vespa_clone.html

# This notebook demonstrates how to set up and use the Vespa CLI to build a simple text search application.

# Dataset: https://microsoft.github.io/msmarco/

# Sample document:

```json
{
    "put": "id:msmarco:msmarco::D1555982",
    "fields": {
        "id": "D1555982",
        "url": "https://answers.yahoo.com/question/index?qid=20071007114826AAwCFvR",
        "title": "The hot glowing surfaces of stars emit energy in the form of electromagnetic radiation",
        "body": "Science   Mathematics Physics The hot glowing surfaces of stars emit energy in the form of electromagnetic radiation ... "
    }
}
```

In [None]:
# Cell 1: Set up portable working directory
import os

# Resolve every path relative to the directory the kernel was started in, so
# the notebook does not depend on any machine-specific absolute path.
notebook_dir = os.getcwd()
print(f"Notebook directory: {notebook_dir}")

# Location the sample application is cloned into by the clone cell below.
text_search_dir = os.path.join(notebook_dir, "text-search")
print(f"Text search directory: {text_search_dir}")

# NOTE: the directory is intentionally NOT created here. `vespa clone` creates
# its target directory itself and reports an error when that directory already
# exists, so pre-creating it would make the clone step fail.

In [None]:
# List the current working directory before the sample application is cloned.
!ls


In [None]:
# Clone the "text-search" sample application into ./text-search.
# `vespa clone` refuses to write into a target directory that already exists,
# so the clone is skipped when a previous run has already produced
# text-search/app (the application package deployed later in this notebook).
# This keeps Restart & Run All from failing on the second run.
![ -d text-search/app ] || vespa clone text-search text-search

In [None]:
#Move to text-search folder
%cd text-search

In [None]:
# List the directory entered by the preceding %cd cell to confirm the clone's contents.
!ls

In [None]:
# Run the sample application's MS MARCO conversion script.
# NOTE(review): presumably this fetches/converts the dataset into
# dataset/documents.jsonl, the file the feed cell reads — confirm against the script.
! ./scripts/convert-msmarco.sh

In [None]:
# Authenticate the Vespa CLI. `yes` is piped in so that any confirmation
# prompt from the CLI is answered automatically.
!yes | vespa auth login 


In [None]:
# Make Vespa Cloud the target for the CLI commands that follow.
!vespa config set target cloud

In [None]:
!vespa config set application cmh22025.text-search.default

In [None]:
# Check that the deploy service for the configured target is reachable,
# waiting up to 300 seconds for it to become ready.
!vespa status deploy --wait 300

In [None]:
# Create the certificate/key pair used to access the application.
# -f forces the command through even if a certificate already exists, so
# every run of this cell replaces the previous certificate.
!vespa auth cert -f

In [None]:
# Deploy the application package in ./app and wait up to 300 seconds for it.
# `yes` answers any confirmation prompt from the CLI automatically.
!yes | vespa deploy --wait 300 app

In [None]:
# Check deployment status of the application deployed in the previous cell
# before feeding any documents.
!vespa status deployment

In [None]:
# Feed the converted documents (JSON Lines) into the deployed application.
# NOTE(review): --verbose appears to print every successful operation in
# addition to errors, which can flood the cell output on a large dataset —
# confirm, and consider clearing the output before sharing the notebook.
!vespa feed dataset/documents.jsonl --verbose

In [None]:
# Baseline query: userInput() parses the text passed in the user-query
# parameter; no index is named, so the schema's default applies.
# hits=3 limits the response to three results.
!vespa query \
  'yql=select * from msmarco where userInput(@user-query)' \
  'user-query=what is dad bod' \
  'hits=3' \
  'language=en'

In [None]:
## Use the title index: the defaultIndex annotation makes userInput() search
## the "title" field instead of the schema default.
!vespa query \
  'yql=select * from msmarco where {defaultIndex:"title"}userInput(@user-query)' \
  'user-query=what is dad bod' \
  'hits=3' \
  'language=en'

In [None]:
# Use grammar "all" so that only documents whose title matches every query
# word are returned; expected 1 hit.
!vespa query \
  'yql=select * from msmarco where {defaultIndex:"title", grammar:"all"}userInput(@user-query)' \
  'user-query=what is dad bod' \
  'hits=3' \
  'language=en'

In [None]:
# Same baseline query, but ranked with the rank profile named "bm25".
# NOTE(review): assumes the deployed schema defines a rank profile called
# "bm25" — confirm against the schema in the application package.
!vespa query \
  'yql=select * from msmarco where userInput(@user-query)' \
  'user-query=what is dad bod' \
  'hits=3' \
  'language=en' \
  'ranking=bm25'