In [4]:
import os
import random
from datetime import datetime, timedelta, timezone

import pandas as pd
from vespa.application import Vespa
from vespa.package import ApplicationPackage, Component, Parameter
from vespa.package import Field, FieldSet, RankProfile

# Prepare a Vespa doctype

In [1]:
# Directory that receives the exported application package files (see to_files()).
VESPA_CONFIG_DIR = "./vespa-app"
# File name of the zipped application package (see to_zipfile()).
VESPA_CONFIG_ZIP = "app_package.zip"

### Define Vespa doctype

In [2]:
# Declarative description of the document fields; each entry is expanded into
# keyword arguments of a schema field when the schema is assembled.
fields_spec = [
    dict(name="id", type="string"),
    dict(name="name", type="string", indexing=["index", "summary", "attribute"]),
    dict(
        name="description",
        type="string",
        indexing=["index", "summary", "attribute"],
        index="enable-bm25",  # only this field turns on BM25 statistics
    ),
    dict(name="width", type="float", indexing=["attribute"]),
]

In [6]:
# Create the application package, without the default query profile.
# NOTE(review): later cells feed and query a schema called "beer", and the
# deploy error mentions a 'beer_content' cluster -- confirm that
# "quanticpriests" is really the intended application name.
app_name = "quanticpriests"
app_package = ApplicationPackage(name=app_name, create_query_profile_by_default=False)

In [9]:
# Turn every field specification into a Field and register them all at once.
schema_fields = [Field(**spec) for spec in fields_spec]
app_package.schema.add_fields(*schema_fields)

# Fieldset named "default" grouping the two text fields.
default_fieldset = FieldSet(name="default", fields=["name", "description"])
app_package.schema.add_field_set(default_fieldset)

# Rank profile whose first phase sums BM25 over both text fields.
# NOTE(review): only "description" declares `enable-bm25` in fields_spec;
# verify that bm25(name) is usable without it.
bm25_profile = RankProfile(name="root", first_phase="bm25(description) + bm25(name)")
app_package.schema.add_rank_profile(bm25_profile)

In [10]:
# Make sure the export directory exists before writing into it, so the cell
# also works on a fresh checkout (to_files() is expected to reject a missing
# root -- confirm against the installed pyvespa version).
os.makedirs(VESPA_CONFIG_DIR, exist_ok=True)

# Export the package twice: as a directory tree and as a zip archive.
app_package.to_files(VESPA_CONFIG_DIR)
app_package.to_zipfile(VESPA_CONFIG_ZIP)

In [11]:
!curl -s --data-binary @app_package.zip -H "Content-Type: application/zip" -X POST http://vespa:19071/application/v2/tenant/default/prepareandactivate

{"error-code":"INVALID_APPLICATION_PACKAGE","message":"Invalid application: content-cluster-removal: Content cluster 'beer_content' is removed. This will cause loss of all data in this cluster. To allow this add <allow until='yyyy-mm-dd'>content-cluster-removal</allow> to validation-overrides.xml, see https://docs.vespa.ai/en/reference/validation-overrides.html"}

In [65]:
# Validation overrides are time-limited: a hard-coded `until` date silently
# stops working once it has passed. Compute a date a few days ahead (UTC)
# so the override is valid whenever the notebook is re-run.
override_until = (datetime.now(timezone.utc).date() + timedelta(days=7)).isoformat()

# NOTE(review): the deploy error captured in this notebook asks for
# `content-cluster-removal`, while this file only allows `indexing-change`.
# Allowing cluster removal drops all data in that cluster, so it is not added
# automatically -- confirm which override is actually needed.
validation_overrides_str = f"""
<validation-overrides>
    <allow until='{override_until}'>indexing-change</allow>
</validation-overrides>
"""
overrides_path = os.path.join(VESPA_CONFIG_DIR, "validation-overrides.xml")
with open(overrides_path, "w", encoding="utf-8") as f:
    f.write(validation_overrides_str)

PermissionError: [Errno 13] Permission denied: '/vespa-config/validation-overrides.xml'

**Now** open a shell inside the Vespa container and deploy the new application manually:
```
cd /vespa-config
chmod +x vespa-deploy.sh
./vespa-deploy.sh
```

# Push data to Vespa

In [66]:
# Client for the Vespa endpoint at http://vespa:8080.
client = Vespa(url="http://vespa", port=8080)
# Check that the application is up before going further.
# NOTE(review): 5 is presumably the maximum wait in seconds -- confirm against pyvespa.
client.wait_for_application_up(5)

Application is up!


In [82]:
def _beer_fields(s: pd.Series) -> dict:
    """Map one row of the beers table onto the document fields.

    The trailing comments record the type each field is expected to have in
    the schema. The key order is kept stable so serialized documents are
    identical whichever public helper produced them.

    Args:
        s: One row, indexable by column name.

    Returns:
        Dict of field name -> value, with ``id`` coerced to ``str`` and the
        latitude/longitude columns folded into a single ``geoloc`` entry.

    Raises:
        KeyError: If an expected column is missing from ``s``.
    """
    return {
        "id": str(s["id"]),  # string
        "brewery_id": s["brewery_id"],  # string
        "name": s["name"],  # string
        "cat_id": s["cat_id"],  # string
        "style_id": s["style_id"],  # string
        "abv": s["abv"],  # float
        "ibu": s["ibu"],  # float
        "srm": s["srm"],  # float
        "upc": s["upc"],  # int
        "description_beer": s["description_beer"],  # string
        "brewery": s["brewery"],  # string
        "address1": s["address1"],  # string
        "city": s["city"],  # string
        "state": s["state"],  # string
        "code": s["code"],  # string
        "country": s["country"],  # string
        "phone": s["phone"],  # string
        "website": s["website"],  # string
        "description_brewery": s["description_brewery"],  # string
        "cat_name": s["cat_name"],  # string
        "cat_id_styles": s["cat_id_styles"],  # string
        "style_name": s["style_name"],  # string
        "id_geo": s["id_geo"],  # string
        "geoloc": {"lat": s["latitude"], "lng": s["longitude"]},  # position
        "accuracy": s["accuracy"],  # string
    }


def craft_vespa_fields(s: pd.Series) -> dict:
    """Build the feed operation for one beer, using a short ``beer:<id>`` id.

    Args:
        s: One row of the beers table.

    Returns:
        ``{"id": "beer:<id>", "fields": {...}}``.
    """
    return {"id": f"beer:{s['id']}", "fields": _beer_fields(s)}


def craft_vespa_fields_for_cli(s: pd.Series) -> dict:
    """Build the feed operation for one beer, using a full document id.

    Same fields as ``craft_vespa_fields``; only the id differs.

    Args:
        s: One row of the beers table.

    Returns:
        ``{"id": "id:beer_content:beer::<id>", "fields": {...}}``.
    """
    return {"id": f"id:beer_content:beer::{s['id']}", "fields": _beer_fields(s)}
    
    

In [83]:
# Build one CLI-ready document per beer and write them all to a single
# JSON Lines file (one document per line).
cli_documents = df.apply(craft_vespa_fields_for_cli, axis=1)
cli_documents.to_json("./beers_vespa.json", orient="records", lines=True)

# Manual follow-up steps to run the feed from inside the Vespa container.
next_steps = """Now do:
- cp beers_vespa.json /datasets
- docker exec -it vespa bash
- cd /datasets
- vespa feed --verbose beers_vespa.json
You should have every data into Vespa"""
print(next_steps)

Now do:
- cp beers_vespa.json /datasets
- docker exec -it vespa bash
- cd /datasets
- vespa feed --verbose beers_vespa.json
You should have every data into Vespa


In [8]:
# PyVespa based push: not working well
def _report_feed_failure(response, doc_id):
    """Print every feed operation that Vespa did not accept.

    Without a callback, failed operations pass silently, which makes a
    partially failed feed hard to diagnose.
    """
    if not response.is_successful():
        print(f"Feed failed for {doc_id}: {response.get_json()}")


# Build the documents once and reuse them for both the sample and the feed.
beer_documents = df.apply(craft_vespa_fields, axis=1)
point = beer_documents.iloc[0]  # first document, kept for manual inspection
client.feed_iterable(
    beer_documents.to_list(),
    schema="beer",
    callback=_report_feed_failure,
)

## Final check

In [10]:
# Smoke test: run a free-text search for "stout" and show the best hit.
stout_query = {
    "yql": "select * from beer where userQuery()",
    "hits": 10,
    "query": "stout",
}
resp = client.query(stout_query)

# Display the fields of the top-ranked hit.
top_hit = resp.json["root"]["children"][0]
top_hit["fields"]

{'sddocname': 'beer',
 'documentid': 'id:beer:beer::beer:4134',
 'name': 'Kalamazoo Stout',
 'description_beer': 'A full-bodied stout with plenty of roast flavor. Kalamazoo Stout is available year round, leading our vast portfolio of stouts.',
 'brewery': "Bell's Brewery Inc.",
 'country': 'United States',
 'cat_name': 'British Ale',
 'style_name': 'Sweet Stout',
 'summaryfeatures': {'bm25(brewery)': 0.0,
  'bm25(description_beer)': 3.73974816301847,
  'length_of_descr': 1.0,
  'vespa.summaryFeatures.cached': 0.0}}