# Certificados de segurança

In [2]:
import os

os.environ["TENANT_NAME"] = "grupo3"  # Replace with your tenant name
application = "findmypasta"
vespa_cli_command = (
    f'vespa config set application {os.environ["TENANT_NAME"]}.{application}'
)

!vespa config set target cloud
!{vespa_cli_command}
!vespa auth cert -N

[31mError:[0m private key '[36m/home/gustavo/.vespa/grupo3.findmypasta.default/data-plane-private-key.pem[0m' already exists
[36mHint:[0m Use -f flag to force overwriting


In [3]:
from os.path import exists
from pathlib import Path

# Data-plane certificate and private key, both expected inside the
# per-application directory under ~/.vespa.
cert_path = Path.home().joinpath(
    ".vespa",
    f"{os.environ['TENANT_NAME']}.{application}.default/data-plane-public-cert.pem",
)
key_path = Path.home().joinpath(
    ".vespa",
    f"{os.environ['TENANT_NAME']}.{application}.default/data-plane-private-key.pem",
)

# Print a warning (without raising) when either credential file is missing.
if not (exists(cert_path) and exists(key_path)):
    print(
        "ERROR: set the correct paths to security credentials. Correct paths above and rerun until you do not see this error"
    )

In [4]:
!vespa auth api-key

from pathlib import Path

# Path of the tenant's API key file under the user's ~/.vespa directory.
api_key_path = Path.home().joinpath(
    ".vespa", f"{os.environ['TENANT_NAME']}.api-key.pem"
)

[31mError:[0m refusing to overwrite '/home/gustavo/.vespa/grupo3.api-key.pem'
[36mHint:[0m Use -f to overwrite it

This is your public key:
[32m-----BEGIN PUBLIC KEY-----
MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEDsrvJYitrUhT5ttt/Eps5zdjdnli
JCZ2Me8nxkFMEn8W2eWKCZbVFtniPMlB+oqjuUd3cCZDAgI3FMzkNuhYig==
-----END PUBLIC KEY-----
[0m
Its fingerprint is:
[36m9c:df:23:36:8d:56:00:b8:9c:69:ad:20:be:13:c9:ac[0m

To use this key in Vespa Cloud click 'Add custom key' at
[36mhttps://console.vespa-cloud.com/tenant/grupo3/account/keys[0m
and paste the entire public key including the BEGIN and END lines.


In [5]:
from vespa.package import (
    ApplicationPackage,
    Field,
    Schema,
    Document,
    HNSW,
    RankProfile,
    Component,
    Parameter,
    FieldSet,
    GlobalPhaseRanking,
    Function,
)

# Application package for the "doc" schema: two BM25-enabled text fields, one
# synthetic embedding field, three rank profiles and one embedder component.

# Tensor type shared by the embedding field and the query(q) rank input.
embedding_tensor_type = "tensor<float>(x[384])"
query_embedding_input = ("query(q)", embedding_tensor_type)

document_fields = [
    Field(name="id", type="string", indexing=["summary"]),
    Field(
        name="title",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    ),
    Field(
        name="body",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
        bolding=True,
    ),
    # Not a document field: its indexing statement starts from the title and
    # body inputs joined by a space, followed by embed / index / attribute.
    Field(
        name="embedding",
        type=embedding_tensor_type,
        indexing=[
            'input title . " " . input body',
            "embed",
            "index",
            "attribute",
        ],
        ann=HNSW(distance_metric="angular"),
        is_document_field=False,
    ),
]

rank_profiles = [
    # Lexical ranking: sum of the BM25 scores of title and body.
    RankProfile(
        name="bm25",
        inputs=[query_embedding_input],
        functions=[Function(name="bm25sum", expression="bm25(title) + bm25(body)")],
        first_phase="bm25sum",
    ),
    # Vector ranking: closeness to the embedding field.
    RankProfile(
        name="semantic",
        inputs=[query_embedding_input],
        first_phase="closeness(field, embedding)",
    ),
    # Hybrid ranking: inherits "bm25" (and with it bm25sum), ranks by closeness
    # in the first phase, then applies reciprocal rank fusion of both signals
    # in the global phase with a rerank count of 1000.
    RankProfile(
        name="fusion",
        inherits="bm25",
        inputs=[query_embedding_input],
        first_phase="closeness(field, embedding)",
        global_phase=GlobalPhaseRanking(
            expression="reciprocal_rank_fusion(bm25sum, closeness(field, embedding))",
            rerank_count=1000,
        ),
    ),
]

# Hugging Face embedder component with id "e5"; model and tokenizer are
# referenced by URL.
embedder = Component(
    id="e5",
    type="hugging-face-embedder",
    parameters=[
        Parameter(
            "transformer-model",
            {
                "url": "https://github.com/vespa-engine/sample-apps/raw/master/simple-semantic-search/model/e5-small-v2-int8.onnx"
            },
        ),
        Parameter(
            "tokenizer-model",
            {
                "url": "https://raw.githubusercontent.com/vespa-engine/sample-apps/master/simple-semantic-search/model/tokenizer.json"
            },
        ),
    ],
)

package = ApplicationPackage(
    name=application,
    schema=[
        Schema(
            name="doc",
            document=Document(fields=document_fields),
            fieldsets=[FieldSet(name="default", fields=["title", "body"])],
            rank_profiles=rank_profiles,
        )
    ],
    components=[embedder],
)

In [7]:
from vespa.deployment import VespaCloud


def read_secret():
    """Return the API key stored in ``VESPA_TEAM_API_KEY`` (CI/CD use only).

    Literal backslash-n sequences in the value are turned into real newlines.
    When the variable is unset or empty, the value obtained from
    ``os.getenv`` (``None`` or ``""``) is returned unchanged.
    """
    secret = os.getenv("VESPA_TEAM_API_KEY")
    if not secret:
        return secret
    return secret.replace(r"\n", "\n")


# Handle used to deploy `package` to Vespa Cloud as <tenant>.<application>.
# The key content comes from the environment when read_secret() yields a
# non-empty value (otherwise None is passed); the API key file path is
# supplied as well.
vespa_cloud = VespaCloud(
    tenant=os.environ["TENANT_NAME"],
    application=application,
    # `or None` evaluates read_secret() once and maps "" / None to None.
    key_content=read_secret() or None,
    key_location=api_key_path,
    application_package=package,
)

In [8]:
# Deploy the application package to Vespa Cloud; the returned object is kept
# in `app` and used by later cells for feeding and querying.
app = vespa_cloud.deploy()

Deployment started in run 1 of dev-aws-us-east-1c for grupo3.findmypasta. This may take a few minutes the first time.
INFO    [17:32:43]  Deploying platform version 8.344.63 and application dev build 1 for dev-aws-us-east-1c of default ...
INFO    [17:32:44]  Using CA signed certificate version 1
INFO    [17:32:45]  Using 1 nodes in container cluster 'findmypasta_container'
INFO    [17:33:37]  Session 2 for tenant 'grupo3' prepared and activated.
INFO    [17:33:42]  ######## Details for all nodes ########
INFO    [17:33:42]  h88969g.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP
INFO    [17:33:42]  --- platform vespa/cloud-tenant-rhel8:8.344.63
INFO    [17:33:42]  --- logserver-container on port 4080 has not started 
INFO    [17:33:42]  --- metricsproxy-container on port 19092 has not started 
INFO    [17:33:42]  h88969h.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP
INFO    [17:33:42]  --- platform vespa/cloud-tenant-rhel8:8.344.63
INFO    [17

In [9]:
# Look up the deployment's mTLS endpoint; the bare name on the last line makes
# the notebook display its value.
endpoint = vespa_cloud.get_mtls_endpoint()
endpoint

'https://e1ac0ee9.dcfc642d.z.vespa-app.cloud/'

In [10]:
from datasets import load_dataset

def to_vespa_document(row):
    """Map one corpus row to the ``{"id", "fields"}`` record fed to Vespa."""
    doc_id = row["_id"]
    return {
        "id": doc_id,
        "fields": {"title": row["title"], "body": row["text"], "id": doc_id},
    }


# Open the BeIR/nfcorpus "corpus" split in streaming mode and attach the
# row-to-document mapping.
dataset = load_dataset("BeIR/nfcorpus", "corpus", split="corpus", streaming=True)
vespa_feed = dataset.map(to_vespa_document)



In [11]:
from vespa.io import VespaResponse, VespaQueryResponse


def callback(response: VespaResponse, id: str):
    """Report a failed feed operation; successful responses are ignored.

    Args:
        response: Result of feeding one document.
        id: Identifier of the document the response belongs to.
    """
    if response.is_successful():
        return
    print(f"Error when feeding document {id}: {response.get_json()}")


# Feed every mapped document into the "doc" schema under the "tutorial"
# namespace; `callback` prints any operation that did not succeed.
app.feed_iterable(vespa_feed, schema="doc", namespace="tutorial", callback=callback)

In [12]:
import pandas as pd


def display_hits_as_df(response: VespaQueryResponse, fields) -> pd.DataFrame:
    """Build a DataFrame with one row per hit and one column per requested field.

    Args:
        response: Query response whose ``hits`` are tabulated.
        fields: Names looked up in each hit's ``"fields"`` mapping.

    Returns:
        A DataFrame of the selected values, in hit order.
    """
    rows = [
        {name: hit["fields"][name] for name in fields}
        for hit in response.hits
    ]
    return pd.DataFrame(rows)

In [13]:
# Run one text query with the "bm25" rank profile and print the id and title
# of the top five hits.
query = "How Fruits and Vegetables Can Treat Asthma?"
with app.syncio(connections=1) as session:
    response: VespaQueryResponse = session.query(
        query=query,
        ranking="bm25",
        yql="select * from sources * where userQuery() limit 5",
    )
    # Stop here if the query did not succeed.
    assert response.is_successful()
    print(display_hits_as_df(response, ["id", "title"]))

         id                                              title
0  MED-2450  Protective effect of fruits, vegetables and th...
1  MED-2464  Low vegetable intake is associated with allerg...
2  MED-1162  Pesticide residues in imported, organic, and "...
3  MED-2461  The association of diet with respiratory sympt...
4  MED-2085  Antiplatelet, anticoagulant, and fibrinolytic ...
