# Load data (from previous notebook)

In [None]:
# Read the whole corpus file and split it into segments on the "@@@" delimiter.
# The context manager closes the file handle as soon as the text has been read
# instead of leaving it open until garbage collection.
# NOTE(review): the platform default text encoding is used; pass encoding=...
# explicitly if the previous notebook wrote the file with a specific one.
with open("sentences.txt") as f:
    sentences = f.read().split("@@@")

In [None]:
# Number of segments obtained by splitting the file on "@@@".
len(sentences)

In [None]:
import numpy as np
# Load the precomputed sentence embeddings. Given a path, np.load opens the
# file in binary mode and closes it again once the array has been read.
sembeddings = np.load("sentences-arctic.npy")

In [None]:
# Shape of the loaded embedding array; its second entry is what the index
# below uses as the vector dimensionality.
sembeddings.shape

# Vector DB

In [None]:
from usearch.index import Index, MetricKind, CompiledMetric

# Create the vector index: dimensionality comes from the second axis of the
# embedding array, and distances are computed with the 'cos' metric.
index = Index(ndim=sembeddings.shape[1], metric='cos')

In [None]:
%%time
# Insert every embedding under its row number as the key; search() later uses
# these keys to index into `sentences`.
# NOTE(review): assumes row i of `sembeddings` corresponds to sentences[i] — confirm.
index.add(list(range(len(sembeddings))), sembeddings)

In [None]:
# Save the populated index under this file name.
index.save("sentences-arctic.usearch")

In [None]:
# The embedding model is needed to encode new queries at search time.
# NOTE(review): presumably the same model that produced sentences-arctic.npy —
# confirm against the previous notebook.
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('Snowflake/snowflake-arctic-embed-l-v2.0')

In [None]:
import pandas as pd
def search(query, index, sentences, model, query_prompt_name=None, top=20):
    """Return the sentences closest to *query* as a DataFrame.

    Args:
        query: Text to search for.
        index: Vector index exposing ``search(vector, count)`` whose hits
            carry ``key`` and ``distance`` attributes.
        sentences: Sequence indexed by the keys stored in ``index``.
        model: Embedding model exposing ``encode``; it is called with
            ``normalize_embeddings=True`` and the given prompt name.
        query_prompt_name: Forwarded to ``model.encode`` as ``prompt_name``.
            NOTE(review): some retrieval models expect a dedicated query
            prompt (e.g. ``"query"``) — confirm for the model in use.
        top: Maximum number of hits requested from the index.

    Returns:
        A DataFrame with the columns ``id``, ``text`` and ``score``, one row
        per hit in the order the index returned them. ``score`` is
        ``1 - distance``, i.e. a similarity when the index uses a cosine
        distance. The three columns are present even when there are no hits.
    """
    # Encode the query into the same vector space as the indexed sentences.
    query_embedding = model.encode(
        query, normalize_embeddings=True, prompt_name=query_prompt_name
    )

    # The metric is fixed when the index is built; the search call only takes
    # the query vector and the number of results, so no metric is passed here.
    matches = index.search(query_embedding, top)

    rows = [
        {
            "id": match.key,
            "text": sentences[match.key],
            "score": 1 - match.distance,
        }
        for match in matches
    ]

    # Explicit columns keep the result schema stable for an empty hit list.
    return pd.DataFrame(rows, columns=["id", "text", "score"])

In [None]:
# Presumably meant to show full sentence text in DataFrame output rather than
# a truncated preview.
# NOTE(review): pandas documents None as the "unlimited" value for this
# option — confirm that 0 behaves the same on the installed version.
pd.set_option('display.max_colwidth', 0)

In [None]:
# Example query using the defaults (no query prompt name, top=20); the
# notebook displays the returned DataFrame of matches and scores.
search("Is the climate crisis worse for poorer countries?", index, sentences, model)