In [10]:
import shortuuid
import torch as pt
import torch.distributed as dist
from pinecone import Pinecone, PodSpec
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import paired_distances
from dotenv import load_dotenv
import os

In [11]:
# Load variables from a local .env file into the process environment;
# PINECONE_API_KEY is read from os.environ when the Pinecone client is created.
load_dotenv()

True

In [3]:
# Choose the compute backend in order of preference: CUDA, then MPS,
# falling back to the CPU when neither reports itself as available.
if pt.cuda.is_available():
    device = "cuda"
elif pt.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Sentence-embedding model placed on the selected device; its embedding
# dimension is used later to size the Pinecone index.
model = SentenceTransformer("all-MiniLM-L6-v2", device=device)

In [12]:
# Pinecone client; the API key comes from the environment rather than being
# hardcoded in the notebook.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
spec = PodSpec(environment="gcp-starter")

# Single source of truth for the index name used in this cell.
index_name = "quickstart-1"

# Creating an index that already exists is an error, so only create it when it
# is missing. This keeps the cell re-runnable (e.g. after an interrupted run
# that never reached the cleanup cell).
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # The index dimension must match the embedding size produced by `model`.
        dimension=model.get_sentence_embedding_dimension(),
        metric="cosine",
        spec=spec,
    )

# Handle used by the later cells for upserts, stats and queries.
index = pc.Index(index_name)

In [13]:
# Six sample sentences covering three topics, with two similarly-worded
# sentences per topic. They are embedded and stored in the index below.
sentences = [
    # Topic 1: the Finder file-navigation tool
    "The new Finder puts a user 's folders , hard drive , network servers , iDisk and removable media in one location , providing one-click access .",
    "Panther 's redesigned Finder navigation tool puts a user 's favourite folders , hard drive , network servers , iDisk and removable media in one location .",
    # Topic 2: tropical storm warnings in the Caribbean
    "But tropical storm warnings and watches were posted today for Haiti , western portions of the Dominican Republic , the southeastern Bahamas and the Turk and Caicos islands .",
    "Tropical storm warnings were in place Thursday for Jamaica and Haiti and watches for the western Dominican Republic , the southeastern Bahamas and the Turks and Caicos islands .",
    # Topic 3: Singapore / United States trade
    "Singapore is already the United States ' 12th-largest trading partner , with two-way trade totaling more than $ 34 billion .",
    "Although a small city-state , Singapore is the 12th-largest trading partner of the United States , with trade volume of $ 33.4 billion last year ."
]

In [14]:
# Encode every sentence in one batched call instead of one model invocation
# per sentence; list() keeps `embs` a list of 1-D arrays, one per sentence.
embs = list(model.encode(sentences))

# One record per sentence: a short random id, the embedding as a plain Python
# list (the same form the query cell sends), and the original text as metadata
# so query results can be read back as sentences.
# NOTE: ids are random, so re-running this cell against the same index adds
# new records rather than overwriting the previous ones.
vectors = [
    {
        "id": shortuuid.uuid()[:5],
        "values": emb.tolist(),
        "metadata": {"sentence": sentence},
    }
    for emb, sentence in zip(embs, sentences)
]

# Name the namespace explicitly so it visibly matches the one used when querying.
index.upsert(vectors=vectors, namespace="quickstart")

{'upserted_count': 6}

In [16]:
# Show index statistics (dimension, fullness, per-namespace vector counts) to
# check that the upserted vectors are present in the "quickstart" namespace.
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 6e-05,
 'namespaces': {'quickstart': {'vector_count': 6}},
 'total_vector_count': 6}

In [17]:
# Free-text query on the Singapore / United States trade topic.
query_sentence = "The dynamic economic partnership between Singapore and the United States has resulted in mutual trade volume exceeding $34 billion, with Singapore emerging as the U.S.' 12th-largest trading partner."
# Embed the query with the same model that produced the indexed vectors.
query_emb = model.encode(query_sentence)

In [18]:
# Fetch the three stored vectors closest to the query embedding from the
# "quickstart" namespace. Only the metadata (the original sentence) is
# requested back; the raw vector values are not needed for display.
matches = index.query(
    vector=query_emb.tolist(),
    namespace="quickstart",
    top_k=3,
    include_values=False,
    include_metadata=True,
)

In [20]:
# Print each match's similarity score followed by the sentence stored in its
# metadata, with a dashed separator between results.
for match in matches["matches"]:
    # Single quotes inside the f-string: reusing the enclosing double quotes
    # is only valid syntax on Python 3.12+ and is a SyntaxError before that.
    print(f"score: {match['score']}")
    print(match["metadata"]["sentence"])
    print("-" * 20)

score: 0.858420372
Although a small city-state , Singapore is the 12th-largest trading partner of the United States , with trade volume of $ 33.4 billion last year .
--------------------
score: 0.837462783
Singapore is already the United States ' 12th-largest trading partner , with two-way trade totaling more than $ 34 billion .
--------------------
score: 0.0559085868
Panther 's redesigned Finder navigation tool puts a user 's favourite folders , hard drive , network servers , iDisk and removable media in one location .
--------------------


In [21]:
# Clean up: remove the "quickstart-1" index created earlier in this notebook.
pc.delete_index("quickstart-1")