In [4]:
from pinecone import Pinecone, PodSpec
from sklearn.metrics import pairwise_distances
import numpy as np
from sklearn.metrics.pairwise import paired_distances
from dotenv import load_dotenv
import os

In [5]:
# Read a local .env file into the process environment; the Pinecone client
# below takes its API key from os.environ["PINECONE_API_KEY"].
load_dotenv()

# Array display: three decimals, no scientific notation, and limits large
# enough that small arrays are never elided or wrapped.
np.set_printoptions(precision=3, suppress=True, edgeitems=30, linewidth=100_000)

In [8]:
def retrieve(query, vectors):
    """Return the record in ``vectors`` whose values are closest to ``query``.

    Brute-force, in-memory nearest-neighbour lookup by cosine distance.

    Parameters
    ----------
    query : array-like
        The query vector; it must broadcast against the stored vectors.
    vectors : sequence of dict
        Records carrying a ``"values"`` entry (the vector itself). All
        ``"values"`` entries must have the same length.

    Returns
    -------
    dict
        The element of ``vectors`` with the smallest cosine distance to
        ``query`` (the first one on ties, per ``np.argmin``).

    Raises
    ------
    ValueError
        If ``vectors`` is empty, or if ``query`` cannot be broadcast to the
        shape of the stored vectors.
    """
    if len(vectors) == 0:
        raise ValueError("retrieve() needs at least one vector to search")

    # Force floating point: integer-valued "values" would otherwise yield an
    # integer matrix, and the query must never be cast down to that dtype.
    db = np.asarray([vec["values"] for vec in vectors], dtype=float)

    # One copy of the query per stored vector, because paired_distances
    # compares row i of the first argument with row i of the second.
    # np.full_like(db, query) was used here before; it inherits db's dtype and
    # silently truncated float queries (e.g. [0.9, 0.2] -> [0, 0]) whenever
    # the stored vectors were integers.
    queries = np.full(db.shape, query, dtype=float)

    distances = paired_distances(queries, db, metric="cosine")
    return vectors[int(np.argmin(distances))]

In [9]:
def print_vectors(vectors):
    """Print one summary line per record: id, first three values, type and size.

    Each record is expected to provide ``"id"`` and ``"values"`` entries.
    """
    for record in vectors:
        vec_id = record["id"]
        payload = record["values"]
        head = payload[:3]
        # Objects exposing ``.shape`` (e.g. NumPy arrays) report it directly;
        # anything else falls back to its length.
        size = payload.shape if hasattr(payload, "shape") else len(payload)
        print(
            f"id: {vec_id}, snippet: {head}, "
            f"type: {type(payload)}, shape: {size}"
        )

In [6]:
# Pinecone client; the API key is read from the environment, never hard-coded.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
spec = PodSpec(environment="gcp-starter")

# Create the index only when it is missing, so this cell can be re-run
# (e.g. Restart & Run All) without failing because "quickstart" already exists.
# NOTE: an existing "quickstart" index is reused as-is; delete it first if it
# was created with a different dimension or metric.
if "quickstart" not in pc.list_indexes().names():
    pc.create_index(
        name="quickstart",
        dimension=2,      # the toy vectors in this notebook are 2-D
        metric="cosine",
        spec=spec
    )

In [7]:
# Handle to the "quickstart" index; the upsert, stats and query cells below go through it.
index = pc.Index("quickstart")

In [10]:
# Three toy 2-D vectors, each held as its own NumPy array.
np_vecs = [np.array(pair) for pair in ([1., 2.], [2., 1.], [2., 2.])]

# One record per array, with ids vec0, vec1, ... following list order.
vectors = [
    dict(id=f"vec{position}", values=array)
    for position, array in enumerate(np_vecs)
]

# Summarize the records before they are sent to the index.
print_vectors(vectors)

id: vec0, snippet: [1. 2.], type: <class 'numpy.ndarray'>, shape: (2,)
id: vec1, snippet: [2. 1.], type: <class 'numpy.ndarray'>, shape: (2,)
id: vec2, snippet: [2. 2.], type: <class 'numpy.ndarray'>, shape: (2,)


In [11]:
# Send the records to the index.
# NOTE(review): the summaries printed before and after this call show "values"
# changing from numpy.ndarray to list, so the client appears to convert the
# passed dicts in place — confirm against the client documentation.
index.upsert(
    vectors=vectors
)

{'upserted_count': 3}

In [15]:
# Index-level statistics: dimension, fullness and vector counts per namespace.
index.describe_index_stats()

{'dimension': 2,
 'index_fullness': 3e-05,
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3}

In [12]:
# Summarize the same records again, this time after the upsert.
print_vectors(vectors)

id: vec0, snippet: [1.0, 2.0], type: <class 'list'>, shape: 2
id: vec1, snippet: [2.0, 1.0], type: <class 'list'>, shape: 2
id: vec2, snippet: [2.0, 2.0], type: <class 'list'>, shape: 2


In [None]:
# Query vector for the similarity search.
query = np.array([0.5, 1.5])
# NOTE: despite its name, `matches` holds the whole query response; the ranked
# hits are under its 'matches' key, next to 'namespace' and 'usage'.
matches = index.query(
    vector=query.tolist(),  # does not support numpy natively
    top_k=3,  # all three stored vectors come back, ranked by score
    include_values=True  # each hit also carries its stored vector values
)
matches

{'matches': [{'id': 'vec0', 'score': 0.989940703, 'values': [1.0, 2.0]},
             {'id': 'vec2', 'score': 0.892666519, 'values': [2.0, 2.0]},
             {'id': 'vec1', 'score': 0.708220363, 'values': [2.0, 1.0]}],
 'namespace': '',
 'usage': {'read_units': 6}}

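As the diagram below shows, the query vector [0.5, 1.5] forms the smallest angle with [1, 2], which is why `vec0` ranks first with the highest cosine similarity score (≈ 0.990), followed by `vec2` and `vec1`.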
![vectors](./vectors.png)

In [16]:
# Clean up: delete the "quickstart" index that this notebook created.
pc.delete_index("quickstart")