In [1]:
%pip install pandas pinecone

Collecting pinecone
  Downloading pinecone-7.3.0-py3-none-any.whl (587 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.6/587.6 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting pinecone-plugin-assistant<2.0.0,>=1.6.0
  Downloading pinecone_plugin_assistant-1.7.0-py3-none-any.whl (239 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl (6.2 kB)
Installing collected packages: pinecone-plugin-interface, pinecone-plugin-assistant, pinecone
Successfully installed pinecone-7.3.0 pinecone-plugin-assistant-1.7.0 pinecone-plugin-interface-0.0.7

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update,

In [2]:
import os
from pinecone import Pinecone

# Read the API key from the environment; if it is unset or empty, fall back to
# the literal placeholder string (replace it with a real key before running).
api_key = os.environ.get("PINECONE_API_KEY")
if not api_key:
    api_key = "PINECONE_API_KEY"
pc = Pinecone(api_key=api_key)

First, create an index

In [3]:
# Name of the demo index; reused by the cells that create, describe, and delete it.
index_name = "hello-pinecone"

In [4]:
# Start from a clean slate: remove the index if it already exists.
index_already_exists = pc.has_index(name=index_name)
if index_already_exists:
    pc.delete_index(name=index_name)

In [5]:
from pinecone import ServerlessSpec, CloudProvider, AwsRegion, Metric

# Serverless placement for the index: AWS, us-east-1.
serverless_spec = ServerlessSpec(
    cloud=CloudProvider.AWS,
    region=AwsRegion.US_EAST_1,
)

# Create a 3-dimensional index using the cosine metric. The call is kept as
# the last expression so its return value is shown as the cell output.
pc.create_index(
    name=index_name,
    dimension=3,
    metric=Metric.COSINE,
    spec=serverless_spec,
)


{
    "name": "hello-pinecone",
    "metric": "cosine",
    "host": "hello-pinecone-dxs2y97.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 3,
    "deletion_protection": "disabled",
    "tags": null
}

In [6]:
# Fetch the index description; later cells read its `host` and `dimension` attributes.
description = pc.describe_index(name=index_name)

In [7]:
# Bare expression so the notebook shows the index description as the cell output.
description

{
    "name": "hello-pinecone",
    "metric": "cosine",
    "host": "hello-pinecone-dxs2y97.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 3,
    "deletion_protection": "disabled",
    "tags": null
}

Upserting data into the index

In [8]:
# Get a handle to the index, addressed by the host taken from `description`.
index = pc.Index(host=description.host)

  from .autonotebook import tqdm as notebook_tqdm


In [13]:
import random
import pandas as pd

def create_simulated_data(num_vectors, dimension=None):
    """Build a DataFrame of random vectors suitable for upserting.

    Args:
        num_vectors: Number of rows (vectors) to generate.
        dimension: Length of each vector. When omitted, the value is read from
            the notebook-level ``description`` object (``description.dimension``),
            which is what this function always used before the parameter existed.

    Returns:
        A pandas DataFrame with an ``id`` column holding ``"id-0"``, ``"id-1"``,
        ... and a ``vector`` column holding lists of ``dimension`` floats drawn
        from ``random.random()``.
    """
    if dimension is None:
        # Fall back to the dimension of the index described earlier in the notebook.
        dimension = description.dimension

    ids = [f"id-{i}" for i in range(num_vectors)]
    # `_` marks both loop variables as unused; only the counts matter here.
    vectors = [
        [random.random() for _ in range(dimension)]
        for _ in range(num_vectors)
    ]
    return pd.DataFrame(data={"id": ids, "vector": vectors})

# Generate ten simulated vectors and preview the first rows as the cell output.
df = create_simulated_data(10)
df.head()

Unnamed: 0,id,vector
0,id-0,"[0.659369338712196, 0.29911018201933526, 0.092..."
1,id-1,"[0.5614061697665887, 0.8970199641851216, 0.531..."
2,id-2,"[0.47684983677190085, 0.8494515631503514, 0.63..."
3,id-3,"[0.5007494719007828, 0.7295529875152408, 0.805..."
4,id-4,"[0.9726885402942819, 0.843113122392354, 0.6594..."


In [14]:
# Pair each id with its vector and upsert the pairs into the index.
id_vector_pairs = zip(df["id"], df["vector"])
index.upsert(vectors=id_vector_pairs)

{'upserted_count': 10}

In [15]:
import time

def is_fresh(index, expected_count=1):
    """Report whether the index already exposes the expected number of vectors.

    Args:
        index: Object providing ``describe_index_stats()`` whose result has a
            ``total_vector_count`` attribute.
        expected_count: Minimum number of vectors that must be visible for the
            index to count as fresh. The default of 1 keeps the original
            "at least one vector is visible" behavior; pass the number of
            upserted vectors to wait for all of them.

    Returns:
        True when the reported vector count is at least ``expected_count``.
    """
    stats = index.describe_index_stats()
    vector_count = stats.total_vector_count
    # Plain string literal: nothing is interpolated, so no f-string is needed.
    print("Vector count: ", vector_count)
    return vector_count >= expected_count

# Poll every 5 seconds until the upserted vectors are visible, but give up
# after a bounded wait instead of looping forever if they never appear.
freshness_deadline = time.monotonic() + 120
while not is_fresh(index):
    if time.monotonic() >= freshness_deadline:
        raise TimeoutError("Index did not report any vectors within 120 seconds")
    time.sleep(5)

Vector count:  10


In [16]:
# Show the index statistics as the cell output.
index.describe_index_stats()

{'dimension': 3,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 10}},
 'total_vector_count': 10,
 'vector_type': 'dense'}

Running a query

In [18]:
# Query with a fixed 3-component vector and ask for the five best matches,
# including their stored values; the result is shown as the cell output.
query_embedding = [1.0, 0.5, 0.7]
index.query(
    vector=query_embedding,
    top_k=5,
    include_values=True,
)

{'matches': [{'id': 'id-5',
              'score': 0.979957759,
              'values': [0.563316941, 0.472606599, 0.44236514]},
             {'id': 'id-4',
              'score': 0.972685,
              'values': [0.972688556, 0.843113124, 0.659466207]},
             {'id': 'id-9',
              'score': 0.931567073,
              'values': [0.861466408, 0.0182106253, 0.626643777]},
             {'id': 'id-6',
              'score': 0.916704178,
              'values': [0.739296138, 0.0831025615, 0.894055545]},
             {'id': 'id-0',
              'score': 0.907330394,
              'values': [0.659369349, 0.299110174, 0.0923850611]}],
 'namespace': '',
 'usage': {'read_units': 1}}

Deleting the index

In [19]:
# Clean up: delete the demo index created earlier in the notebook.
pc.delete_index(name=index_name)