# Diving into Pinecone

In [1]:
# loading the API keys in environment variables
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

True

In [4]:
#!pip install pinecone-client~=2.2.2

In [6]:
#https://github.com/pinecone-io/pinecone-python-client/blob/main/README.md
import pinecone

In [27]:
class PineconeOperations:
    """Small helper around the module-level ``pinecone`` client.

    Creating an instance initialises the client from the ``PINECONE_API_KEY``
    and ``PINECONE_ENV`` environment variables.  Call :meth:`connect_index`
    before using :meth:`upsert`, :meth:`fetch_stats` or :meth:`query`.
    """

    def __init__(self):
        _ = load_dotenv(find_dotenv())  # read local .env file
        api_key = os.getenv('PINECONE_API_KEY')
        api_env = os.getenv('PINECONE_ENV')

        pinecone.init(
            api_key=api_key,
            environment=api_env
        )
        # Handle to the connected index; stays None until connect_index() runs.
        self.index = None

    def _require_index(self):
        """Return the connected index, or raise if connect_index() was not called."""
        if self.index is None:
            raise RuntimeError(
                "Not connected to an index; call connect_index() first."
            )
        return self.index

    @staticmethod
    def _to_dict(response) -> dict:
        """Convert an API response into a plain ``dict``.

        Prefers the response's own ``to_dict()`` when it has one.  The
        previous approach (``str(response)`` with quotes swapped, then
        ``json.loads``) is kept only as a fallback: it breaks on values that
        contain an apostrophe or on ``None``/``True``/``False``.
        """
        if isinstance(response, dict):
            return dict(response)
        to_dict = getattr(response, "to_dict", None)
        if callable(to_dict):
            return to_dict()
        import json  # local import: the file does not import json at the top
        return json.loads(str(response).replace("'", '"'))

    def show_index(self):
        """Return the result of ``pinecone.list_indexes()``."""
        return pinecone.list_indexes()

    def create_index(self, index_name='default') -> list:
        """Create ``index_name`` only when no index exists at all.

        The index is created with ``dimension=8`` and the cosine metric.

        Returns:
            The list of indexes.  When an index had to be created the list is
            fetched again, so the new index is included in the result.
        """
        # fetch the list of indexes
        indexes = pinecone.list_indexes()

        # create index if there are no indexes found
        if not indexes:
            pinecone.create_index(index_name, dimension=8, metric="cosine")
            # The list fetched above is stale (empty) at this point.
            indexes = pinecone.list_indexes()

        return indexes

    def connect_index(self):
        """Connect ``self.index`` to the first available index.

        An index is created first (see :meth:`create_index`) when none exists.

        Raises:
            RuntimeError: if no index is listed even after the creation step.
        """
        indexes = self.create_index()
        if not indexes:
            raise RuntimeError("No Pinecone index is available to connect to.")
        # connect to a specific index
        self.index = pinecone.Index(indexes[0])

    def upsert(self, data):
        """Upsert ``data`` into the ``"quickstart"`` namespace.

        Args:
            data: ``(id, values)`` pairs, for example::

                [
                    ("A", [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]),
                    ("B", [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2]),
                ]

        Returns:
            The upsert response as a plain ``dict``.

        Raises:
            RuntimeError: if :meth:`connect_index` has not been called.
        """
        response = self._require_index().upsert(vectors=data, namespace="quickstart")
        return self._to_dict(response)

    def fetch_stats(self):
        """Return ``describe_index_stats()`` of the connected index.

        Raises:
            RuntimeError: if :meth:`connect_index` has not been called.
        """
        return self._require_index().describe_index_stats()

    def query(self, query_vector):
        """Return the 2 nearest matches (values included) in ``"quickstart"``.

        Args:
            query_vector: the query values, e.g.
                ``[0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]``.

        Returns:
            The query response as a plain ``dict``.

        Raises:
            RuntimeError: if :meth:`connect_index` has not been called.
        """
        response = self._require_index().query(
            vector=query_vector,
            top_k=2,
            include_values=True,
            namespace="quickstart"
        )
        return self._to_dict(response)

In [32]:
# pine = PineconeOperations()
# pine.create_index(index_name='test-index')
# pine.connect_index()
# pine.fetch_stats()

In [33]:
# Initialise the pinecone client with the API key and environment name read
# from the PINECONE_API_KEY and PINECONE_ENV environment variables.
pinecone.init(
    api_key=os.getenv('PINECONE_API_KEY'),
    environment=os.getenv('PINECONE_ENV')
)

In [34]:
pinecone.info.version()

VersionResponse(server='2.0.11', client='2.2.4')

### Pinecone Indexes

In [37]:
pinecone.list_indexes()

[]

#### Creating an index

In [41]:
# creating an index (only when no index with this name exists yet)
index_name = 'example-index'
if index_name not in pinecone.list_indexes():
    print(f'Creating index {index_name} ....')
    pinecone.create_index(
        # Reuse the variable so the existence check, the message and the
        # created index can never refer to different names.
        name=index_name,
        dimension=1536,
        metric="cosine",
        pods=1,
        pod_type='p1.x2'  # for updated API: spec=PodSpec(environment='gcp-starter', pod_type="p1.x1")
    )
else:
    print(f'Index {index_name} already exists!')

Creating index example-index ....


In [43]:
pinecone.describe_index(index_name)

IndexDescription(name='example-index', metric='cosine', replicas=1, dimension=1536.0, shards=1, pods=1, pod_type='starter', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')

#### Deleting an index

In [44]:
# deleting an index
# The name is kept in its own variable so the user's input does not overwrite
# `index_name`, and it is stripped because stray spaces around the typed name
# would never match an existing index.
index_to_delete = input('Enter Pinecone index to delete: ').strip()
if index_to_delete in pinecone.list_indexes():
    print(f'Deleting index {index_to_delete} ... ')
    pinecone.delete_index(index_to_delete)
    print('Done')
else:
    # Report the no-op instead of finishing silently.
    print(f'Index {index_to_delete!r} does not exist; nothing was deleted.')

Enter Pinecone index to delete:  not in there


#### Getting index statistics

In [46]:
# getting statistics
# Open a handle to 'example-index' and display its statistics
# (dimension, index fullness, namespaces and total vector count).
index = pinecone.Index('example-index')
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

#### Inserting into an index

In [47]:
# Sample data for the insert demo: five vectors of 1536 random floats each
# (every value comes from random.random()), plus one id per vector.
import random

vectors = [
    [random.random() for _ in range(1536)]
    for _ in range(5)
]

# vector ids, one single-letter id per vector
ids = ['a', 'b', 'c', 'd', 'e']

In [48]:
ids

['a', 'b', 'c', 'd', 'e']

In [50]:
index_name = 'example-index'
index = pinecone.Index(index_name)
# Pair every id with its vector. The pairs are materialised into a list
# because `zip` yields a one-shot iterator with no length, while the client
# documents `vectors` as a list.
index.upsert(vectors=list(zip(ids, vectors)))

{'upserted_count': 5}

#### Updating a vector

In [51]:
# updating a vector
# Upserting under an id that is already stored ('c') replaces the values kept
# for that id; here every one of its 1536 components becomes 0.3.
index.upsert(vectors=[('c', [0.3] * 1536)])

{'upserted_count': 1}

#### Fetching a vector

In [52]:
# fetching a vector
# Open a handle to 'example-index' and look up the vectors stored under the
# ids 'c' and 'd'; the response is keyed by id.
index = pinecone.Index('example-index')
index.fetch(ids=['c', 'd'])

{'namespace': '',
 'usage': {'readUnits': 2},
 'vectors': {'c': {'id': 'c',
                   'values': [0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
              

#### Deleting vectors

In [47]:
# deleting vectors
#index.delete(ids=['b', 'c'])

In [53]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 5e-05,
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5}

In [56]:
#index.fetch(ids=['b'])

In [57]:
#index.delete(delete_all=True)

#### Querying

In [58]:
# querying
# Build one random 1536-dimensional query vector. Note the outer list:
# `queries` is a list that contains a single vector.
queries = [[random.random() for _ in range(1536)]]

In [60]:
# `queries` is a list holding a single vector, whereas `vector` takes one
# flat list of floats, so the vector itself is passed rather than the wrapper.
# Ask for the 3 closest matches without returning their values.
index.query(
    vector=queries[0],
    top_k=3,
    include_values=False
)

{'matches': [{'id': 'c', 'score': 0.866251349, 'values': []},
             {'id': 'e', 'score': 0.755729437, 'values': []},
             {'id': 'b', 'score': 0.747715116, 'values': []}],
 'namespace': '',
 'usage': {'readUnits': 5}}