# Diving into Pinecone

In [20]:
# Load the API keys into environment variables from the .env file located by
# find_dotenv(). The PINECONE_API_KEY / PINECONE_ENV values read by later cells
# are expected to come from here.
# override=True: presumably lets values from the .env file replace variables
# that are already set in the environment — confirm against python-dotenv docs.
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

True

In [3]:
pip install pinecone-client --user

Note: you may need to restart the kernel to use updated packages.


In [21]:
import pinecone

# Initialize the Pinecone client with the API key and environment name held in
# the PINECONE_API_KEY / PINECONE_ENV environment variables (`os` is imported
# in the first cell). A missing variable is passed through as None.
pinecone.init(api_key=os.getenv('PINECONE_API_KEY'), environment=os.getenv('PINECONE_ENV'))

In [22]:
# Show the Pinecone server and client versions in use.
pinecone.info.version()

VersionResponse(server='2.0.11', client='2.2.4')

# Pinecone indexes

In [23]:
# Re-initialize the client behind an HTTP proxy: without the openapi_config
# below, pinecone.list_indexes() (next cell) failed with a timeout error.
from pinecone.core.client.configuration import Configuration as OpenApiConfiguration

# The proxy address is site-specific, so it can be overridden through the
# PINECONE_PROXY environment variable (e.g. in the .env file). When the
# variable is not set, the original hard-coded address is used.
DEFAULT_PROXY_URL = "http://proxy-dmz.intel.com:912"

openapi_config = OpenApiConfiguration.get_default_copy()
openapi_config.proxy = os.environ.get('PINECONE_PROXY', DEFAULT_PROXY_URL)

# Same credentials as the first init call, now with the proxy-aware configuration.
pinecone.init(
    api_key=os.environ.get('PINECONE_API_KEY'),
    environment=os.environ.get('PINECONE_ENV'),
    openapi_config=openapi_config,
)

In [24]:
# List the names of the existing indexes (returned as a list of strings).
pinecone.list_indexes()

['langchain-pinecone']

## Creating an index

In [32]:
# Create the index only when no index with this name exists yet.
index_name = 'langchain-pinecone'
existing_indexes = pinecone.list_indexes()

if index_name in existing_indexes:
    print(f'Index {index_name} already exists!')
else:
    print(f'Creating index {index_name} ....')
    # 1536-dimensional vectors compared by cosine similarity, on a single p1.x2 pod.
    pinecone.create_index(
        index_name,
        dimension=1536,
        metric='cosine',
        pods=1,
        pod_type='p1.x2',
    )
    print('Done')

Creating index langchain-pinecone ....
Done


In [33]:
# Show the configuration and status of the index named by `index_name`.
# NOTE(review): the saved output reports pod_type='starter' although the create
# cell requests 'p1.x2' — presumably the account tier overrides it; confirm.
pinecone.describe_index(index_name)

IndexDescription(name='langchain-pinecone', metric='cosine', replicas=1, dimension=1536.0, shards=1, pods=1, pod_type='starter', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')

## Deleting an index

In [27]:
# Delete an index whose name is typed in by the user.
# NOTE: this cell is interactive (it blocks on input()) and it overwrites
# `index_name`; later cells assign `index_name` again before using it.
# Surrounding whitespace is stripped so that a stray space or tab does not make
# an existing index look missing.
index_name = input('Enter Pinecone index to delete: ').strip()
if index_name in pinecone.list_indexes():
    print(f'Deleting index {index_name} ... ')
    pinecone.delete_index(index_name)
    print('Done')
else:
    print(f'Index {index_name} does not exist!')

Deleting index langchain-pinecone ... 
Done


## Getting index statistics

In [34]:
# Getting statistics: open a handle to the index by name and report its
# dimension, fullness, and vector counts (overall and per namespace).
index_name = 'langchain-pinecone'
index = pinecone.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [48]:
# Build some random vectors (and their ids) to insert into a Pinecone index.
import random

RANDOM_SEED = 42    # fixed seed so that re-running the notebook yields the same vectors
VECTOR_DIM = 1536   # must equal the dimension the index was created with
VECTOR_COUNT = 5

random.seed(RANDOM_SEED)
vectors = [[random.random() for _ in range(VECTOR_DIM)] for _ in range(VECTOR_COUNT)]

# One id per vector: 'a' .. 'e'.
ids = list('abcde')
assert len(ids) == len(vectors), 'every vector needs exactly one id'

In [49]:
# Insert the random vectors: pair each id with its vector and upsert the pairs.
index_name = 'langchain-pinecone'
index = pinecone.Index(index_name)
id_vector_pairs = list(zip(ids, vectors))
index.upsert(vectors=id_vector_pairs)

{'upserted_count': 5}

In [50]:
# Update the vector stored under id 'c' by upserting new values for that id.
updated_vector = ('c', [0.3] * 1536)
index.upsert(vectors=[updated_vector])

{'upserted_count': 1}

In [51]:
# Fetch the stored vectors for ids 'c' and 'd'; the response is keyed by id.
# A fresh Index handle is created here, so this cell does not depend on the
# `index` variable from earlier cells.
index = pinecone.Index('langchain-pinecone')
index.fetch(ids=['c', 'd'])

{'namespace': '',
 'vectors': {'d': {'id': 'd',
                   'values': [0.557749093,
                              0.621639431,
                              0.493145019,
                              0.839663446,
                              0.140472159,
                              0.76056838,
                              0.971473098,
                              0.234801918,
                              0.448518604,
                              0.359445095,
                              0.283169061,
                              0.84719789,
                              0.272785395,
                              0.695380211,
                              0.633385181,
                              0.552970111,
                              0.0516359024,
                              0.704041541,
                              0.631837487,
                              0.988827646,
                              0.150148496,
                              0.719239,
          

In [39]:
# Delete the vectors stored under ids 'b' and 'c'.
index.delete(ids=['b', 'c'])

{}

In [52]:
# Re-check the index statistics (vector counts overall and per namespace).
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 5e-05,
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5}

In [53]:
# Fetch id 'b' — presumably to verify the delete of 'b' and 'c' above.
# NOTE(review): the saved output still returns values for 'b'; the delete cell
# carries an older execution count (In [39]) than the upsert (In [49]), so the
# outputs were not produced in top-to-bottom order. Re-run in order to confirm.
index.fetch(ids=['b'])

{'namespace': '',
 'vectors': {'b': {'id': 'b',
                   'values': [0.901497364,
                              0.207508162,
                              0.551116109,
                              0.201435983,
                              0.178253308,
                              0.049599953,
                              0.644019485,
                              0.303839207,
                              0.292732984,
                              0.727128088,
                              0.211163,
                              0.865556121,
                              0.595346332,
                              0.496566117,
                              0.169092298,
                              0.403047472,
                              0.43655923,
                              0.138379648,
                              0.904285371,
                              0.296578437,
                              0.336559385,
                              0.520399153,
          

In [42]:
# Remove all vectors from the index (delete_all=True). No namespace is passed,
# so presumably only the default namespace is affected — confirm.
# NOTE(review): run in order, this empties the index before the query cells below.
index.delete(delete_all=True)

{}

In [54]:
# Querying: build two random query vectors with the same dimension as the index.
query_count, query_dim = 2, 1536
queries = [[random.random() for _ in range(query_dim)] for _ in range(query_count)]

In [57]:
# Run every query vector against the index and print its three best-scoring
# matches; include_values=False keeps the stored vector values out of the response.
for query in queries:
    result = index.query(vector=query, top_k=3, include_values=False)
    print(result)

{'matches': [{'id': 'd', 'score': 0.75039041, 'values': []},
             {'id': 'a', 'score': 0.747236371, 'values': []},
             {'id': 'e', 'score': 0.741415143, 'values': []}],
 'namespace': ''}
{'matches': [{'id': 'e', 'score': 0.753168702, 'values': []},
             {'id': 'd', 'score': 0.752364218, 'values': []},
             {'id': 'a', 'score': 0.739955246, 'values': []}],
 'namespace': ''}
