# Diving into Pinecone

This notebook uses the latest version of the Pinecone Python library (the `Pinecone` client class with serverless indexes).

In [9]:
# Authenticating to Pinecone: the API key is kept in a local .env file.
import os
from dotenv import load_dotenv, find_dotenv

# Locate the .env file first, then load it; override=True lets its values
# replace variables that are already set in the environment.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path, override=True)

True

In [10]:
from pinecone import Pinecone, ServerlessSpec

# Initializing and authenticating the Pinecone client.
# Called without arguments, the client is expected to read the API key from the
# PINECONE_API_KEY environment variable (the key kept in .env).
pc = Pinecone()
# Alternative: pass the key explicitly.
# pc = Pinecone(api_key='YOUR_API_KEY')

# checking authentication: a successful listing shows the key was accepted
pc.list_indexes()

{'indexes': [{'dimension': 1536,
              'host': 'docs-7vpchz3.svc.aped-4627-b74a.pinecone.io',
              'metric': 'cosine',
              'name': 'docs',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
              'status': {'ready': True, 'state': 'Ready'}}]}

## Working with Pinecone Indexes

In [11]:
# listing all indexes; each entry reports the index name, dimension, metric,
# host, spec and status
pc.list_indexes()

{'indexes': [{'dimension': 1536,
              'host': 'docs-7vpchz3.svc.aped-4627-b74a.pinecone.io',
              'metric': 'cosine',
              'name': 'docs',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [13]:
# name of the index to inspect
# NOTE(review): assumes an index named 'docs' already exists; the delete cell
# further down removes it, so re-running the notebook from the top will
# presumably fail here — confirm
index_name = 'docs'
# getting a complete description of a specific index:
pc.describe_index(index_name)

{'dimension': 1536,
 'host': 'docs-7vpchz3.svc.aped-4627-b74a.pinecone.io',
 'metric': 'cosine',
 'name': 'docs',
 'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
 'status': {'ready': True, 'state': 'Ready'}}

In [14]:
# getting a list with the index names
# (names() reduces the full listing to just the name of each index)
pc.list_indexes().names()

['docs']

In [15]:
# Deleting an index: remove the index named by index_name, but only when it
# is actually present; otherwise just report that there is nothing to delete.
existing_indexes = pc.list_indexes().names()
if index_name not in existing_indexes:
    print(f'Index {index_name} does not exist!')
else:
    print(f'Deleting index {index_name} ... ')
    pc.delete_index(index_name)
    print('Done')

Deleting index docs ... 
Done


In [16]:
# Creating a Serverless Pinecone index, unless one with this name already exists.
# starter free plan permits 1 project, up to 5 indexes, up to 100 namespaces per index
index_name = 'langchain'

existing_indexes = pc.list_indexes().names()
if index_name in existing_indexes:
    print(f'Index {index_name} already exists!')
else:
    print(f'Creating index {index_name}')
    # serverless deployment target: AWS, us-east-1
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    # 1536-dimensional vectors compared with the cosine metric
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
        spec=serverless_spec,
    )
    print('Index created! 😊')


Creating index langchain
Index created! 😊


In [17]:
# open a handle to the index by name and inspect its statistics
# (dimension, fullness, per-namespace vector counts, total vector count)
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

## Working with Vectors

In [18]:
# inserting vectors
import random

# 5 random vectors of 1536 floats each, matching the dimension the index was created with
vectors = [[random.random() for _ in range(1536)] for _ in range(5)]
# one single-character id per vector: 'a' .. 'e'
ids = list('abcde')

index_name = 'langchain'
index = pc.Index(index_name)

# upsert takes a list of (id, values) tuples; materialize the pairs into a list
# instead of handing the client a one-shot zip iterator
index.upsert(vectors=list(zip(ids, vectors)))

{'upserted_count': 5}

In [19]:
# updating vectors: upserting an id that already exists ('c') replaces its
# values, here with a constant vector of 1536 entries equal to 0.5
index.upsert(vectors=[('c', [0.5] * 1536)])

{'upserted_count': 1}

In [20]:
# fetching vectors by id; the response's 'vectors' mapping is keyed by id and
# carries the stored values
# index = pc.Index(index_name)
index.fetch(ids=['c', 'd'])

{'namespace': '',
 'usage': {'read_units': 1},
 'vectors': {'c': {'id': 'c',
                   'values': [0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
             

In [21]:
# deleting vectors by id (no namespace is given, so the default namespace is targeted)
index.delete(ids=['b', 'c'])

{}

In [22]:
# check the effect of the deletion: 5 vectors were inserted and 2 deleted,
# so 3 should remain in the default namespace
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3}

In [23]:
# fetching an id that does not exist raises no error; the response simply
# carries an empty 'vectors' mapping
index.fetch(ids=['x']) 

{'namespace': '', 'usage': {'read_units': 1}, 'vectors': {}}

In [24]:
# querying vectors: build a random query vector with the same 1536 dimensions as the index
query_vector = [random.random() for _ in range(1536)]

In [25]:
# ask for the 3 best matches for query_vector (the index was created with the
# cosine metric); include_values=False leaves each match's 'values' empty
index.query(
    vector=query_vector,
    top_k=3,
    include_values=False
)

{'matches': [{'id': 'd', 'score': 0.76224035, 'values': []},
             {'id': 'e', 'score': 0.760216057, 'values': []},
             {'id': 'a', 'score': 0.759820819, 'values': []}],
 'namespace': '',
 'usage': {'read_units': 5}}

## Namespaces

In [26]:
# upsert ids 'a' .. 'e' with fresh random 1536-dimensional vectors; no namespace
# is given, so they land in the default namespace
index = pc.Index('langchain')

import random
vectors = [[random.random() for _ in range(1536)] for _ in range(5)]
ids = list('abcde')
# upsert takes a list of (id, values) tuples; materialize the pairs into a list
# instead of handing the client a one-shot zip iterator
index.upsert(vectors=list(zip(ids, vectors)))

{'upserted_count': 5}

In [27]:
# partition the index into namespaces
# creating a new namespace: upserting into a namespace name that is not in use
# yet is all it takes
vectors = [[random.random() for _ in range(1536)] for _ in range(3)]
ids = list('xyz')
# pass a concrete list of (id, values) tuples rather than a one-shot zip iterator
index.upsert(vectors=list(zip(ids, vectors)), namespace='first-namespace')

{'upserted_count': 3}

In [28]:
# a second namespace holding 2 vectors with ids 'q' and 'p'
vectors = [[random.random() for _ in range(1536)] for _ in range(2)]
ids = list('qp')
# pass a concrete list of (id, values) tuples rather than a one-shot zip iterator
index.upsert(vectors=list(zip(ids, vectors)), namespace='second-namespace')

{'upserted_count': 2}

In [29]:
# per-namespace vector counts after the three upserts
# NOTE(review): the recorded output lists only '' and 'first-namespace';
# presumably the stats had not yet caught up with the 'second-namespace'
# upsert — confirm
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 5},
                'first-namespace': {'vector_count': 3}},
 'total_vector_count': 8}

In [30]:
# no namespace argument: the lookup runs against the default ('') namespace,
# where no vector with id 'x' was stored, so 'vectors' comes back empty
index.fetch(ids=['x'])

{'namespace': '', 'usage': {'read_units': 1}, 'vectors': {}}

In [31]:
# the same id is found once the namespace it was upserted into is specified
index.fetch(ids=['x'], namespace='first-namespace')


{'namespace': 'first-namespace',
 'usage': {'read_units': 1},
 'vectors': {'x': {'id': 'x',
                   'values': [0.83873421,
                              0.470745593,
                              0.192421377,
                              0.0347543582,
                              0.213159934,
                              0.97895503,
                              0.656315923,
                              0.57607466,
                              0.17450355,
                              0.0649907,
                              0.605660498,
                              0.729950428,
                              0.724497795,
                              0.326010317,
                              0.0936910659,
                              0.626128197,
                              0.382902026,
                              0.564529,
                              0.11282479,
                              0.447562158,
                              0.227761775,
             

In [32]:
# delete a single vector by id from a specific namespace
index.delete(ids=['x'], namespace='first-namespace')

{}

In [33]:
# delete every remaining vector in the namespace; judging by the stats shown
# afterwards, the emptied namespace is no longer listed
index.delete(delete_all=True, namespace='first-namespace')

{}

In [34]:
# final state of the index after clearing 'first-namespace'
# NOTE(review): the recorded output shows only the default namespace with 5
# vectors; 'second-namespace' (2 vectors upserted earlier) is absent —
# presumably stale stats, confirm
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5}