## Pinecone

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Locate the .env file first, then load it; override=True is passed so values
# from the file take precedence over variables already set in the environment.
env_file = find_dotenv()
load_dotenv(env_file, override=True)

True

In [2]:
pip install -q pinecone-client

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install --upgrade -q pinecone-client

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip show pinecone-client

Name: pinecone-client
Version: 5.0.1
Summary: Pinecone client and SDK
Home-page: https://www.pinecone.io
Author: Pinecone Systems, Inc.
Author-email: support@pinecone.io
License: Apache-2.0
Location: c:\miniconda\Gen_AI_Automation_Deep_Learning\LangChain_Mastery_Develop_LLM_Apps_with_LangChain_&_Pinecone\venv\Lib\site-packages
Requires: certifi, pinecone-plugin-inference, pinecone-plugin-interface, tqdm, typing-extensions, urllib3
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [2]:
from pinecone import Pinecone, ServerlessSpec

# Initializing and authenticating the Pinecone client.
# No api_key is passed, so the key is presumably read from the environment
# (e.g. a PINECONE_API_KEY entry loaded from .env) — verify for your setup.
pc = Pinecone()
# Alternative: pass the key explicitly.
# pc = Pinecone(api_key='YOUR_API_KEY')

# Checking authentication: a successful listing confirms the credentials work.
pc.list_indexes()

  from tqdm.autonotebook import tqdm


{'indexes': [{'deletion_protection': 'disabled',
              'dimension': 1536,
              'host': 'langchain-fetchb7.svc.aped-4627-b74a.pinecone.io',
              'metric': 'cosine',
              'name': 'langchain',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [7]:
from pinecone import Pinecone, ServerlessSpec
index_name = 'langchain'

# Create the index only when no index with this name exists yet.
if index_name not in pc.list_indexes().names():
    print(f'Creating index {index_name}')
    pc.create_index(
        name=index_name,
        dimension=1536,  # number of components per vector; the vectors upserted below are built to match
        metric='cosine',
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"  # AWS US East (N. Virginia)
        ) 
    )
    print('Index created! 😊')
else:
    print(f'Index {index_name} already exists!')

Creating index langchain
Index created! 😊


In [9]:
# Names of all indexes visible to this client.
pc.list_indexes().names()

['langchain']

In [12]:
# Configuration and readiness status of a single index (dimension, metric, host, spec, status).
pc.describe_index("langchain")

{'deletion_protection': 'disabled',
 'dimension': 1536,
 'host': 'langchain-fetchb7.svc.aped-4627-b74a.pinecone.io',
 'metric': 'cosine',
 'name': 'langchain',
 'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
 'status': {'ready': True, 'state': 'Ready'}}

In [13]:
index_name = 'langchain'

# Remove the index when it is present; otherwise just report that there is nothing to delete.
existing_names = pc.list_indexes().names()
if index_name not in existing_names:
    print(f'Index {index_name} does not exist!')
else:
    print(f'Deleting index {index_name} ... ')
    pc.delete_index(index_name)
    print('Done')

Deleting index langchain ... 
Done


In [14]:
from pinecone import Pinecone, ServerlessSpec

index_name = 'langchain'

# Recreate the index unless one with this name is already present.
if index_name in pc.list_indexes().names():
    print(f'Index {index_name} already exists!')
else:
    print(f'Creating index {index_name}')
    # Serverless index hosted on AWS in us-east-1 (N. Virginia).
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=1536,  # number of components every stored vector must have
        metric='cosine',
        spec=serverless_spec,
    )
    print('Index created! 😊')

Creating index langchain
Index created! 😊


In [15]:
index = pc.Index(index_name) # handle to the named index, used below for vector operations
# Summary statistics: dimension, fullness, per-namespace vector counts and the total count.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

## Working With Vectors

In [3]:
import random

# Seed the generator so the demo vectors, and the similarity scores computed
# from them later, are the same on every Restart & Run All.
random.seed(42)

EMBEDDING_DIM = 1536  # must match the dimension the index was created with

# Five random vectors, one per id 'a'..'e'.
ids = list("abcde")
vectors = [[random.random() for _ in range(EMBEDDING_DIM)] for _ in ids]

index_name = "langchain"
index = pc.Index(index_name)

# Upsert (id, values) pairs. A concrete list is passed instead of a one-shot
# zip iterator so the payload can be inspected or re-sent if needed.
index.upsert(vectors=list(zip(ids, vectors)))

{'upserted_count': 5}

In [4]:
# Updating a vector: upserting an id that already exists ('c') replaces its stored values.
index.upsert(vectors=[('c', [0.5] * 1536)])

{'upserted_count': 1}

In [5]:
# Fetching vectors by id; the response maps each id to its stored values.
# index = pc.Index(index_name)
index.fetch(ids=['c', 'd'])

{'namespace': '',
 'usage': {'read_units': 1},
 'vectors': {'c': {'id': 'c',
                   'values': [0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
             

In [6]:
# Deleting vectors by id (no namespace is given, so the default namespace is targeted).
index.delete(ids=['b', 'c'])

{}

In [7]:
# Fetching an id that does not exist raises no error; the 'vectors' mapping in the response is simply empty.
index.fetch(ids=['x']) 

{'namespace': '', 'usage': {'read_units': 1}, 'vectors': {}}

In [8]:
# Index statistics after the deletions: per-namespace and total vector counts.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3}

In [9]:
# Querying vectors: score the stored vectors against a random query vector.
query_vector = [random.random() for _ in range(1536)]
index.query(
    vector=query_vector,
    top_k=3,  # return the three best-scoring matches
    include_values=False  # ids and scores only; 'values' comes back empty
)

{'matches': [{'id': 'd', 'score': 0.752131343, 'values': []},
             {'id': 'a', 'score': 0.749787152, 'values': []},
             {'id': 'e', 'score': 0.74973017, 'values': []}],
 'namespace': '',
 'usage': {'read_units': 5}}

## Namespaces

In [11]:
import random

index = pc.Index('langchain')

# Five random 1536-component vectors keyed 'a'..'e'; no namespace is given,
# so they are written to the default ('') namespace.
ids = list('abcde')
vectors = [[random.random() for _ in range(1536)] for _ in ids]
index.upsert(vectors=zip(ids, vectors))

{'upserted_count': 5}

In [12]:
# Partition the index into namespaces.
# Upserting with a namespace name that does not exist yet creates that namespace.
vectors = [[random.random() for _ in range(1536)] for v in range(3)]
ids = list('xyz')
index.upsert(vectors=zip(ids, vectors), namespace='first-namespace')

{'upserted_count': 3}

In [13]:
# Two more random vectors ('q', 'p') written to a second, separate namespace.
vectors = [[random.random() for _ in range(1536)] for v in range(2)]
ids = list('qp')
index.upsert(vectors=zip(ids, vectors), namespace='second-namespace')

{'upserted_count': 2}

In [14]:
# Vector counts are reported per namespace; '' is the default namespace.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 5},
                'first-namespace': {'vector_count': 3},
                'second-namespace': {'vector_count': 2}},
 'total_vector_count': 10}

In [15]:
# Without a namespace argument the lookup runs against the default namespace, where 'x' is not stored.
index.fetch(ids = ["x"])

{'namespace': '', 'usage': {'read_units': 1}, 'vectors': {}}

In [16]:
# The same id is found once the namespace it was upserted into is specified.
index.fetch(ids = ["x"], namespace = "first-namespace")

{'namespace': 'first-namespace',
 'usage': {'read_units': 1},
 'vectors': {'x': {'id': 'x',
                   'values': [0.68961525,
                              0.987896442,
                              0.526078284,
                              0.480343461,
                              0.419746637,
                              0.564235926,
                              0.608451843,
                              0.827645242,
                              0.947486,
                              0.0281616319,
                              0.527445734,
                              0.460119814,
                              0.725347,
                              0.830198824,
                              0.857621312,
                              0.70260644,
                              0.904971242,
                              0.501634657,
                              0.912567198,
                              0.567438304,
                              0.0497423075,
           

In [17]:
# Delete a single vector by id from a specific namespace.
index.delete(ids=['x'], namespace='first-namespace')

{}

In [18]:
# Delete every remaining vector in 'first-namespace'; the namespace no longer appears in the index stats afterwards.
index.delete(delete_all=True, namespace='first-namespace')

{}

In [19]:
# Final statistics: only the default namespace and 'second-namespace' still hold vectors.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 5},
                'second-namespace': {'vector_count': 2}},
 'total_vector_count': 7}