### Diving into Pinecone

In [3]:
# Authenticating to Pinecone. 
import os
from dotenv import load_dotenv, find_dotenv

# Populate the process environment from the .env file located by find_dotenv();
# override=True lets values from the file replace variables that are already set
load_dotenv(find_dotenv(), override=True)

# Read the Pinecone API key from the environment (None when the variable is unset)
pineconeAPIKey = os.getenv('PINECONE_API_KEY')

In [4]:
# Import Pinecone Client
from pinecone import Pinecone 

# Initializing and authenticating the pinecone client
# (pineconeAPIKey is the value read from PINECONE_API_KEY in the previous cell)
pc = Pinecone(api_key = pineconeAPIKey)

# checking authentication
# Listing the indexes is used as a smoke test here; presumably the call fails
# if the key is rejected -- confirm against the client documentation
pc.list_indexes()

{'indexes': [{'dimension': 384,
              'host': 'medium-blogs-embeddings-index-2e19388.svc.gcp-starter.pinecone.io',
              'metric': 'euclidean',
              'name': 'medium-blogs-embeddings-index',
              'spec': {'pod': {'environment': 'gcp-starter',
                               'pod_type': 'starter',
                               'pods': 1,
                               'replicas': 1,
                               'shards': 1}},
              'status': {'ready': True, 'state': 'Ready'}}]}

### Working with Pinecone Indexes

In [5]:
# 1. Listing all indexes
# The result holds one description per index
# (dimension, host, metric, name, spec, status)
pc.list_indexes()

{'indexes': [{'dimension': 384,
              'host': 'medium-blogs-embeddings-index-2e19388.svc.gcp-starter.pinecone.io',
              'metric': 'euclidean',
              'name': 'medium-blogs-embeddings-index',
              'spec': {'pod': {'environment': 'gcp-starter',
                               'pod_type': 'starter',
                               'pods': 1,
                               'replicas': 1,
                               'shards': 1}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [8]:
# 1.1 Code to get the index name
# Take the first entry of the listing and read its 'name' field
pc.list_indexes()[0]['name']

'medium-blogs-embeddings-index'

In [9]:
# 2. Getting description of the Indexes
# Name of the index to describe (the one returned by the listing cells above)
index_name = 'medium-blogs-embeddings-index'

# Getting a complete description of a specific index:
# the result carries the same fields as that index's entry in list_indexes()
pc.describe_index(index_name)

{'dimension': 384,
 'host': 'medium-blogs-embeddings-index-2e19388.svc.gcp-starter.pinecone.io',
 'metric': 'euclidean',
 'name': 'medium-blogs-embeddings-index',
 'spec': {'pod': {'environment': 'gcp-starter',
                  'pod_type': 'starter',
                  'pods': 1,
                  'replicas': 1,
                  'shards': 1}},
 'status': {'ready': True, 'state': 'Ready'}}

In [10]:
# 3. Getting a list with the index names
# names() reduces the listing to just the index name strings
pc.list_indexes().names()

['medium-blogs-embeddings-index']

In [12]:
# 4. Deleting an index
index_name = "langchain"

# Delete the index only when it is actually present.
# An unconditional pc.delete_index() issued before this check would fail for a
# missing index and, when the index does exist, would remove it first so that
# the check below always lands in the "does not exist" branch.
if index_name in pc.list_indexes().names():
    print(f'Deleting index {index_name} ... ')
    pc.delete_index(index_name)
    print('Done')
else:
    print(f'Index {index_name} does not exist!')

Index langchain does not exist!


### Creating an Index in Pinecone

In [13]:
# 5. Creating a Pinecone index

# PodSpec is passed as the `spec` of the new index
from pinecone import PodSpec

# Name of the index this cell makes sure is present
index_name = 'langchain'

# Skip creation when an index with this name is already listed
if index_name in pc.list_indexes().names():
    print(f'Index {index_name} already exists!')
else:
    # 1536-dimensional index scored with cosine similarity on the gcp-starter environment
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
        spec=PodSpec(environment='gcp-starter'),
    )

In [14]:
# To perform any operation on the index, we must first select the index
index_name = 'langchain'
# pc.Index() returns the handle used for the vector-level calls in the following cells
index = pc.Index(index_name)

# Code to describe the index
# The statistics report dimension, index fullness, per-namespace entries and the total vector count
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

### Working with Vectors

In [15]:
# 1. Inserting vectors with dimension '1536'
import random

# One ID per record; each record gets its own random 1536-component vector
ids = list('abcde')
vectors = [[random.random() for _ in range(1536)] for _ in ids]

# Step 1: select the index that receives the vectors
index_name = 'langchain'
index = pc.Index(index_name)

# Step 2: upsert (insert-or-update) the records as (id, values) pairs
index.upsert(vectors=zip(ids, vectors))

{'upserted_count': 5}

In [16]:
# 2. Updating vectors
# Upserting under an ID that already exists ('c') updates the stored record,
# here with a constant vector of 1536 entries equal to 0.5
index.upsert(vectors=[('c', [0.5] * 1536)])

{'upserted_count': 1}

In [17]:
# 3. Fetching vectors

# Select the index
index_name = 'langchain'
index = pc.Index(index_name)

# Fetch vectors for the given IDs
# The response keeps the fetched records under its 'vectors' key, keyed by ID
index.fetch(ids=['c', 'd'])

{'namespace': '',
 'usage': {'read_units': 1},
 'vectors': {'c': {'id': 'c',
                   'values': [0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
             

In [18]:
# 4. Deleting vectors

# Select the index
index_name = 'langchain'
index = pc.Index(index_name)

# Delete vectors for the given IDs
# The call returns an empty dict
index.delete(ids=['b', 'c'])

{}

In [None]:
# Re-check the index statistics after the deletion in the previous cell
index.describe_index_stats()

In [19]:
# 5. Fetching a non-existing ID returns an empty result

# Select the index
index_name = 'langchain'
index = pc.Index(index_name)

# Fetch vectors for the given IDs (non-existing ID 'x' is passed)
# No error is raised: the response simply carries an empty 'vectors' mapping
index.fetch(ids=['x'])

{'namespace': '', 'usage': {'read_units': 1}, 'vectors': {}}

### Querying the Vector Store

In [20]:
# Build a random query vector with 1536 components, the dimension the
# 'langchain' index was created with
query_vector = [random.random() for _component in range(1536)]

In [21]:
# Point at the 'langchain' index
index_name = 'langchain'
index = pc.Index(index_name)

# Ask for the 3 stored vectors most similar to query_vector (cosine scores);
# include_values=False leaves the stored vector values out of the matches
index.query(vector=query_vector, top_k=3, include_values=False)

{'matches': [{'id': 'e', 'score': 0.760013521, 'values': []},
             {'id': 'd', 'score': 0.757843077, 'values': []},
             {'id': 'a', 'score': 0.757513463, 'values': []}],
 'namespace': '',
 'usage': {'read_units': 5}}

### Namespaces

In [23]:
# Insert five random vectors (IDs a-e) without passing a namespace
import random

index = pc.Index('langchain')

ids = list('abcde')
vectors = [[random.random() for _ in range(1536)] for _ in ids]
index.upsert(vectors=zip(ids, vectors))

{'upserted_count': 5}

In [24]:
# Namespaces partition an index. Upserting with a namespace name that is not
# in use yet creates it: three random vectors (IDs x, y, z) go into 'first-namespace'
ids = list('xyz')
vectors = [[random.random() for _ in range(1536)] for _ in ids]
index.upsert(vectors=zip(ids, vectors), namespace='first-namespace')

{'upserted_count': 3}

In [25]:
# A second namespace, populated with two random vectors (IDs q, p)
ids = list('qp')
vectors = [[random.random() for _ in range(1536)] for _ in ids]
index.upsert(vectors=zip(ids, vectors), namespace='second-namespace')

{'upserted_count': 2}

In [27]:
# The statistics list vector counts per namespace; the '' entry holds the
# vectors that were upserted without a namespace argument
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0001,
 'namespaces': {'': {'vector_count': 5},
                'first-namespace': {'vector_count': 3},
                'second-namespace': {'vector_count': 2}},
 'total_vector_count': 10}

In [28]:
# 'x' was upserted into 'first-namespace'; fetching it without a namespace
# argument returns an empty 'vectors' mapping
index.fetch(ids=['x'])

{'namespace': '', 'usage': {'read_units': 1}, 'vectors': {}}

In [29]:
# Passing the namespace the vector was upserted into returns the record for 'x'
index.fetch(ids=['x'], namespace='first-namespace')


{'namespace': 'first-namespace',
 'usage': {'read_units': 1},
 'vectors': {'x': {'id': 'x',
                   'values': [0.765501201,
                              0.901047409,
                              0.802936375,
                              0.727531791,
                              0.242381036,
                              0.17082119,
                              0.342684537,
                              0.972642481,
                              0.227756873,
                              0.187442899,
                              0.933799446,
                              0.909276247,
                              0.660948,
                              0.278907448,
                              0.636384904,
                              0.896743655,
                              0.0680567175,
                              0.215964437,
                              0.625274956,
                              0.908388555,
                              0.160707653,
        

In [None]:
# Delete the single ID 'x' from 'first-namespace'
index.delete(ids=['x'], namespace='first-namespace')

In [30]:
# delete_all=True targets every vector in 'first-namespace' instead of a list of IDs
index.delete(delete_all=True, namespace='first-namespace')

{}

In [31]:
# Same bulk delete, applied to 'second-namespace'
index.delete(delete_all=True, namespace='second-namespace')

{}

In [None]:
# Re-check the index statistics after the namespace deletions above
index.describe_index_stats()