# Deep dive into Pinecone

- Working with Pinecone indexes
- Working with vectors
- Namespaces

In [1]:
pip install -q pinecone

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip show pinecone

Name: pinecone
Version: 7.3.0
Summary: Pinecone client and SDK
Home-page: https://www.pinecone.io
Author: Pinecone Systems, Inc.
Author-email: support@pinecone.io
License: Apache-2.0
Location: /opt/anaconda3/lib/python3.12/site-packages
Requires: certifi, pinecone-plugin-assistant, pinecone-plugin-interface, python-dateutil, typing-extensions, urllib3
Required-by: langchain-pinecone
Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
from dotenv import find_dotenv, load_dotenv

# Locate a .env file and load it; override=True lets its values replace
# variables that are already set in the process environment.
load_dotenv(dotenv_path=find_dotenv(), override=True)

True

In [4]:
from pinecone import Pinecone
# Create the client without explicit arguments.
# NOTE(review): presumably the API key comes from the environment populated by
# load_dotenv in the previous cell — confirm.
pc = Pinecone()
# Show the indexes visible to this client (the recorded run shows none yet).
pc.list_indexes()

[]

## Working with Pinecone Indexes

In [5]:
# Delete index: remove the demo index when it is present, otherwise just report it.
index_name = "langchain-demo"
if index_name not in pc.list_indexes().names():
    print(f'Index {index_name} does not exist!')
else:
    print(f'Deleting index: {index_name}...')
    pc.delete_index(index_name)
    print('Index deleted!')

Index langchain-demo does not exist!


In [6]:
from pinecone import Pinecone, ServerlessSpec

# Create index: a 1536-dimensional, cosine-metric serverless index on AWS us-east-1.
index_name = "langchain-demo"

# Skip creation when an index with this name is already listed.
if index_name in pc.list_indexes().names():
    print(f'Index {index_name} already exists!')
else:
    print(f'Creating index: {index_name}...')
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(cloud='aws', region='us-east-1'),
    )
    print('Index created!')

Creating index: langchain-demo...
Index created!


In [7]:
# Three ways to inspect indexes. Only the last expression of a cell is rendered,
# so the results of the first two calls are not shown in the output below.
pc.list_indexes()[0]['name'] # Name of the first index in the listing
pc.describe_index(index_name) # Describe the index whose name is stored in index_name ("langchain-demo")
pc.list_indexes().names() # To list all names of indexes available

['langchain-demo']

In [8]:
# Get a handle to the index by name, then display its statistics
# (dimension, metric, per-namespace vector counts).
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}

## Working with Vectors

In [9]:
# Inserting vectors

import random

# Five ids 'a'..'e', each paired with a random vector of length 1536
# (the dimension the index was created with).
ids = ['a', 'b', 'c', 'd', 'e']
vectors = [[random.random() for _ in range(1536)] for _ in ids]

index = pc.Index(index_name)

# Each (id, values) pair is written as one record; no namespace is given.
index.upsert(vectors=zip(ids, vectors))

{'upserted_count': 5}

In [10]:
# Updating vectors: upserting an id that already exists ('c') replaces its values,
# here with a constant vector of 1536 entries equal to 0.5.
index.upsert(vectors=[('c', [0.5 for _ in range(1536)])])

{'upserted_count': 1}

In [11]:
# Fetching vectors by id. The response contains the full value list of every
# fetched vector, so the rendered output is very long.
index.fetch(ids=['c', 'd'])

FetchResponse(namespace='', vectors={'c': Vector(id='c', values=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 

In [12]:
# Deleting vectors by id (no namespace is given); the call returns an empty dict.
index.delete(ids=['b', 'c'])

{}

In [13]:
# Check the vector counts after the deletion above.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3,
 'vector_type': 'dense'}

In [14]:
# Fetching an id that does not exist returns a response with an empty `vectors` mapping
index.fetch(ids=['x'])

FetchResponse(namespace='', vectors={}, usage={'read_units': 1})

In [15]:
# Query for similar vectors: build a random probe vector with 1536 components,
# each drawn from random.random().
query_vector = list(random.random() for _component in range(1536))

In [16]:
# Return the 3 closest matches to query_vector; include_values=False asks for
# ids and scores without the stored vector values.
index.query(vector=query_vector, top_k=3, include_values=False)

{'matches': [{'id': 'd', 'score': 0.762344837, 'values': []},
             {'id': 'e', 'score': 0.754647434, 'values': []},
             {'id': 'a', 'score': 0.750887036, 'values': []}],
 'namespace': '',
 'usage': {'read_units': 1}}

## Namespaces

In [17]:
# Insert 5 vectors with empty namespace (ids 'a'..'e', random values of length 1536)
ids = list('abcde')
vectors = [[random.random() for _ in range(1536)] for _ in ids]
index.upsert(vectors=zip(ids, vectors))

{'upserted_count': 5}

In [18]:
# Insert 3 vectors with a namespace: first-namespace (ids 'x', 'y', 'z')
ids = list('xyz')
vectors = [[random.random() for _ in range(1536)] for _ in ids]
index.upsert(vectors=zip(ids, vectors), namespace='first-namespace')

{'upserted_count': 3}

In [19]:
# Insert 2 vectors with a namespace: second-namespace (ids 'g', 'h')
ids = list('gh')
vectors = [[random.random() for _ in range(1536)] for _ in ids]
index.upsert(vectors=zip(ids, vectors), namespace='second-namespace')

{'upserted_count': 2}

In [20]:
# Show per-namespace vector counts.
# NOTE(review): the recorded output lists only '' and 'first-namespace' (8 vectors) although
# 2 vectors were just upserted into 'second-namespace' — presumably the stats lag behind
# recent writes; confirm.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 5},
                'first-namespace': {'vector_count': 3}},
 'total_vector_count': 8,
 'vector_type': 'dense'}

In [21]:
# Won't find it: no namespace was specified, so the fetch searches the default namespace ""
index.fetch(ids=['x'])

FetchResponse(namespace='', vectors={}, usage={'read_units': 1})

In [22]:
# Passing the namespace the vector was upserted into makes the fetch find it.
index.fetch(ids=['x'], namespace='first-namespace')

FetchResponse(namespace='first-namespace', vectors={'x': Vector(id='x', values=[0.84210515, 0.793016493, 0.432615161, 0.644184351, 0.511622608, 0.0628359839, 0.318293273, 6.67085333e-05, 0.544837832, 0.812546134, 0.909740925, 0.510501325, 0.147645801, 0.0651393235, 0.87763232, 0.0578864887, 0.993318, 0.22553429, 0.551717877, 0.920382679, 0.18207401, 0.0193921048, 0.672301114, 0.761582792, 0.185816646, 0.93648994, 0.913551629, 0.483782858, 0.416026145, 0.738653183, 0.648696363, 0.463015139, 0.603665352, 0.981060803, 0.469784141, 0.514602363, 0.096319, 0.198917016, 0.741958737, 0.842866421, 0.850967646, 0.486544669, 0.73457396, 0.04225168, 0.463538557, 0.713171363, 0.828429163, 0.668151557, 0.227840081, 0.767631829, 0.0967603475, 0.802779615, 0.320016354, 0.177191854, 0.644589663, 0.654713333, 0.536572874, 0.409288645, 0.208583817, 0.648289263, 0.754021347, 0.420870751, 0.447029501, 0.661640525, 0.947170198, 0.256326258, 0.132782981, 0.0960297883, 0.124681905, 0.828774154, 0.476964444, 0

In [23]:
# Same with deleting: the namespace that holds the id is passed explicitly
index.delete(ids=['x'], namespace='first-namespace')

{}

In [24]:
# To delete all vectors from a namespace (also deletes the namespace itself).
# The first call passes no namespace; the other two target the named namespaces.
index.delete(delete_all=True)
index.delete(delete_all=True, namespace='first-namespace')
index.delete(delete_all=True, namespace='second-namespace')

{}

In [25]:
# Confirm the cleanup: the stats should now report no namespaces and no vectors.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}