# Pinecone

#### Load the environment variables

In [1]:
# Create a .env file in the current directory and add the API key of the service to it
# (presumably as PINECONE_API_KEY, the variable the Pinecone client looks for — confirm).
from dotenv import load_dotenv, find_dotenv
# override=True is passed so that values from the .env file take precedence over
# variables that are already set in the environment.
load_dotenv(find_dotenv(), override=True)

True

#### The installation of Pinecone is as follows:

In [2]:
# pip install -q pinecone-client
# pip install --upgrade -q pinecone-client 
# pip show pinecone-client

#### The following code initializes the Pinecone client that is used for all the operations below

In [3]:
from pinecone import Pinecone
# Create the client handle `pc`. No API key is passed explicitly here
# (presumably it is read from the environment loaded from .env — confirm).
pc = Pinecone()

## Working with Indexes

#### List all the indexes in the Pinecone database

In [None]:
# List all the indexes in the Pinecone database
pc.list_indexes()

In [None]:
# Same listing, reduced to the index names only
pc.list_indexes().names()

#### Create an index on Pinecone

In [None]:
from pinecone import PodSpec

# Name of the index to create on Pinecone.
# NOTE(review): Pinecone's documentation restricts index names to lowercase
# alphanumeric characters and hyphens; this name contains underscores — confirm
# that it is accepted before relying on this cell.
index_name = 'my_index_name'

if index_name in pc.list_indexes().names():
    # Nothing to do: an index with this name is already present
    print(f'Index {index_name} already exist!')
else:
    # The index does not exist yet, so create it on Pinecone.
    # dimension=1536 matches the 1536-element vectors used in this notebook.
    print(f'Creating index {index_name}')
    pc.create_index(
        name=index_name, dimension=1536, metric='cosine', spec=PodSpec(environment='gcp-starter')
    )
    print('Index created!')


#### Delete an index on Pinecone

In [None]:
# Delete the index only when it is actually present on Pinecone
index_name = 'my_index_name'
if index_name not in pc.list_indexes().names():
    print(f'Index {index_name} does not exist!')
else:
    print(f'Deleting index {index_name}')
    pc.delete_index(index_name)
    print('Done')

#### Selecting Index; to perform any operation with an index, you must first select the index.

In [None]:
# Select the index by name; `index` is the handle used for the vector operations that follow
index_name = 'my_index_name'
index = pc.Index(index_name)

#### Describe the index statistics: the number of vectors, the number of namespaces, the total size of the index, and the dimension of the index

In [None]:
# Show the statistics of the selected index
index.describe_index_stats()

## Working with Vectors

#### Define 5 example vectors, each of size 1536

In [None]:
import random

# Five example vectors, each holding 1536 random floats produced by random.random()
vectors = [
    [random.random() for _ in range(1536)]
    for _ in range(5)
]
vectors

#### Inserting vectors

In [None]:
# Select the index on Pinecone
index_name = 'my_index_name'
index = pc.Index(index_name)

# Every vector needs an ID. There are 5 vectors, so the list holds five
# single-character IDs, one per vector.
ids = ['a', 'b', 'c', 'd', 'e']

# `upsert` is a single operation that inserts a new vector, or updates the existing
# one if its ID is already present. `zip` pairs each ID with its vector beforehand.
index.upsert(vectors=zip(ids, vectors))

#### Updating the vectors

In [None]:
# Upserting an ID that already exists updates it: here every element of the 'c' vector is set to 0.5.
index.upsert(vectors=[('c', [0.5] * 1536)])

#### Fetching the vector by ID

In [None]:
# Fetch the vector stored under the ID 'd'
index.fetch(ids=['d'])

#### Deleting vectors by ID

In [None]:
# Delete both the 'b' and 'd' vectors from the index
index.delete(ids=['b', 'd'])

In [None]:
# Always check the index statistics after each change to the index.
index.describe_index_stats()

## Query 

In [None]:
# Create a query vector of 1536 random decimal numbers (the same size as the example vectors)
query_vector = [random.random() for _ in range(1536)]

The query operation will retrieve the IDs of the most similar vectors in the index, along with their similarity scores.

In [None]:
# Query the index with `query_vector`.
#   top_k=3: the number of results to return
#   include_values=False: presumably leaves the vector values out of the results — confirm
index.query(vector=query_vector, top_k=3, include_values=False)

## Namespaces
Pinecone allows you to partition the vectors in an index into namespaces.

Queries and other operations are scoped to a specific namespace, allowing different requests to search different subsets of your index.

Imagine you are dealing with news articles. You might want to create a namespace for indexing articles by content and another for indexing articles by title.

`Key information` about namespaces:
* Every index consists of one or more namespaces.
* Each vector exists in exactly one namespace.
* Namespaces are uniquely identified by a namespace name.
* The default namespace is represented by the empty string and is used if no specific namespace is specified.

#### Create the first vectors with a default namespace on the index

In [None]:
import random

# Select the index
index = pc.Index('my_index_name')

# Five random 1536-element vectors with the IDs 'a' to 'e'. No namespace is
# specified in the upsert, so the default namespace is used.
ids = ['a', 'b', 'c', 'd', 'e']
vectors = [
    [random.random() for _ in range(1536)]
    for _ in range(5)
]
index.upsert(vectors=zip(ids, vectors))

#### Create the second vectors with a namespace `second-namespace` on the index

In [None]:
# Two more random 1536-element vectors, stored under the IDs 'q' and 'p'
# in the namespace `second-namespace`.
ids = ['q', 'p']
vectors = [
    [random.random() for _ in range(1536)]
    for _ in range(2)
]
index.upsert(vectors=zip(ids, vectors), namespace='second-namespace')

In [None]:
# Get the description of the index to inspect its namespaces after the upserts
index.describe_index_stats()

In [None]:
# Fetch the 'q' vector. The namespace is given explicitly because 'q' was
# upserted into `second-namespace`, not into the default namespace.
index.fetch(ids=['q'], namespace='second-namespace')

In [None]:
# Delete the 'q' vector from `second-namespace`
index.delete(ids=['q'], namespace='second-namespace')

In [None]:
# Delete all records from `second-namespace`. Deleting all records from a namespace
# also deletes the namespace itself.
index.delete(delete_all=True, namespace='second-namespace')

In [None]:
# Get the description of the index once more, after the deletions
index.describe_index_stats()